#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell
# TODO
# - find channel icons somewhere
# - add more informative errors in xml

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell

=pod

=head1 NAME

tv_grab_it - Grab TV listings for Italy.

=head1 SYNOPSIS

tv_grab_it --help

tv_grab_it [--config-file FILE] --configure

tv_grab_it [--config-file FILE] [--output FILE]
           [--days N] [--offset N] [--quiet]
           [--slow] [--verbose] [--errors-in-xml]
           [--backend SITE1[,SITE2[,SITE3]]]
        

=head1 DESCRIPTION

Output TV listings for several channels available in Italy.
The grabber relies on parsing HTML so it might stop working at any time.
The data comes from different backends. This is to minimize blackouts 
in case of site changes but also to extend the number of channels.
If the grabber canE<39>t find the data with the first backend it will
try the second one, and so on. You can specify your order of preference
using the --backend option.

Currently configured backends are (in default order):

=over

=item B<lastampa> - grabs data from www.lastampa.it

=item B<skytv>    - grabs data from www.skytv.it

=item B<mytv>     - grabs data from www.my-tv.it

=back

First run B<tv_grab_it --configure> to choose which channels you want
to download. Then running B<tv_grab_it> with no arguments will output
listings in XML format to standard output.

B<--configure> Prompt for which channels, and writes the configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_it.conf>.  This is the file written
by B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of XMLTV::ProgressBar.

B<--output FILE> write to FILE rather than standard output.

B<--days N> grab N days.  The default is 7.

B<--offset N> start N days in the future.  The default is to start
from today.

B<--quiet> suppress the progress messages normally written to standard
error.

B<--slow> downloads more details (descriptions, actors...). This means
downloading a new file for each programme, so itE<39>s off by default to
save time.

B<--verbose> prints out verbose information useful for debugging.

B<--errors-in-xml> outputs warnings as programmes in the xml file,
so that you can see errors in your favorite frontend in addition
to the default STDERR. 

B<--backend> set the backend (or backends) to use. See the examples.

=head1 CAVEATS

If you use --quiet you should also use --errors-in-xml or you wonE<39>t
be warned about errors. Note also that, as opposed to previous versions,
this grabber doesnE<39>t die if it cannot find any data, but returns an
empty (or optionally containing just warnings) xml file instead.

The backendsE<39> data quality differs a lot. For example, mytv is very
basic, yet complete and uses the least amount of bandwith. Skytv has a
lot of channels, but unless you use it with the --slow option the data
is not very good (and in this case i would be VERY slow). lastampa is a 
good overall site if you donE<39>t need the whole sky package.

The --slow option has no effect on the mytv backend.

=head1 EXAMPLES

=over 

=item tv_grab_it --backend mytv --configure

configures tv_grab_it using only the backend mytv 

=item tv_grab_it --backend skytv,mytv --days 1

grabs one day of data ovveriding the default order (could also be --backend skytv --backend mytv)

=item tv_grab_it --cache --slow --days 3

grabs the full data for the next three days using the default backend order and using a disk cache.

=back

=head1 SEE ALSO

L<xmltv(5)>.

=head1 AUTHORS

Davide Chiarini, pinwiz@inwind.it

=cut

######################################################################
# initializations
use warnings;
use strict;

use XMLTV::Version '$Id: tv_grab_it.in,v 1.35 2005/05/31 07:41:56 mnbjhguyt Exp $';
use HTML::Entities;
use HTML::Parser;
use URI::Escape;
use Getopt::Long;
use Date::Manip;
use Memoize;
use XMLTV;
use XMLTV::Memoize;
use XMLTV::Ask;
use XMLTV::Config_file;
use XMLTV::ProgressBar;
use XMLTV::DST;
use XMLTV::Get_nice;

# Todo: perhaps we should internationalize messages and docs?
use XMLTV::Usage <<END
$0: get Italian television listings in XMLTV format
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--days N]
        [--offset N] [--quiet] [--slow] [--verbose] [--backend]
        [--errors-in-xml] 
END
  ;

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
    *t = sub {};
    *d = sub { '' };
    }
    else {
    *t = \&Log::TraceMessages::t;
    *d = \&Log::TraceMessages::d;
    Log::TraceMessages::check_argv();
    }
}

#max days on the server
my $MAX_DAYS=7;

# default language
my $LANG="it";
my $date_today = UnixDate("today", '%Y-%m-%d');

my @default_backends = ('lastampa', 'skytv', 'mytv');

my %channels; #to store display names

# backend configurations
my %backend_info
  = ( 
     'lastampa' =>
     { domain => 'lastampa.it',
       base_chan => 'http://www.wfactory.net/lastampa/programmi.jsp?FRM_SEARCH_DATE='.$date_today.'&FRM_SEARCH_START_TIME=16%3A00&FRM_SEARCH_TYPE=%25%25&FRM_SEARCH_PACK=%25%25&FRM_SEARCH_CHANNEL=%25%25&canale=%2FTelevisione&x=4&y=4',
       base_data => 'http://www.wfactory.net/lastampa/search_channel.jsp?',
       rturl     => "http://www.lastampa.it/_WEB/_SERVIZI/programmiTV/",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub   => \&lastampa_fetch_data,
       channel_list_sub => \&lastampa_get_channels_list,
     },

     'mytv' =>
     { domain => 'my-tv.it',
       base_chan => 'http://www.my-tv.it/palinsesto/guidatv_xml.jsp',
       base_data => 'http://www.my-tv.it/palinsesto/guidatv_xml.jsp',
       rturl => "http://www.my-tv.it/",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub =>   \&mytv_fetch_data,
       channel_list_sub => \&mytv_get_channels_list,
     },

     'skytv' =>
     { domain => 'skytv.it',
       base_chan => 'http://www.skytv.it/GuidaTv/default.htm',
       base_data => 'http://www.skytv.it/GuidaTv/default.htm',
       rturl => "http://www.skytv.it/",
       needs_login => 0,
       needs_cookies => 0,
       fetch_data_sub =>   \&skytv_fetch_data,
       channel_list_sub => \&skytv_get_channels_list,
     },
    );

######################################################################
# Get options, including undocumented --cache option.
my $func_name = 'XMLTV::Get_nice::get_nice_aux';
XMLTV::Memoize::check_argv($func_name) # cache on disk
  or memoize($func_name)               # cache in memory
  or die "cannot memoize $func_name: $!";

my ($opt_days,
    $opt_offset,
    $opt_help,
    $opt_output,
    $opt_slow,
    $opt_verbose,
    $opt_configure,
    $opt_config_file,
    $opt_gui,
    $opt_quiet,
    $opt_share,
    $opt_errors_in_xml,
    @opt_backends,
   );

# server only holds 7 days, so if there is an offset days must be
# opt_days-offset or less.

$opt_offset = 0;   # default
$opt_quiet  = 0;   # default
$opt_slow   = 0;   # default
$opt_verbose  = 0; # default

GetOptions('days=i'       => \$opt_days,
       'offset=i'         => \$opt_offset,
       'help'             => \$opt_help,
       'configure'        => \$opt_configure,
       'config-file=s'    => \$opt_config_file,
       'gui:s'            => \$opt_gui,
       'output=s'         => \$opt_output,
       'quiet'            => \$opt_quiet,
       'slow'             => \$opt_slow,
       'verbose'          => \$opt_verbose,
       'share=s'          => \$opt_share,       # undocumented
       'errors-in-xml'    => \$opt_errors_in_xml,
       'backend=s'        => \@opt_backends,
      )
  or usage(0);

die "number of days (--days) must not be negative. You gave: $opt_days\n"
  if (defined $opt_days && $opt_days < 0);

die "offset days (--offset) must not be negative. You gave: $opt_offset\n"
  if ($opt_offset < 0);
usage(1) if $opt_help;

if ($opt_quiet) {
    $opt_verbose = 0;
}

$opt_days = $opt_days || $MAX_DAYS;

# parse the --backend option
@opt_backends = split(/,/,join(',',@opt_backends)); #we allow both multiple --backend and --backend=name1,name2

my @backends = ();
foreach (@opt_backends) {
    if (defined $backend_info{$_}) {
        push @backends, $_;
    }
    else {
        warn "Unknown backend $_!\n";
    }
}
unless (@backends) {
    @backends = @default_backends;
    if (@opt_backends) {    #we specified backends but we didn't like them, warn the user
        warn "No good backend specified, falling back on defaults\n";
    }
}

XMLTV::Ask::init($opt_gui);

# share/ directory for storing channel mapping files.  This next line
# is altered by processing through tv_grab_it.PL.  But we can use
# the current directory instead of share/tv_grab_it for development.
#
# The 'source' file tv_grab_it.in has $SHARE_DIR undef, which means
# use the current directory.  In any case the directory can be
# overridden with the --share option (useful for testing).
#

my $SHARE_DIR='/usr/share/xmltv'; # by grab/it/tv_grab_it.PL
$SHARE_DIR = $opt_share if defined $opt_share;
my $OUR_SHARE_DIR = (defined $SHARE_DIR) ? "$SHARE_DIR/tv_grab_it" : '.';
(my $CHANNEL_NAMES_FILE = "$OUR_SHARE_DIR/channel_ids") =~ tr!/!/!s;

# reads the file channel_ids, which contains the tables to convert 
# between backends' ids and XMLTV ids of channels.
# to support multiple backends i add a ini-style [section] header
# there are two fields: xmltv_id and site_id.

my (%xmltv_chanid, %seen);
my $line_num = 0;
my $backend;
foreach (XMLTV::Config_file::read_lines($CHANNEL_NAMES_FILE, 1)) {
    ++ $line_num;
    next unless defined;
    my $where = "$CHANNEL_NAMES_FILE:$line_num";

    if (/^\[(.*)\]$/) {
        if (defined $backend_info{$1}) {    #esiste la configurazione
            $backend = $1;
        }
        else {
            warn "Unknown backend $1 in $where\n";
            $backend = undef;
        }
    }
    elsif ($backend) {
        my @fields = split /;/;
        die "$where: wrong number of fields"
          if @fields != 2;#3;

        my ($xmltv_id, $site_id) = @fields;

        warn "$where: backend id $site_id for site '$backend' seen already\n"
          if defined $backend_info{$backend}{site_ids}{$xmltv_id};
        $backend_info{$backend}{site_ids}{$xmltv_id}{site_id} = $site_id;
        #$backend_info{$backend}{site_ids}{$xmltv_id}{satellite} = $sat;

        warn "$where: XMLTV_id $xmltv_id for site '$backend' seen already\n"
          if $seen{$backend.$xmltv_id}++;
    }
}
# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_it', $opt_quiet);

if ($opt_configure) {
    XMLTV::Config_file::check_no_overwrite($config_file);
}

$line_num = 0;

my $foundchannels;

my $bar = new XMLTV::ProgressBar('getting list of channels', scalar @backends)
  if not $opt_quiet;
# find list of available channels
foreach $backend (@backends) {
    %{$backend_info{$backend}{channels}} = &{$backend_info{$backend}{channel_list_sub}}($backend_info{$backend}{base_chan});
    $foundchannels+=scalar(keys(%{$backend_info{$backend}{channels}}));

    if (not $opt_quiet) {
        update $bar; 
    }
}
$bar->finish() if (not $opt_quiet);
die "no channels could be found" unless ($foundchannels);
warn "VERBOSE: $foundchannels channels found.\n" if ($opt_verbose);

######################################################################
# write configuration
if ($opt_configure) {
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";

    my %channels;
    foreach $backend (@backends) {
        #faccio un hash con tutti gli id
        foreach (keys %{$backend_info{$backend}{channels}}) {
            $channels{$_} = xmltv_chanid($backend, $_);
        }

        #not used yet
        if ($backend_info{$backend}{needs_login}) {
            say "To get listings on '$backend' you will need a login on the site.\n";
            my $username_wanted = ask_boolean('Do you have a login?', 0);
            if ($username_wanted) {
                $backend_info{$backend}{username} = ask("Username:");
                print CONF "username: $backend:$backend_info{$backend}{username}\n";
            }
        }
    }

    #double reverse to get rid of duplicates
    %channels = reverse %channels;
    %channels = reverse %channels;

    # Ask about each channel.
    my @names = sort keys %channels;
    my @qs = map { "add channel $_?" } @names;
    my @want = ask_many_boolean(1, @qs);
    foreach (@names) {
        die if $_ =~ tr/\r\n//;
        my $w = shift @want;
        warn("cannot read input, stopping channel questions"), last
          if not defined $w;
        # No need to print to user - XMLTV::Ask is verbose enough.

        # Print a config line, but comment it out if channel not wanted.
        print CONF '#' if not $w;
        print CONF "channel ".$channels{$_}." # $_\n";
    }

    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");

    exit();
}

######################################################################
# read configuration
my @channels;
$line_num = 0;
foreach (XMLTV::Config_file::read_lines($config_file)) {
    ++ $line_num;
    next if not defined;
    if (/^channel:?\s*(.*\S+)\s*$/) {
	  push @channels, $1;
    }
    elsif (/^username:?\s+(\S+):(\S+)/){
        if (defined $backend_info{$1}) {    #esiste la configurazione
            $backend_info{$1}{username} = $2;
        }
        else {
            warn "Found username for unknown backend $1 in $config_file\n";
        }
    }
    else {
        warn "$config_file:$line_num: bad line\n";
    }
}


######################################################################
# sort out problem in offset options
if ($opt_offset >= $MAX_DAYS) {
    warn "Day offset too big. No program information will be fetched.\n";
    $opt_offset = 0;
    $opt_days = 0;
}
my $days2get;
if (($opt_days+$opt_offset) > $MAX_DAYS) {
    $days2get=$MAX_DAYS-$opt_offset;
    warn "The server only has info for ".($MAX_DAYS-1)." days from today.\n";
    if ($days2get > 1) {
        warn "You'll get listings for only $days2get days.\n";
        }
    else {
        warn "You'll get listings for only 1 day.\n";
        }
    }
    else {
        $days2get=$opt_days;
    }
t "will get $days2get days from $opt_offset onwards";

## If we are not getting any days of program data we still need to go through
## the loop once to get the channel icons.
#my $last_day;
#if ($days2get == 0) {
#    $last_day = $opt_offset;
#    }
#else {
#    $last_day = $days2get + $opt_offset - 1;
#    }

######################################################################
# begin main program
my %w_args;
if (defined $opt_output) {
    my $fh = new IO::File(">$opt_output");
    die "cannot write to $opt_output: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
$w_args{encoding} = 'ISO-8859-1';
my $w = new XMLTV::Writer(%w_args);

my $source_info_str = join ",", map {'http://'.$backend_info{$_}{domain}} @backends;
my $source_data_str = join ",", map {$backend_info{$_}{rturl}} @backends;

$w->start({ 'source-info-url'     => $source_info_str ,
        'source-data-url'     => $source_data_str,
        'generator-info-name' => 'XMLTV',
        'generator-info-url'  => 'http://membled.com/work/apps/xmltv/',
        });


my %display_names;
foreach my $back (@backends) {
	foreach (keys %{$backend_info{$back}{site_ids}}) {
		$display_names{$_} = $backend_info{$back}{site_ids}{$_}{site_id};
	}
}

foreach my $xmltv_id (@channels) {
    $w->write_channel({
        id => $xmltv_id,
        'display-name' => [ [ $display_names{$xmltv_id} ] ],
#        icon => [{src => get_ch_icon($ch_id)}] #can't find it yet!
        });
}

#make a list of channels and days to grab
my @to_get;
foreach my $day ($opt_offset .. ($days2get + $opt_offset - 1)) {
    foreach my $channel (@channels) {
        push @to_get, [$channel, $day];
    }
}

$bar = new XMLTV::ProgressBar('getting listings', scalar @to_get)
  if not $opt_quiet;

## If we aren't getting any days of program data then clear out the list
## that was created to fetch icons.
#if ($days2get == 0) {@to_get = ();}

foreach (@to_get) {
    my $day     = $_->[1];
    my $channel = $_->[0];

    #this is where i would handle cookies and logins if needed
    warn "VERBOSE: Grabbing channel $channel, day $day\n" if ($opt_verbose);

    my $error;
    foreach $backend (@backends) {
        warn "VERBOSE: Trying with $backend\n" if ($opt_verbose);

        my @dati; $error = 0;
        ($error, @dati) = &{$backend_info{$backend}{fetch_data_sub}}($channel, $day);

        #TODO different kinds of errors?
        if ($error) {
            warn "VERBOSE: Error fetching channel $channel day $day with backend $backend\n" if ($opt_verbose);
        }
        else {
            $w->write_programme($_) foreach @dati;
            last;
        }
    }

    #nessuno ci e' riuscito
    if ($error) {
        #this is an easier way to know about errors if all of our scripts are automated
        if ($opt_errors_in_xml) {
            $w->write_programme(
                {
                    title   => [['ERROR FETCHING DATA', $LANG]],
                    start   => xmltv_date('00:01', $day),
                    stop    => xmltv_date('23:59', $day),
                    channel => $channel,
                    desc    => [["XMLTV couldn't grab data for $channel, day $day. Sorry about that.", $LANG]],
                }
            );
        }
        else {
            warn "I couldn't fetch data for channel $channel, day $day from any backend!!\n" if (not $opt_quiet);
        }
    }

    update $bar if not $opt_quiet;
}
$w->end;
$bar->finish() if not $opt_quiet;

#####################
# general functions #
#####################

####################################################
# xmltv_chanid
# to handle channels that are not yet in the channel_ids file
sub xmltv_chanid {
    my ($backend, $channel_id) = @_;
    my %chan_ids;

    #reverse id hash
    foreach my $xmltv_id (keys %{$backend_info{$backend}{site_ids}}) {
        my $site_id = $backend_info{$backend}{site_ids}{$xmltv_id}{site_id};
        $chan_ids{$site_id} = $xmltv_id;
        next if (not defined $site_id);
        }

    if (defined $chan_ids{$channel_id}) {
        return $chan_ids{$channel_id};
        }
    else {
        warn "***Channel |$channel_id| for '$backend' is not in channel_ids, should be updated.\n";
        $channel_id=~ s/\W//gs;

        #make up an id
        my $id = lc($channel_id).".".$backend_info{$backend}{domain};

    ##update backend info
        #$backend_info{$backend}{site_ids}{$id}{site_id} = $channel_id;
        return $id;
    }


}

##########################################################
# tidy
# decodes entities and removes some illegal chars
sub tidy($) {
    for (my $tmp=shift) {
    s/[\000-\037]//g;   # remove control characters
    s/[\222]/\'/g;      # messed up char
    s/[\224]/\"/g;      # end quote
    s/[\205]/\.\.\./g;  # ... must be something messed up in my regexps?
    s/[\223]/\"/g;      #start quote
    s/[\221]/\'/g;

    if (s/[\200-\237]//g) {
        if ($opt_verbose){
            warn "VERBOSE: removing illegal char: |\\".ord($&)."|\n";
         }
    }

    # Remove leading white space
    s/^\s*//;
    # Remove trailing white space
    s/\s*$//;
    return decode_entities($_);
    }
}


######################
# my-tv.it functions #
######################

####################################################
# mytv_get_channels_list
# returns hash of channel details. which, basically is
# just 'this channel is in the satellite page or the 
# terrestrial page'
sub mytv_get_channels_list {
    my %chan_hash;
    my $base = shift;

    for my $t (0, 1) {
        my $content;

        warn "VERBOSE: Getting channel list from $base?d=0&t=$t\n" if ($opt_verbose);

		eval { $content = get_nice("$base?d=0&t=".$t); };
		if ($@) {   #get_nice has died
			warn "VERBOSE: Cannot get mytv's channel list ($base?d=0&t=$t). Site down?\n";

			return ();
		} 

        $content=~/<data>(.*)<\/data>/s;
        $content = $1;

        #split the lines
        my @lines = split /\n/, $content; #shift @lines;
        foreach (@lines) {
            if (/ can="(.*?)"/) {
                $chan_hash{$1}="$t";
    
                #update backend info, in case this is a new channel not in channel_ids
                my $xmltv_id = xmltv_chanid('mytv', $1);
                $backend_info{mytv}{site_ids}{$xmltv_id}{site_id}   = $1;
                $backend_info{mytv}{site_ids}{$xmltv_id}{satellite} = $t;
            }
        }
    }

    return %chan_hash;
}

####################################################
# mytv_fetch_data
# 2 parameters: xmltv_id of channel 
#               day offset
# returns an error or an array of data
sub mytv_fetch_data {
    my ($xmltv_id, $offset) = @_;
    my $content;

    # build url to grab
    # my-tv is a very simple site: it has two pages per day
    # (one for satellite, one for terrestrial channels)
    # so we are basically fetching the same page over and over
    # this is not a problem as XMLTV caches the fetches.

    # the channels hash holds 1/0 if the channel is sat/ter
    my $site_id = $backend_info{mytv}{site_ids}{$xmltv_id}{site_id};

    if (not defined $site_id) {
        warn "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" if ($opt_verbose);
        return (1, ());
    }

    my $sat = $backend_info{mytv}{channels}{$site_id};
    $sat = 1 if (not defined $sat); #if we don't know we try to guess!;

    my $url = $backend_info{mytv}{base_data}."?d=".$offset."&t=".$sat;
    warn "VERBOSE: fetching $url\n"  if ($opt_verbose);

    eval { $content=get_nice($url) };
    if ($@) {   #get_nice has died
        warn "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend mytv\n" if ($opt_verbose);

        # Indicate to the caller that we had problems
        return (1, ());
    } 

    my @programmes = ();
    warn "VERBOSE: parsing...\n" if ($opt_verbose);

    $content=~/<data>(.*)<\/data>/s;
    $content = $1;

    #split the lines
    my @lines = split /\n/, $content; shift @lines;
    foreach my $line (@lines) {
        my %programme = ();
        my ($title, $time_start, $chan, $time_end, $year, $country, $category, $category2, $sub_title);
        $line=~/^<row (.*) \/>$/s;

        my @d = split /\" /, $1;
        foreach my $c (@d) {
            my ($key, $val) = split /=\"/, $c;
            next if (not defined $val);
            if ($val=~/(.*)\"$/) {$val = $1;}

            for ($key) {
              /tit/ && do {$title = $val; last};
              /anno/ && do {$year = $val if ($val != 0); last};
              /gen/ && do {$category = $val; last};
              /orain/ && do {$time_start = $val; last};
              /orafi/ && do {$time_end = $val; last};             
              /can/ && do {$chan = $val; last};           
              /ep/ && do {$sub_title = $val; last};           
              /cat/ && do {$category2 = $val; last};              
              /naz/ && do {last}; #same as cat
              /idprg/ && do { last};              
              /idev/ && do { last}; 
              /data/ && do { last};

              warn "unhandled attribute $key\n";
            }
        }

        # Three mandatory fields: title, start, channel.
        if (not defined $title) {
            warn 'no title found, skipping programme';
            next;
        }
            $programme{title}=[[tidy($title), $LANG] ];
        if (not defined $time_start) {
            warn "no start time for title $title, skipping programme";
            next;
        }
            $programme{start}=xmltv_date($time_start, $offset);
        if (not defined $chan) {
            warn "no channel for programme $title at $time_start, skipping programme";
            next;
        }

        my $past_midnight = 0;
        $time_end =~/.......... (..).(..)/; my $time_end2 = "$1.$2";
        $time_start =~/.......... (..).(..)/; my $time_start2 = "$1.$2";
        $past_midnight = 1 if ($time_end2 < $time_start2); #they can work as decimals, 0.32 < 23.44

        next if ($chan ne $backend_info{mytv}{site_ids}{$xmltv_id}{site_id});

        $programme{channel}=$xmltv_id;#"$chan";
        $programme{category}=[[tidy($category), $LANG ]] if defined $category;
        $programme{'sub-title'}=[[$sub_title, $LANG] ] if (defined $sub_title);
        push (@{$programme{category}}, [tidy($category2), $LANG ]) if defined $category2;
        $programme{country} = [[$country, $LANG]] if (defined $country);
        $programme{date} = $year if (defined $year);
        $programme{stop}=xmltv_date($time_end, $offset + $past_midnight) if (defined $time_end);

        #put info in array, if it is wanted
        push @programmes, {%programme};
    }

    if (scalar @programmes) {
        return (0, @programmes);
    }
    else {
        # there is a number of reasons why we could get an empty array.
        # so we return an error 
        return (1, @programmes);
    }
}

####################################################
# xmltv_date
# this returns a date formatted like 20021229121300 CET
# first argument is time (like '14:20')
# second is date offset from today
sub xmltv_date {
    my ($time, $offset) = @_;

    $time =~/([0-9]+?):([0-9]+).*/ or die "bad time $time";
    my $hour=$1; my $min=$2;

    my $data = &DateCalc("today","+ ".$offset." days");
    die 'date calculation failed' if not defined $data;
    return utc_offset(UnixDate($data, '%Y%m%d').$hour.$min.'00', '+0100');
}


#########################
# lastampa.it functions #
#########################

####################################################
# lastampa_get_channels_list
sub lastampa_get_channels_list {
    my %chan_hash;
    my $base = shift;
    
    my $content;
    warn "VERBOSE: Getting channel list from $base\n" if ($opt_verbose);

	eval { $content = get_nice($base); };
	if ($@) {   #get_nice has died
		warn "VERBOSE: Cannot get lastampa's channel list ($base). Site down?\n";

		return ();
	} 


    my @lines = split /</, $content;

    foreach my $l (@lines) {
        if ($l=~/\Qa href='javascript:ShowChannel(\E(.*?),\"(.*?)\"\)/) {
            $chan_hash{$2}=$1;

            #update backend info, in case this is a new channel not in channel_ids
            #my $xmltv_id = xmltv_chanid('lastampa', $2);
            #$backend_info{lastampa}{site_ids}{$xmltv_id}{site_id} = $1;
        };
    }

    return %chan_hash;
}

####################################################
# lastampa_fetch_data
# 2 parameters: xmltv_id of channel 
#               day offset
# returns an error or an array of data
sub lastampa_fetch_data {
    my ($xmltv_id, $offset) = @_;
    my $content;

    my $site_id = $backend_info{lastampa}{site_ids}{$xmltv_id}{site_id};

	if (not defined $site_id) {
        warn "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" if ($opt_verbose);
        return (1, ());
    }

    # build url to grab
#   my %chan = reverse %{$backend_info{lastampa}{channels}};
    my %chan = %{$backend_info{lastampa}{channels}};
    my $channel_name  = $backend_info{lastampa}{site_ids}{$xmltv_id}{site_id};
    my $channel_num = $chan{$channel_name}; 
       $channel_name=~s/ /%20/g;

	if (not defined $channel_num) {
		# if we get here it means that the site should have the channel (it's in channel_ids)
		# but for some reason we are missing it's site id (probably the site is down)
		# we return an error so that another backend will by used, if possible
        warn "VERBOSE: \tThis site appears to be down!\n" if ($opt_verbose);
        return (1, ());
	}

    my $date_grab = &DateCalc("today","+ ".$offset." days");

#    use Data::Dump;
#    warn Data::Dump::dump(\%chan)."\n";
#    warn "num $channel_num\nname $channel_name\n";
#    exit;

    die 'date calculation failed' if not defined $date_grab;
    $date_grab = UnixDate($date_grab, '%Y-%m-%d');

    my $url = $backend_info{lastampa}{base_data}.'FRM_CHAN_CHAN='.$channel_num.'&FRM_CHAN_DATE='.$date_grab.'&FRM_CHAN_NAME='.$channel_name;
    warn "VERBOSE: fetching $url\n"  if ($opt_verbose);

    eval { $content=get_nice($url) };
    if ($@) {   #get_nice has died
        warn "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend lastampa\n" if ($opt_verbose);

        # Indicate to the caller that we had problems
        return (1, ());
    } 

    my @programmes = ();
    warn "VERBOSE: parsing...\n" if ($opt_verbose);

    $content=~/\Q<table cellspacing=0 cellpadding=2 border=0 width='164'>\E(.*?)\Q<\/table>\E/s;
    $content = $1;

    #split and parse
    my @lines = split /\n/, $content;# shift @lines;
    while ( my ($l1, $l2) = splice(@lines, 1, 2)) {
        next unless ($l2);

        my %programme = ();
        my ($title, $time_start, $time_end, $id);

        $l1 =~ /bottom\'>(.*)<\/td>\E/;
        ($time_start, $time_end) = split /-/, $1;

        $l2 =~ /ShowPopUp\((.*)\)'>(.*?)</;
        ($id, $title) = ($1, $2);

        my $past_midnight = 0;
        $past_midnight = 1 if ($time_end < $time_start); #they can work as decimals, 0.32 < 23.44

        # Three mandatory fields: title, start, channel.
        if (not defined $title) {
            warn 'no title found, skipping programme';
            next;
        }
            $programme{title}=[[tidy($title), $LANG] ];
        if (not defined $time_start) {
            warn "no start time for title $title, skipping programme";
            next;
        }
            $time_start=~s/\./:/;
            $programme{start}=xmltv_date($time_start, $offset);
        if (not defined $xmltv_id) {
            warn "no channel for programme $title at $time_start, skipping programme";
            next;
        }

        $programme{channel}=$xmltv_id;
        $time_end=~s/\./:/;

        $programme{stop}=xmltv_date($time_end, $offset + $past_midnight);

        if ($opt_slow) {
            lastampa_fetch_data_slow($id, \%programme);
        }

        #put info in array
        push @programmes, {%programme};
    }

    if (scalar @programmes) {
        return (0, @programmes);
    }
    else {
        # there is a number of reasons why we could get an empty array.
        # so we return an error 
        return (1, @programmes);
    }
}

sub lastampa_fetch_data_slow {
    my ($id, $programme) = @_;

    my $content;
    my $url = 'http://www.wfactory.net/lastampa/showtread.jsp?ID='.$id;

    warn "VERBOSE: getting --slow data from $url" if ($opt_verbose); 
    eval { $content=get_nice($url) };
    if ($@) {   #get_nice has died
        warn "VERBOSE: there was some error!" if ($opt_verbose);
        return 0;
    }

    if ($content=~/\Q<div name='descript'\E.*?>(.*?)</) {
        my $desc = $1;
        $desc=~s/^\s+//; $desc=~s/\s+$//;
        $programme->{desc}=[[tidy($desc), $LANG] ] if ($desc ne '');
    }

    if ($content=~/\Q<img src='\E(.*?)=yes\'/) {
        #TODO check &amp;
        $programme->{icon}= [{src => 'http://www.wfactory.net/lastampa/'.$1.'=yes'}];
    }

    my @lines = split /\n/, $content;
    foreach my $l (@lines) {
        $l = tidy($l);

        if ($l=~/\Q&nbsp;\E/) {
            #TODO checkME
    #       warn $1."\n";
    #           Azione&nbsp;2000&nbsp;Stati Uniti </td>
        }

        if ($l=~/^<b>(.*?)<\/b>(.*)/mgi) {
            my ($cat, $val) = ($1, $2);
            $val=~s/<br>$//;
            for ($cat){
                /Regia:/ && do {
                    my @directors = split /, /, $val;
                    foreach $a (@directors) {
                        $a=~s/^\s+//; $a=~s/\s+$//;
                        push @{$programme->{credits}->{director}}, $a;
                        }
                    last;
                    };

                /Con:|Con la voce di:/ && do {
                    my @cast = split /,/, $val;
                    foreach (@cast) {
                        s/^\s+//; s/\s+$//;
                        (push @{$programme->{credits}->{actor}}, $_);
                        }
                    last;
                    };

                /Condotto da:|A cura di:/ && do {
                    my @cast = split /,/, $val;
                    foreach (@cast) {
                        s/^\s+//; s/\s+$//;
                        (push @{$programme->{credits}->{presenter}}, $_);
                        }
                    last;
                    };

                /Episodio:/ && do {
                    $val=~s/^\s+//; $val=~s/\s+$//;
                    $programme->{'sub-title'}=[[$val, $LANG] ];
                    last;
                    };

                warn "Don't know what |$cat|$val| is\n";
            }
        }
    }
}

####################
# skytv.it functions #
####################

####################################################
# skytv_get_channels_list
sub skytv_get_channels_list {
    my %chan_hash;

    my @urls = (
        'http://www.skytv.it/GuidaTv/_Cinema.htm',
        'http://www.skytv.it/GuidaTv/_Sport.htm',
        'http://www.skytv.it/GuidaTv/_News.htm',
        'http://www.skytv.it/GuidaTv/_Mondo_e_Tendenze.htm',
        'http://www.skytv.it/GuidaTv/_Ragazzi_e_Musica.htm',
        'http://www.skytv.it/GuidaTv/_Intrattenimento.htm',
        );

    foreach my $url (@urls) {
        warn "VERBOSE: Getting channel list from $url\n" if ($opt_verbose);

        my $content;

		eval { $content = get_nice($url); };
		if ($@) {   #get_nice has died
			warn "VERBOSE: Cannot get skytv's channel list ($url). Site down?\n";

			return ();
		} 


        my @lines = split /\n/, $content;
        
        my $rowcount = 1;
        foreach my $l (@lines) {
            if ($l=~/\Q<span class=testocanaleTvGuide>\E(.*?)</) {
                $rowcount++;

                my $chan = $1; $chan=~s/^\s+//; $chan=~s/\s+$//;
                next if ($chan=~/^MC/); #we skip them as they have no real pg

                $chan_hash{$chan} = "$url;$rowcount";
    
                #update backend info, in case this is a new channel not in channel_ids
                my $xmltv_id = xmltv_chanid('skytv', $chan);
                $backend_info{skytv}{site_ids}{$xmltv_id}{site_id} = $chan;
            }
        }
    }

    return %chan_hash;
}


sub skytv_fetch_data {
    #this is the worst site design i've ever seen since the <blink> tag days.
    my ($xmltv_id, $offset) = @_;
    my $content;

    my $site_id = $backend_info{skytv}{site_ids}{$xmltv_id}{site_id};
    if (not defined $site_id) {
        warn "VERBOSE: \tThis site doesn't know about $xmltv_id!\n" if ($opt_verbose);
        return (1, ());
    }

    # build url to grab
    my ($url, $row) = split /;/, $backend_info{skytv}{channels}{$site_id};
    my $date_prev = skytv_urldata($offset-1).'%7C'.($offset); #yesterday
    my $date = skytv_urldata($offset).'%7C'.($offset+1);
    my $num;
    for ($url) {
        /Mondo_e_Tendenze.htm/ && do{ $num = 2; last;};
        /Ragazzi_e_Musica.htm/ && do{ $num = 4; last;};
        /Intrattenimento.htm/ && do{ $num = 3; last;};
        /News.htm/ && do{ $num = 5; last;};
        /Sport.htm/ && do{ $num = 6; last;};
        /Cinema.htm/ && do{ $num = 1; last;};
    }

    # date to grab
    my $grabdate      = UnixDate(&DateCalc("today","+ ".$offset." days"), '%Y%m%d');
    my $grabdate_prev = UnixDate(&DateCalc("today","+ ".($offset - 1)." days"), '%Y%m%d');



    my @urls;
    #for midnight to 06 am we need to get yesterday night's stuff.
    if ($offset > 0) {
        #date_prev
        push @urls, [$url.'?giorno='.$date_prev.'&orario=3&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=20%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date_prev, '22:00', -1];
        push @urls, [$url.'?giorno='.$date_prev.'&orario=3&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=22%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date_prev, '00:00', 0];
        push @urls, [$url.'?giorno='.$date_prev.'&orario=1&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=00%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date_prev, '02:00', 0];
        push @urls, [$url.'?giorno='.$date_prev.'&orario=1&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=02%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date_prev, '04:00', 0];
    }
    push @urls, [$url.'?giorno='.$date.'&orario=1&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=04%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date, '06:00', 0];
    push @urls, [$url.'?giorno='.$date.'&orario=1&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=06%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date, '08:00', 0];
    push @urls, [$url.'?giorno='.$date.'&orario=1&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=08%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date, '10:00', 0];
    push @urls, [$url.'?giorno='.$date.'&orario=1&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=10%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date, '12:00', 0];
    push @urls, [$url.'?giorno='.$date.'&orario=2&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=12%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date, '14:00', 0];
    push @urls, [$url.'?giorno='.$date.'&orario=2&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=14%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date, '16:00', 0];
    push @urls, [$url.'?giorno='.$date.'&orario=2&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=16%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date, '18:00', 0];
    push @urls, [$url.'?giorno='.$date.'&orario=3&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=18%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date, '20:00', 0];
    push @urls, [$url.'?giorno='.$date.'&orario=3&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=20%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date, '22:00', 0];
    #this is technically tomorrow, but i need to know at what time the last prog ends
    push @urls, [$url.'?giorno='.$date.'&orario=3&tema=1&temaTVGuide='.$num.'&TypeSearch=2&TimeSearch=2&strStartTimeHidden=22%3A00%3A00&temaTimeNavigation='.$num.'&giornoTimeNavigation='.$date, '24:00', 0];

    my @prog_to_check = ();
    foreach (@urls) {
        my $url2 = $_->[0];
        my $starttime = $_->[1];
        my $realday = $_->[2];

        if ($realday == -1) {
            $starttime = ParseDate("$grabdate_prev $starttime");
        }
        else {
            $starttime = ParseDate("$grabdate $starttime");
        }

        die 'date calculation failed' if (! $starttime);

        warn "VERBOSE: fetching $url2\n"  if ($opt_verbose);

        eval { $content=get_nice($url2) };
        if ($@) {   #get_nice has died
            warn "VERBOSE: Error fetching $url channel $xmltv_id day $offset backend skytv\n" if ($opt_verbose);

            # Indicate to the caller that we had problems
            return (1, ());
        } 

        warn "VERBOSE: parsing...\n" if ($opt_verbose);

        #split and parse the lines
        my @lines = split /\n/, $content;
        my $rowcount = 1;
        foreach my $line (@lines) {
            if ($line=~/cellasxEvento/) {
                $rowcount++;
                if ($rowcount == $row){
                    $line=~s/<td class/\n<td class/gm;
                    my @lines2 = split /\n/, $line;
                    foreach my $line2 (@lines2) {
                        if ($line2=~/cellaCenterEvento/) {
                            if ($line2=~/.*colspan=\"(\d+?)\".*OpenEvent\((\d+?),.*title=\"(.*?)\"/){
                                my ($title, $length, $id) = ($3, $1, $2);
                                $title=~s/^\s+//; $title=~s/\s+$//;
                                my $start = $starttime;
                                $starttime = &DateCalc($starttime, "+ ".$length." minutes") || die 'date calculation failed';
                                my $stop = $starttime;
                                push @prog_to_check, [$title, $length, $id, $start, $stop];
                            }
                            elsif ($line2=~/.*colspan=\"(\d+?)\"/){
                                if ($line2!~/prev/ and $line2!~/next/) {
                                    $starttime = &DateCalc($starttime, "+ ".$1." minutes") || die 'date calculation failed';
                                }
                            }
                        }
                    }
                }
            }
        }
    }


    my @programmes = ();
    my ($oldtitle, $oldlength, $oldid, $oldstart, $oldstop) = ('', '', 0, '', '');

    #we join the first prog of the page with the last of the previous page if they are the same
    my %programme = ();

    my $parse_date = &DateCalc("today 00:00","+ ".$offset." days");
    my $next_day   = &DateCalc("today 00:00","+ ".($offset + 1)." days");

    foreach (@prog_to_check) {
        my ($title, $length, $id, $start, $stop) = @$_;

        if ($id != $oldid and $title ne $oldtitle) {
            #write data
            if ($oldid != 0) {
                #if (substr($oldstart, 6, 2) < $num_date){
                if (Date_Cmp(ParseDate($oldstart), ParseDate($parse_date)) <= 0) {

                    #i get rid of yesterday's programmes, even if they end today (i.e. across midnight shows)
                    ($oldtitle, $oldlength, $oldid, $oldstart, $oldstop) = @$_;
                    next;
                }

                $programme{title} = [[tidy($oldtitle), $LANG] ];
                $programme{start} = utc_offset(UnixDate($oldstart, '%Y%m%d%H%M').'00', '+0100');
                $programme{stop}  = utc_offset(UnixDate($oldstop, '%Y%m%d%H%M').'00', '+0100');
                $programme{channel} = $xmltv_id;
                skytv_fetch_data_slow($oldid, \%programme) if ($opt_slow);
                push @programmes, {%programme};
            }
            %programme = ();
            ($oldtitle, $oldlength, $oldid, $oldstart, $oldstop) = @$_;
            
            #we don't care about tomorrow's data
			last if (Date_Cmp(ParseDate($oldstart), ParseDate($next_day)) > 0); #abbiamo cambiato giorno
        }
        else {
            $oldstop = $_->[4];
        }
    }

    #the programme might end exactly at midnight
    if (Date_Cmp(ParseDate($oldstart), ParseDate($parse_date)) == 0){
        $programme{title} = [[tidy($oldtitle), $LANG] ];
        $programme{start} = utc_offset(UnixDate($oldstart, '%Y%m%d%H%M').'00', '+0100');
        $programme{stop}  = utc_offset(UnixDate($oldstop, '%Y%m%d%H%M').'00', '+0100');
        $programme{channel} = $xmltv_id;
        skytv_fetch_data_slow($oldid, \%programme) if ($opt_slow);
        push @programmes, {%programme} if ($oldid != 0);
    }

    if (scalar @programmes) {
        return (0, @programmes);
    }
    else {
        # there is a number of reasons why we could get an empty array.
        # so we return an error 
        return (1, @programmes);
    }
}

sub skytv_urldata {
    my $time_offset = shift;
    my $data=&DateCalc("today","+ ".$time_offset." days");
    die 'date calculation failed' if not defined $data;

    my $str=UnixDate($data, '%a+%e');

    #traduciamo in italiano
    $str=~s/Sun/Domenica/;
    $str=~s/Mon/Luned%C3%AC/;
    $str=~s/Tue/Marted%C3%AC/;
    $str=~s/Wed/Mercoled%C3%AC/;
    $str=~s/Thu/Gioved%C3%AC/;
    $str=~s/Fri/Venerd%C3%AC/;
    $str=~s/Sat/Sabato/;

	$str=~s/ //;

    return $str;
}

sub skytv_fetch_data_slow {
    my ($id, $programme) = @_;

    my $content;
    my $url = 'http://www.skytv.it/TvCards/Event/Event.htm?idevento='.$id;

    warn "VERBOSE: getting --slow data from $url" if ($opt_verbose); 
    eval { $content=get_nice($url) };
    if ($@) {   #get_nice has died
        warn "VERBOSE: there was some error!" if ($opt_verbose);
        return 0;
    }

    if ($content=~/\Q<td width="120" height=15 class="Verdana10_000000" align=center>\E(.*?)</) {
        push (@{$programme->{category}}, [tidy($1), $LANG ]) if ($1 ne '');
    }
    
    if ($content=~/\Q<td width="131" height=15 class="Verdana10B_000000">\E(.*?)</) {
        $programme->{category}=[[tidy($1), $LANG ]] if ($1 ne '');
    }

    if ($content=~/\Q<tr><td align=right height=30% class="Verdana10_000000">\E(\d+?) min.</) {
        $programme->{length}= $1*60 if ($1 ne '');
    }

    if ($content=~/\Q<td bgcolor="#ffffff" align=left valign=top class="Verdana10_000000">\E(.*?)</) {
        my $desc = $1;

        if ($desc=~/(.*) - (.*)/) {
            $programme->{'sub-title'}=[[$1, $LANG] ] if ($1 ne '');
            $desc = $2 if ($2 ne '');
        }


        if ($desc=~/^Regia di (.*?), con (.*?); (.*?) (\d+?) \((\d+) min\)\. (.*)/) {
            my $director = $1;
            my $cast = $2;
            my $country = $3;
            my $year = $4;
            my $length = $5;
            $desc = $6 || '';

            my @cast = split /,/, $cast;
            foreach (@cast) {
                s/^\s+//; s/\s+$//;
                (push @{$programme->{credits}->{actor}}, $_);
            }

            push @{$programme->{credits}->{director}}, $director;
            push (@{$programme->{country}}, [$country, $LANG]) if ($country ne '');
            $programme->{date}= $year if ($year ne '');
            $programme->{length}= $length*60 if ($length ne '');
        }

        if ($desc=~/^Con (.*?); (.*?)\.(.*)/) {
            my $cast = $1;
            my $country = $2;
            $desc = $3 || '';

            my @cast = split /,/, $cast;
            foreach (@cast) {
                s/^\s+//; s/\s+$//;
                (push @{$programme->{credits}->{actor}}, $_);
            }

           push (@{$programme->{country}}, [$country, $LANG]) if ($country ne '');
        }

        $programme->{desc}=[[tidy($desc), $LANG ]] if ($desc ne '');
    }

}