#!/usr/bin/env python
 
import re
import urllib
import getopt, sys
from string import replace, split, strip
from time import time
from time import localtime
from time import strftime
 
# Character-set name. Nothing in the visible code reads this constant.
# NOTE(review): presumably it records the encoding of the scraped pages
# (main() declares the output as ISO-8859-1) -- confirm before relying on it.
locale = "Latin-1"
 
def usage():
    """Print a short usage hint to standard output.

    The text (including its trailing blank line) is emitted exactly as
    before.  The parenthesised single-argument ``print`` form is used, as
    in the XML-emitting methods of this file, so the statement parses on
    both Python 2 and Python 3.
    """
    print("""tv_grab_be --days=3 (default=2) and in the grabber comment
out the channel you don't need
""")
    
 
def inttochar(match):
    """Turn a decimal character reference (``&#NNN;``) into its character.

    Meant to be used as the replacement callback of ``re.sub`` for a
    pattern that matches one complete ``&#NNN;`` reference.

    match -- regex match object whose full text is ``&#NNN;``.

    Returns the single character for code points 0..255.  Larger code
    points cannot be represented in Latin-1 (and make Python 2's ``chr``
    raise ValueError), so the reference is returned unchanged; a numeric
    character reference is still valid inside the generated XML.
    """
    reference = match.group(0)
    # Strip the leading "&#" and the trailing ";" to get the digits.
    code = int(reference[2:-1])
    if code > 0xFF:
        return reference
    return chr(code)
 

def escape(s):
    """Normalise scraped HTML text so it can be embedded in the XML output.

    s -- text fragment taken from the listing page.

    Steps, in order:
      1. ``&#146;`` becomes an apostrophe and ``&nbsp;`` a plain space.
      2. Remaining decimal references (``&#NNN;``) are decoded through
         inttochar().
      3. The curly single quotes 0x91 / 0x92 become an apostrophe.
      4. Every ampersand that does not start an entity or character
         reference is written as ``&amp;``.  (Previously only an
         ampersand with a space on both sides was escaped, so text such
         as ``R&B`` produced malformed XML.)

    Returns the cleaned string.
    """
    s = s.replace('&#146;', "'")
    s = s.replace('&nbsp;', ' ')
    s = re.sub(r'&#(\d+);', inttochar, s)

    # A couple of characters which are not legal in Latin-1; we have to
    # guess what they are.  The search strings were empty in the source,
    # which made the replacement insert an apostrophe between every
    # character.
    # NOTE(review): 0x91/0x92 (the Windows-1252 single quotes) are assumed
    # here because '&#146;' (= 0x92) is mapped to an apostrophe above --
    # confirm against the original file.
    s = s.replace('\x91', "'")
    s = s.replace('\x92', "'")

    # Escape bare ampersands but leave "&name;" and "&#NNN;" untouched so
    # nothing is escaped twice.
    s = re.sub(r'&(?!#?[A-Za-z0-9]+;)', '&amp;', s)
    return s
 

class cEvent:
    """One programme scraped from a channel listing page.

    The constructor walks the HTML lines belonging to a single programme
    and pulls out, in this fixed order: start time, end time, title,
    category and description.  xml() prints the result as an XMLTV
    ``<programme>`` element.
    """

    # Class-level defaults; __init__ overrides the ones it parses.
    start = ''
    end = ''
    title = ''
    subtitle = ''
    description = ''
    images = []

    # Patterns are compiled once and written as raw strings so that the
    # "\." really is a regex escape and not a string escape.
    _TIME_RE = re.compile(
        r"<td class='tvnucontent' valign='top'>(.+)\.(.+)</td>")
    _TITLE_RE = re.compile(r".+ class=tvnu>(.+)</a>")
    _CATEGORY_RE = re.compile(
        r"<td class='tvnuthema' align=right valign='top' nowrap>(.+)</td>")
    _DESCRIPTION_RE = re.compile(
        r"<td width= '100%' valign='top' colspan=2 class=programmabeschrijving>(.+)<br>")

    def __init__(self, block, line, today, tomorrow):
        """Parse one programme out of *block*.

        block    -- iterable of HTML lines for this programme.
        line     -- accepted for interface compatibility; not used.
        today    -- date prefix used for times on the listing day
                    (cChannel passes it formatted as YYYYMMDD).
        tomorrow -- date prefix for the following day, same format.
        """
        self.start_h = '00'
        self.start_m = '00'
        self.end_h = ''
        self.end_m = ''
        self.title = ''
        self.category = ''
        self.description = ''
        # Give every event its own list instead of sharing the class one.
        self.images = []
        self.today = today
        self.tomorrow = tomorrow

        # 0: want start time, 1: want end time, 2: want title,
        # 3: want category, 4: want description.
        state = 0
        for l in block:
            if state == 0:
                r = self._TIME_RE.search(l)
                if r is not None:
                    self.start_h, self.start_m = r.group(1), r.group(2)
                    state = 1

            elif state == 1:
                r = self._TIME_RE.search(l)
                if r is not None:
                    self.end_h, self.end_m = r.group(1), r.group(2)
                    state = 2

            elif state == 2:
                r = self._TITLE_RE.search(l)
                if r is not None:
                    self.title = escape(r.group(1)).strip()
                    state = 3

            elif state == 3:
                r = self._CATEGORY_RE.search(l)
                if r is not None:
                    self.category = escape(r.group(1)).strip()
                    state = 4

            elif state == 4:
                # No state change here: if several lines match, the last
                # one wins.
                r = self._DESCRIPTION_RE.search(l)
                if r is not None:
                    self.description = escape(r.group(1)).strip()

    def xml(self, channel_id):
        """Print this programme as an XMLTV ``<programme>`` element.

        channel_id -- value written into the ``channel`` attribute.

        Nothing is printed when no title was found.
        """
        if self.title == '':
            return

        # The hours are compared as strings, so this relies on the page
        # giving them as two digits.
        if self.start_h < '06':
            # Changed back to six: some programmes on Ketnet start at
            # 7 o'clock.  Anything starting before 06 is filed under the
            # next calendar day.
            start_day = stop_day = self.tomorrow
        elif self.end_h < self.start_h:
            # Programmes that start today but end tomorrow, i.e. their
            # end hour is smaller than their start hour.
            start_day, stop_day = self.today, self.tomorrow
        else:
            start_day = stop_day = self.today

        # NOTE(review): the offset is hard-coded to +0000; the scraped
        # times are presumably local time -- confirm before changing.
        print('  <programme start="%s%s%s +0000" stop="%s%s%s +0000" channel="%s">'
              % (start_day, self.start_h, self.start_m,
                 stop_day, self.end_h, self.end_m, channel_id))
        print("    <title lang=\"nl\">%s</title>" % self.title)
        if self.description != '':
            print("    <desc lang=\"nl\">%s</desc>" % self.description)
        if self.category != '':
            print("    <category lang=\"nl\">%s</category>"
                  % self.category)
        print("  </programme>")
 

class cChannel:
    """A TV channel together with the programmes scraped for it.

    Constructing an instance downloads one listing page per requested
    day and stores a cEvent for every programme block found on it.
    """

    # Class-level defaults; __init__ gives each instance its own values.
    title = ''
    events = []

    _LISTING_URL = (
        "http://www.teveblad.be/ndl/zender.asp?move=full&channel=%s&da=%s")

    def __init__(self, id, title, days):
        """Fetch and parse *days* days of listings for one channel.

        id    -- accepted for interface compatibility; the channel name is
                 what gets stored as ``self.id`` and used in the output.
        title -- channel name, used in the listing URL and in the XML.
        days  -- number of days to fetch, starting with the current day.
        """
        self.id = title
        self.title = title
        self.events = []

        for x in range(days):
            # Sample the clock once per day so that the three date
            # strings cannot disagree when the loop runs across midnight.
            moment = time() + (x * 86400)
            date = strftime("%m/%d/%Y", localtime(moment))
            today = strftime("%Y%m%d", localtime(moment))
            tomorrow = strftime("%Y%m%d", localtime(moment + 86400))
            page = self._fetch_listing(title, date)
            self._parse_listing(page, today, tomorrow)

    def _fetch_listing(self, channel, date):
        """Download one listing page and return its content."""
        f = urllib.urlopen(self._LISTING_URL % (channel, date))
        try:
            return f.read()
        finally:
            # The handle used to be left open for every page fetched.
            f.close()

    def _parse_listing(self, page, today, tomorrow):
        """Split *page* into programme blocks and append a cEvent for each."""
        block = []
        started = False
        l = ''
        for l in page.split("\n"):
            if not started:
                # Skip everything up to the first time cell.
                if re.search(
                        r"<td class='tvnucontent' valign='top'>.+</td>",
                        l) is not None:
                    block.append(l)
                    started = True
            else:
                # A "rowspan=2" cell opens the next programme: close the
                # current block and start a new one with this line.
                if re.search(
                        r"<td class='tvnucontent' valign='top' rowspan=2>.+",
                        l) is not None:
                    self.events.append(cEvent(block, l, today, tomorrow))
                    block = []
                block.append(l)

        # Whatever is left forms the last programme of the page (an empty
        # block yields an event without a title, which prints nothing).
        self.events.append(cEvent(block, l, today, tomorrow))

    def channel_xml(self,
                    today = strftime("%Y/%m/%d",localtime(time())),
                    tomorrow = strftime("%Y/%m/%d",localtime(time()+86400)),
                    ):
        """Print the XMLTV ``<channel>`` element for this channel.

        today, tomorrow -- not used; kept so existing calls keep working.
        """
        print("  <channel id=\"%s\">" % self.title)
        print("    <display-name lang=\"nl\">%s</display-name>"
              % self.title)
        print('    <icon src="http://www.teveblad.be/gfx/logos/%s.gif" />'
              % self.title)
        print("  </channel>")

    def programme_xml(self,
                      today = strftime("%Y/%m/%d",localtime(time())),
                      tomorrow = strftime("%Y/%m/%d",localtime(time()+86400)),
                      ):
        """Print a ``<programme>`` element for every stored event.

        today, tomorrow -- not used; kept so existing calls keep working.
        """
        for event in self.events:
            event.xml(self.id)
 

def main():
    """Command-line entry point: print an XMLTV document on stdout.

    Options:
      -h, --help     show the usage text and exit
      -d, --days=N   number of days to grab (default 2)

    Exits with status 2 on an unknown option or a non-numeric day count.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hd:", ["help", "days="])
    except getopt.GetoptError:
        # print help information and exit:
        usage()
        sys.exit(2)

    dagen = 2
    for o, a in opts:
        if o in ("-h", "--help"):
            print("help")
            usage()
            sys.exit()
        if o in ("-d", "--days"):
            try:
                dagen = int(a)
            except ValueError:
                # A non-numeric value used to end in a bare traceback.
                sys.stderr.write(
                    "tv_grab_be: invalid value for --days: %r\n" % (a,))
                usage()
                sys.exit(2)

    print("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>")
    print("<!DOCTYPE tv SYSTEM \"xmltv.dtd\">\n")
    print("<tv generator-info-name=\"based on the Script by Bart Heremans\n")
    print("  and extended by Watteel Pascal (piwi3910)\">\n\n")

    # Comment out the channels you don't need.  The running number given
    # to cChannel is generated here (the hand-written list contained a
    # duplicate 14); cChannel identifies a channel by its name only.
    channel_names = [
        'TV1',
        'Ketnet',
        'Canvas',
        'VTM',
        'Kanaal2',
        'VT4',
        'Vitaya',
        'EVENTTV',
        'Zones.tv',
        'KanaalZ',
        'NBC/NGC',
        'Ned1',
        'Ned2',
        'Ned3',
        'Net5',
        'RTL4',
        'RTL5',
        'SBS6',
        'VERONICA',
        'TCM',
        'CARTOONNETWORK',
        'EUROSPORT',
        'NICKELODEON',
        'BVN-TV',
        'CANAL+',
        'RTBF1',
        'RTBF2',
        'RTL-TVI',
        'CLUBRTL',
        'TF1',
        'FRANCE2',
        'FRANCE3',
        'TV5',
        'ARTE',
        'CANAL+FR',
        'BBC1',
        'BBC2',
        'BBCWORLD',
        'CNN',
        'ITV-MERIDIAN',
        'ARD',
        'ZDF',
        'WDR',
        'SWR',
        'RTL',
        'MTV',
        'TMF',
        'JIMTV',
        'TVE',
        'RAIUNO',
        'TRT',
        'ATV',
        'AVS',
        'FOCUS',
        'KANAAL3',
        'RINGTV',
        'ROBTV',
        'RTV',
        'TVBRUSSEL',
        'TVLIMBURG',
        'WTV',
        ]

    # All listings are downloaded here, before any channel is printed.
    chs = [cChannel(number, name, dagen)
           for number, name in enumerate(channel_names, 1)]

    for c in chs:
        c.channel_xml()
    for c in chs:
        c.programme_xml()
    print("</tv>")
      
if __name__ == "__main__":
    # Run the grabber only when this file is executed as a script.
    main()
  
