#!/usr/bin/python
"""Download automatically files from MegaUpload (using a free account)"""

# Copyright (c) 2008-2009 Arnau Sanchez <tokland@gmail.com>

# This file is part of Megaupload-dl.

# This script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this software.  If not, see <http://www.gnu.org/licenses/>

import PIL.Image
import StringIO
import optparse
import time
import math
import sys
import os
from itertools import ifilter

# Import our modules
try:
    from megaupload_dl import lib
except ImportError:
    import lib

# Global variables
# Version string reported by the --version option of the command line
VERSION = "0.2" # TODO: Use a subversion variable
# Default value of the --wait-time option: seconds main() sleeps before
# writing out the decoded file URL
LINK_ENABLE_TIME = 45

# Default debug function, built for verbosity level 0. main() rebinds this
# global once the number of --verbose flags is known.
debug = lib.get_debug_func(0)

def loop_with_retries(retries, func, *args, **kwargs):
    """Call func(*args, **kwargs) until it succeeds or the retries run out.

    An attempt fails when func raises AssertionError: the error is logged
    with debug level 1 and the next attempt starts. Any other exception is
    propagated to the caller untouched.

    Return the value of the first successful call, or None when func failed
    on every attempt (func is never called if retries is lower than 1).
    """
    for current_try in range(1, retries + 1):
        debug(1, "start try: %d/%d" % (current_try, retries))
        # "except ... as" (Python >= 2.6) replaces the comma form, which is
        # a syntax error on Python 3.
        try:
            return func(*args, **kwargs)
        except AssertionError as details:
            debug(1, "error: %s" % details)
    debug(0, "Retries exhausted")
    return None

def process_image(captcha_image):
    """Prepare the captcha image for the OCR step.

    The only processing done is a conversion to mode "L" (PIL's 8-bit
    greyscale mode); the converted image is returned.
    """
    return captcha_image.convert("L")

def get_captcha_url(index):
    """Return the absolute URL of the captcha image found in the index soup.

    index is the parsed main page. Its <img> tags are scanned and the first
    one whose "src" attribute starts with "/capgen" is taken as the captcha.

    If there is no such image, the problem is reported with debug level 0
    and the process terminates with exit status 2 (SystemExit).
    """
    baseurl = "http://www.megaupload.com"
    images = index.findAll("img")
    # tag.get() instead of tag["src"]: an <img> without a "src" attribute
    # must be skipped, not abort the whole scan with a KeyError.
    captchas = (tag for tag in images
                if (tag.get("src") or "").startswith("/capgen"))
    img = lib.first(captchas)
    if not img:
        debug(0, "No captcha found in download page.: incorrect link " +
            "or file not available")
        # Raise an application exception?
        sys.exit(2)
    return baseurl + img["src"]

###
                                         
def get_download_form(index, captcha_image, image_callback=None):
    """Decode the captcha and build the data needed to submit the form.

    index -- soup of the main page; its first <form> is used.
    captcha_image -- PIL image of the captcha.
    image_callback -- optional callable, invoked with the processed image
        just before the OCR runs.

    Return a tuple (action, postdata): the "action" attribute of the form
    and a dictionary with its hidden fields plus the decoded captcha text
    stored under the name of the form's text input.

    Raise AssertionError if the decoded text is not 3 characters long;
    loop_with_retries() treats that exception as a failed attempt.
    """
    form = index.find("form")
    hidden = form.findAll("input", {'type': 'hidden'})
    postdata = dict((str(tag["name"]), str(tag["value"])) for tag in hidden)
    captcha_field_name = form.find("input", {'type': 'text'})["name"]
    image = process_image(captcha_image)
    if image_callback:
        image_callback(image)
    text = lib.ocr(image)
    debug(2, "decoded captcha: %s" % text)
    # Raised explicitly rather than with an assert statement: asserts are
    # stripped by "python -O", which would disable the retry mechanism.
    if len(text) != 3:
        raise AssertionError("captcha has not the expected length")
    postdata[captcha_field_name] = text
    return form["action"], postdata

def process_download_page(soup):
    """Parse the download page and return the file URL it encodes.

    The URL is assembled by the second <script language="Javascript"> of
    the page: the three lines following the one that starts with
    'document.getElementById("download' define two variables and then
    concatenate them into the href of the download link. That javascript
    is simulated here to rebuild the href.

    Return the href with its HTML entities unescaped.

    Raise AssertionError (the exception loop_with_retries() handles as a
    failed attempt) if the page does not have the expected structure.
    """
    javascripts = soup.findAll("script", {"language": "Javascript"})
    # Raised explicitly rather than with an assert statement, which would
    # be stripped by "python -O".
    # NOTE(review): only the second script is used, yet at least three are
    # required; presumably a sanity check on the page shape -- confirm.
    if len(javascripts) <= 2:
        raise AssertionError("this html does not seem the download page")
    # We want the second javascript in the page
    javascript = str(javascripts[1]).splitlines()
    # Seek the marker line; the URL is encoded in the three lines after it
    marker = 'document.getElementById("download'
    index = None
    for number, line in enumerate(javascript):
        if line.startswith(marker):
            index = number
            break
    # A missing marker or a truncated script is reported as a retryable
    # error instead of crashing on the unpacking below.
    if index is None or len(javascript) < index + 4:
        raise AssertionError("javascript URL encoding not found in page")
    str1, str2, str3 = javascript[index+1:index+4]
    debug(3, "Javascript URL encoding:")
    for line in (str1, str2, str3):
        debug(3, line)
    # Raw strings keep the regexp backslashes from being read as (invalid)
    # string escapes; the pattern text itself is unchanged.
    regexp1 = r"(\w+) = String\.fromCharCode\(Math\.abs\(-(\d+)\)\);"
    var1_name, var1_value = lib.search(regexp1, str1)
    regexp2 = r"(\w+) = '(\w+)' \+ String\.fromCharCode\(Math\.sqrt\((\d+)\)\)"
    var2_name, var2_value_a, var2_value_b = lib.search(regexp2, str2)
    # Simulate the javascript code
    values = {
        var1_name: chr(int(var1_value)),
        var2_name: var2_value_a + chr(int(math.sqrt(int(var2_value_b)))),
    }
    # Triple quotes allow both quote characters inside the raw pattern
    regexp3 = r'''innerHTML = '<a href="([^']*)' \+ (\w+) \+ (\w+) \+ '([^"]*)"'''
    href1, var1_name2, var2_name2, href2 = lib.search(regexp3, str3)
    href = href1 + values[var1_name2] + values[var2_name2] + href2
    debug(2, "decoded file URL: %s" % href)
    return lib.unescape_entities(href)
    
###
    
def get_download_url(main_url, image_callback=None):
    """Download the main page, solve its captcha and return the file URL.

    main_url -- URL of the MegaUpload page of the file.
    image_callback -- optional callable, handed to get_download_form(),
        which invokes it with the processed captcha image.

    Return the file URL extracted by process_download_page() from the page
    obtained after submitting the captcha form.

    AssertionError raised by get_download_form() or process_download_page()
    is not caught here, so the caller can retry the whole operation.
    """
    debug(1, "downloading main html: %s" % main_url)
    index = lib.get_soup(lib.download(main_url))
    image_url = get_captcha_url(index)
    debug(1, "captcha image: %s" % image_url)
    captcha_data = lib.download(image_url)
    captcha_image = PIL.Image.open(StringIO.StringIO(captcha_data))
    action, postdata = get_download_form(index, captcha_image, image_callback)
    request = lib.build_request(action, postdata)
    debug(3, "building POSTDATA: %s" % request.get_data())
    # Log before sending the request, so a download that hangs or fails
    # still leaves a trace of what was being fetched.
    debug(1, "downloading html page: %s" % action)
    download_page = lib.get_soup(lib.download(request))
    return process_download_page(download_page)

###

def main(args0):
    """Command-line entry point: decode the captcha and output the file URL.

    args0 -- list of command-line arguments, without the program name. The
        first positional argument is the URL of the MegaUpload page; any
        further positional arguments are ignored.

    Return the exit status: 0 on success, 1 if no URL was given (the help
    is printed) and 2 if the file URL could not be obtained.
    """
    global debug
    # The URL argument is mandatory, so the usage line shows it
    usage = """usage: megaupload_dl.py [options] URL\n\n    %s""" % __doc__
    parser = optparse.OptionParser(usage, version=VERSION)
    parser.add_option('-v', '--verbose', dest='debug_level', action="count",
        default=0, help='Increase verbose level')
    parser.add_option('-r', '--captcha-retries', dest='captcha_retries',
        default=5, metavar="TIMES", type="int",
        help='Maximum captcha decoding retries before giving up')
    parser.add_option('-w', '--wait-time', dest='wait_time',
        default=LINK_ENABLE_TIME, metavar="SECONDS", type="int",
        help='Time to wait before exiting')
    options, args = parser.parse_args(args0)
    if not args:
        parser.print_help()
        return 1
    # Replace the module-level debug function with one that honours the
    # requested verbosity
    debug = lib.get_debug_func(options.debug_level)
    # At the highest verbosity the processed captcha is shown to the user
    image_callback = None
    if options.debug_level >= 3:
        image_callback = lambda image: image.show()
    url = loop_with_retries(options.captcha_retries, get_download_url,
        args[0], image_callback)
    if not url:
        debug(1, "operation unsuccessful")
        return 2
    # Only positive times are slept: time.sleep() rejects negative values
    if options.wait_time > 0:
        debug(1, "waiting the link to be enabled: %d secs" % options.wait_time)
        time.sleep(options.wait_time)
    debug(1, "operation successful")
    lib.output(url)
    return 0
       
# Run as a script: hand the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
