# The module docstring is assigned explicitly here; because this assignment is
# the first statement of the file, the triple-quoted string that follows is a
# plain (unused) string expression holding reference notes, not the docstring.
__doc__ = 'Download Ziggo facturen'

"""
A proof-of-concept for scraping the invoices from the ISP (Ziggo) site.
The ISP website is built on Backbase (http://www.backbase.com) and relies
heavily on JavaScript-generated content. Qt WebKit is used for scraping the rendered pages.

Change history
--------------
2013-02-22 use webscraping.py package from https://code.google.com/p/webscraping
2015-05-06 Changed site navigation (UPC/Ziggo)


Reference material
------------------
Qt Webkit:
http://www.webkit.org/

Webscraping:
http://sitescraper.net/blog/Scraping-JavaScript-webpages-with-webkit/
http://code.google.com/p/webscraping/

Loading external resources:
http://www.qtcentre.org/threads/27976-QWebView-sometimes-not-loading-external-resources
self.connect(self, QtCore.SIGNAL('loadFinished(bool)'), self.checkLoadreply, QtCore.Qt.QueuedConnection) 

http://stackoverflow.com/questions/3757569/qwebpage-how-to-handle-failed-request
extension example (C code) http://www.qtcentre.org/archive/index.php/t-39870.html

Use the reimplemented function QWebPage::extension. There you can specify an error handler for three different error domains: HTTP errors, QtNetwork layer errors and QtWebKit layer errors.
http://stackoverflow.com/questions/3757569/qwebpage-how-to-handle-failed-request

handle redirection:
http://stackoverflow.com/questions/9514287/how-to-get-first-redirect-301-or-302-event-in-qtwebkit

subclass QNetworkAccessManager:
http://www.gossamer-threads.com/lists/python/python/894565

CA certificates:
http://lists.qt.nokia.com/pipermail/qt-interest/2010-July/026018.html
"""

import sys
import logging

from optparse import OptionParser
from webscraping import webkit
from webscraping import download

from webscraping import settings
from webscraping import common

from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtNetwork import *
from PyQt4.QtWebKit import *

def pdfUrl( webKitBrowser ):
    """Return the invoice PDF URL found on the currently loaded page.

    Every anchor returned by ``webKitBrowser.find('a')`` is inspected. The
    first one whose ``onclick`` attribute contains the text 'https' provides
    the URL: the attribute text starting at 'https' with its last three
    characters removed.

    Parameters:
        webKitBrowser -- browser object offering ``find(selector)``; the
                         elements it yields must offer ``attribute(name)``.

    Returns:
        A QUrl for the first matching anchor, or None when no anchor has
        'https' in its ``onclick`` attribute.
    """
    for e in webKitBrowser.find('a'):
        attr = e.attribute('onclick')
        i = attr.find('https')
        if i >= 0:
            # NOTE(review): the last three characters are dropped, presumably
            # the closing part of the JavaScript call (e.g. "');") -- confirm
            # against the actual page markup.
            invoicePdfUrl = QUrl(attr[i:-3])
            common.logger.debug('invoicePdfUrl: %s' % invoicePdfUrl.toString())
            # The original computed the URL but never handed it to the caller.
            return invoicePdfUrl
    return None

def savePDF(pdf, filename):
    """Save the invoice PDF.

    Parameters:
        pdf      -- the PDF content to write (anything QFile.write accepts).
        filename -- path of the file to create or overwrite.

    When the file cannot be opened for writing, an error is logged and
    nothing is written.
    """
    # The original referred to an undefined name 'fileName' here, which made
    # every call fail with a NameError.
    pdfFile = QFile( filename )
    if not pdfFile.open( QIODevice.WriteOnly ):
        common.logger.error('Cannot open %s for writing: %s',
                            filename, pdfFile.errorString())
        return
    try:
        pdfFile.write( pdf )
    finally:
        # Close the handle even when the write raises.
        pdfFile.close()
    
def downloadPDF(nam, pdfUrl, seq=0):
    """Download the invoice PDF and store it as 'Ziggo-factuur-<seq>.pdf'.

    Parameters:
        nam    -- the QNetworkAccessManager used to issue the GET request.
        pdfUrl -- QUrl of the PDF to download.
        seq    -- sequence number used in the output file name.

    The call blocks in a local Qt event loop until the reply has finished.
    The output file is only created after a successful download; on a
    network error, or when the file cannot be opened, an error is logged.

    %TODO can this be done more elegantly, using webkit.py functionality?
    """
    common.logger.debug('DOWNLOAD INVOICE')

    fileName = 'Ziggo-factuur-' + str(seq) + '.pdf'
    common.logger.debug('PDF Filename: %s' % fileName)

    reply = nam.get( QNetworkRequest( pdfUrl ) )

    # Wait for THIS reply only. Listening to the manager-wide finished signal
    # would end the wait as soon as any other request on the same manager
    # completes, and would add another connection on every call.
    # eventloop (http://www.siteduzero.com/forum-83-615013-p1-qt-telechargement-de-fichier-via-qnetworkaccessmanager.html)
    loop = QEventLoop() # enable Qt events
    reply.finished.connect(loop.quit)
    if not reply.isFinished():
        loop.exec_()

    try:
        if reply.error() != QNetworkReply.NoError:
            common.logger.error('PDF download error: %s', reply.error())
            return

        common.logger.debug('PDF Size: %d' % reply.bytesAvailable())

        # Open the file only now, so a failed download leaves no empty file
        # and no open handle behind.
        pdfFile = QFile( fileName )
        if not pdfFile.open( QIODevice.WriteOnly ):
            common.logger.error('Cannot open %s for writing: %s',
                                fileName, pdfFile.errorString())
            return
        try:
            pdfFile.write( reply.readAll() )
        finally:
            pdfFile.close()
    finally:
        # The caller of QNetworkAccessManager.get() owns the reply object.
        reply.deleteLater()

if __name__ == '__main__':
    # Script entry point: log in to the Ziggo site, collect the invoice links
    # from the invoice overview page and download each invoice as a PDF.
    #
    # Parse command line arguments.
    # Syntax:
    # $0 --user uid --password pwd [--nrinvoices n] [--url u] [--debug]
    description = 'Download Ziggo invoices. ' \
                + 'This program comes with ABSOLUTELY NO WARRANTY. ' \
                + 'This is free software, and you are welcome to redistribute ' \
                + 'it under the terms of the GNU General Public License v2.'
    # The usage text names the options exactly as they are defined below.
    parser = OptionParser(usage='usage: %prog --user uid --password pwd [--nrinvoices n] [--url u] [--debug]',
                          version='%prog 0.2, Copyright (c) 2013 Johan van Oostrum',
                          description=description)
    parser.add_option('-u', '--user', dest='user',
                      help='The user id', metavar='USER')
    parser.add_option('-p', '--password', dest='password',
                      help='The password', metavar='PASSWORD')
    parser.add_option('-n', '--nrinvoices', dest='nrinvoices', nargs=1, default=0, type='int',
                      help='Number of invoices to download (0=all) [default: %default].', metavar='NRINVOICES')
    parser.add_option('-U', '--url', dest='url', default='http://www.ziggo.nl/login.login/?target=selfcare',
                      help='The IS homepage [default: %default]', metavar='URL')
    parser.add_option('-d', '--debug', dest='debug', action='store_true',
                      help='Log debug info', metavar='DEBUG')
    (options, args) = parser.parse_args()
    if len(args) != 0:
        parser.error('incorrect number of arguments')
    # Without credentials the login form would be filled with None, so stop
    # here with a usage message instead.
    if not options.user or not options.password:
        parser.error('both --user and --password are required')

    if options.debug:
        common.logger.setLevel(logging.DEBUG)

    common.logger.debug('START')

    # Log in through the rendered login page.
    w = webkit.WebkitBrowser(gui=True)
    w.get(options.url)

    w.fill('input[name=userId]', options.user)
    w.fill('input[name=password]', options.password)
    w.click('button[name=button]')
    w.wait(5)
    #w.screenshot('ziggo_login.png')

    # handle redirect?
    # http://stackoverflow.com/questions/9514287/how-to-get-first-redirect-301-or-302-event-in-qtwebkit

    # Navigate to the invoice overview ('Facturen').
    w.click('a[title=Facturen]')
    w.wait(5)
    #w.screenshot('ziggo_facturen.png')

    # create a list comprising all invoice links
    # example links:
    #https://www.ziggo.nl/mijn_ziggo/#mijn-gegevens/facturen/123456789
    #https://www.ziggo.nl/mijn_ziggo/rest/app/pdf/pdf/invoicewithdetails/123456789.pdf
    invoiceUrls = []
    for e in w.find('table tr a'):
        hRef = e.attribute('href')
        common.logger.debug('Invoice href: %s' % hRef)
        if not hRef:
            # An anchor without href carries no invoice number to build on.
            continue
        # %TODO Ugly hack which simplifies navigation but makes the script (more) site dependent:
        # the last nine characters of the href are used as the invoice number.
        #url = 'https://mijn.ziggo.nl/mijnziggo/app/pdf/pdf/invoice/'+hRef[-9:]
        url = 'https://www.ziggo.nl/mijn_ziggo/rest/app/pdf/pdf/invoicewithdetails/'+hRef[-9:]
        common.logger.debug('Invoice PDF URL: %s' % url)
        invoiceUrls.append( QUrl(url) )

    # An earlier approach opened each invoice page and clicked its
    # 'a[class=pdf]' link; the PDF URLs are now built directly (see above).

    if not invoiceUrls:
        common.logger.warning('No invoice links found')

    # A positive --nrinvoices limits the number of downloads; 0 means all.
    if options.nrinvoices > 0:
        invoiceUrls = invoiceUrls[:options.nrinvoices]

    for seq, url in enumerate(invoiceUrls, 1):
        downloadPDF( w.manager, url, seq )
        w.wait(5)

    common.logger.debug('END')