""" Myspace is a notoriously crufty website. Walking the
DOM (as opposed to scraggling through markup) with PyKHTML
is a fair bit nicer!

Usage: myspace.py email@email.com password
"""

# in case pykhtml isn't already installed
import sys
import codecs
sys.path.append("..")

import pykhtml
# Optional pykhtml switches, left disabled by default:
#pykhtml.debugWithGUI = True
#pykhtml.useXvfb = True

# NOTE: the print calls below deliberately use the single-argument,
# parenthesised form, which prints the same text on Python 2 and
# also parses on Python 3.


def _findLoginButton(document):
    """Return the first <input> whose id contains "loginbutton", or None."""
    # the ID regularly seems to change, so fuzzy match:
    for element in document.getElementsByTagName("input"):
        if element.id.count("loginbutton"):
            return element
    return None


def login(email, password, browser):
    """Log in to Myspace, signing out instead if a session is already open.

    Used as the page-loaded callback for the Myspace front page: `main`
    binds `email` and `password` with pykhtml.partial, and the browser
    is passed in as the last argument when the page has loaded.

    email    -- value written into the "email" form field
    password -- value written into the "password" form field
    browser  -- the pykhtml browser whose page has just loaded

    Raises AssertionError if no login button can be found on the page.
    """
    print("Here")
    document = browser.document
    # First, check if we're already logged in
    header = document.getElementById("header")
    for anchor in header.getElementsByTagName("a"):
        if anchor.text == "SignOut":
            # We are. Sign out and then quit
            print("Already signed in to Myspace! Signing out...")
            browser.load(anchor.href, signedOut)
            return
    # Log in
    # Set the text in the email/password boxes
    document.getElementById("email").value = email
    document.getElementById("password").value = password
    # And then get a reference to the submit image button
    submit = _findLoginButton(document)
    if submit is None:
        # Raised explicitly rather than via `assert`: an assert statement
        # is stripped under `python -O`, which would turn this into an
        # opaque AttributeError on `submit.click()` below.
        raise AssertionError("could not find the Myspace login button")
    # and then click on it. We have to do this instead
    # of just submitting the form because it runs a muck
    # of javascript onclick that submits the form
    #
    # There is another problem, however: when we click
    # on the button, there will be an implicit change of
    # page. We won't know when the new page has loaded
    # because of the asynchronous design of PyKHTML.
    # To fix this, we set browser.onNextLoad to our
    # callback function. The function specified will be
    # called when the next browsed-to page has finished
    # loading
    browser.onNextLoad = displayAccountInformation
    submit.click()
    print("Clicked")


def displayAccountInformation(browser):
    """Callback for the page loaded after the login click; ends the run.

    Despite the name, nothing is displayed yet: it only stops the
    pykhtml event loop so that `main` can return.
    """
    print("Stop")
    pykhtml.stopEventLoop()


def signedOut(browser):
    """Callback for the sign-out page; stops the event loop so we exit."""
    # We were signed in before. When this func is called, we've
    # signed out and so can exit.
    print("Signed out. Bye bye")
    pykhtml.stopEventLoop()


def main():
    """Read email and password from the command line and start the login."""
    if len(sys.argv) != 3:
        print("Usage: myspace.py email@email.com password")
        return
    email, password = sys.argv[1:]
    browser = pykhtml.Browser()
    # the browser is passed as a parameter to `login`
    # when it is called (when the page has loaded)
    browser.load("http://www.myspace.com/", pykhtml.partial(login, email, password))
    # kick things off
    pykhtml.startEventLoop()


if __name__ == "__main__":
    main()