import sys
# Extend the module search path with the parent directory before importing
# pykhtml -- presumably so this example can be run from inside the source
# tree without the package being installed (TODO confirm).
# NOTE(review): the relative entry ".." is resolved against the current
# working directory, not against the location of this file.
sys.path.append("..")
import pykhtml

# Address of the page that main() asks the browser to load.
PyKHTMLUrl = "http://paul.giannaros.org/pykhtml"
def extractBitsFromPage(browser):
    """Print the title and navigation entries of the loaded page, then quit.

    Meant to be handed to ``browser.load`` as the callback that runs once
    the page has finished loading.

    browser -- object whose ``document`` attribute offers
               ``getElementsByTagName`` and ``getElementById``.

    Any exception raised while reading the document still propagates to the
    caller, but the event loop is stopped first so that a page with an
    unexpected structure cannot leave the script running forever.
    """
    try:
        document = browser.document

        # getElementsByTagName returns a generator, so we convert
        # to a list and access the first element
        title = list(document.getElementsByTagName("title"))[0]
        # Single-argument print(...) emits the same text as the old
        # ``print "Title:", x`` statement and is also valid on Python 3.
        print("Title: %s" % (title.text,))

        # First get the container of the list items...
        navigationElement = document.getElementById("navigation")
        # ... then collect the text of the anchor that is the first child
        # of every li element found inside it
        navigation = [
            listItem.children[0].text
            for listItem in navigationElement.getElementsByTagName("li")
        ]
        print("Navigation: %s" % (" | ".join(navigation),))
    finally:
        # Stop here, we're done -- whether or not the extraction succeeded
        pykhtml.stopEventLoop()
def main():
    """Open the PyKHTML page and report on it once loading has finished.

    ``extractBitsFromPage`` is registered as the load callback; it is
    called with the browser as its parameter when the page has loaded.
    """
    page = pykhtml.Browser()
    page.load(PyKHTMLUrl, extractBitsFromPage)
    # Nothing happens until the event loop is running, so start it last.
    pykhtml.startEventLoop()


# Only run the example when this file is executed as a script.
if __name__ == "__main__":
    main()