"""Example script: load a page with PyKHTML and print bits of its DOM.

Loads ``PyKHTMLUrl`` in a ``pykhtml.Browser``, then prints the text of the
page's ``<title>`` element and the text of the first child of every ``<li>``
found under the element with id ``navigation``.
"""

import sys

# Extend the module search path with the parent directory before importing
# pykhtml (presumably the package lives one directory up -- TODO confirm).
sys.path.append("..")

import pykhtml

PyKHTMLUrl = "http://paul.giannaros.org/pykhtml"


def extractBitsFromPage(browser):
    """Print the page title and navigation labels, then stop the event loop.

    Passed to ``browser.load`` as the callback to run once the page has
    loaded; it receives the browser as its only argument.

    The event loop is stopped in a ``finally`` clause so that an unexpected
    page structure (which raises while walking the DOM) cannot leave the
    script hanging inside ``pykhtml.startEventLoop()``.
    """
    try:
        # getElementsByTagName returns a generator, so materialise it before
        # checking whether there is a first element to read.
        titles = list(browser.document.getElementsByTagName("title"))
        if titles:
            # Single-argument print(...) behaves the same as the Python 2
            # print statement and is also valid Python 3 syntax.
            print("Title: %s" % titles[0].text)
        else:
            print("Title: (no <title> element found)")

        # Collect the text of the navigation items.
        navigation = []
        # First get the container of the list items...
        navigationElement = browser.document.getElementById("navigation")
        # NOTE(review): assumes getElementById returns None when nothing
        # matches -- confirm against the pykhtml API.
        if navigationElement is not None:
            # ... and then loop over the li elements we find.
            for listItem in navigationElement.getElementsByTagName("li"):
                # Inside the list item is an anchor, and the text inside
                # the anchor is what we want.
                anchor = listItem.children[0]
                navigation.append(anchor.text)
        print("Navigation: %s" % " | ".join(navigation))
    finally:
        # Stop here, we're done -- whether or not extraction succeeded.
        pykhtml.stopEventLoop()


def main():
    """Create a browser, start loading the page and run the event loop."""
    browser = pykhtml.Browser()
    # The browser is passed as a parameter to extractBitsFromPage
    # when it is called (when the page has loaded).
    browser.load(PyKHTMLUrl, extractBitsFromPage)
    # Kick things off.
    pykhtml.startEventLoop()


if __name__ == "__main__":
    main()