=== added file 'examples/spider.py'
--- examples/spider.py 1970-01-01 00:00:00 +0000
+++ examples/spider.py 2007-07-14 19:27:31 +0000
@@ -0,0 +1,42 @@
+
+""" Scrapes all of the headlines from Google News and then recursively visits the links to try to find the writer of the article """
+
+import sys
+sys.path.append("..")
+
+import pykhtml
+pykhtml.debugWithGUI = True
+
+
+def discoverJournalist(browser, headline):  # page-load callback for a followed headline link
+ print headline, browser.document  # placeholder output only: no byline extraction happens yet
+ pykhtml.stopEventLoop()  # stop the pykhtml event loop the first time this callback fires
+
+
+def scrapeHeadlines(browser):
+ # Load callback for the front page: for anchors that have an id and contain a <b> element,
+ # open the link in a new Browser. (Debug aid: slice the anchor list, e.g. [:35], to cap the scan.)
+ for anchor in browser.document.getElementsByTagName("a"):
+ if anchor["id"]:
+ boldElements = list(anchor.getElementsByTagName("b"))
+ if boldElements:
+ title = boldElements[0].text  # text of the first <b> is used as the headline
+ newBrowser = pykhtml.Browser()
+ func = pykhtml.partial(discoverJournalist, title)
+ newBrowser.load(anchor["href"], func)
+ pykhtml.timer(9.0, pykhtml.stopEventLoop)  # presumably a 9 s safety stop -- confirm
+ return
+ # NOTE(review): discoverJournalist is declared as (browser, headline) but title is bound
+ # first above; if pykhtml.partial prepends bound arguments the way functools.partial does,
+ # the callback receives them swapped -- confirm against pykhtml.partial.
+
+
+def main():  # entry point: load the Google News front page and hand it to scrapeHeadlines
+ browser = pykhtml.Browser()
+ browser.load("http://news.google.co.uk/news", scrapeHeadlines)  # callback runs once the page has loaded
+ # kick things off; presumably the load callback only fires while the event loop runs -- confirm
+ pykhtml.startEventLoop()
+
+
+if __name__ == "__main__":
+ main()
=== modified file 'doc/pykhtml.htm'
--- doc/pykhtml.htm 2007-07-03 00:39:39 +0000
+++ doc/pykhtml.htm 2007-07-14 19:27:31 +0000
@@ -22,6 +22,7 @@
Load a webpage in the browser. It takes as parameters the URI of the page to load, and a callable object to call when the page has loaded. This callback will be given the browser object as a reference unless you set Browser.referencelessCallbacks to True.
Browse to a new location. You probably don't want to set this directly as you'll receive no notification when the page has loaded. Have a look at
Browser.load instead.
Set this to any callable that you want to receive alert messages. The default implementation just does nothing.
+Set this to any callable that you want to receive JavaScript confirm messages. It will be called with the message as its only argument. Return True from your callable to confirm (OK) or False to cancel.
If you're going to do something that will inadvertently cause PyKHTML to browse to a new page and you want a function to be called when the page is loaded, set onNextLoad to the function.
Set this to any callable that you want to receive JavaScript prompt messages. It will be called passing the message and the defaultText (if specified; None otherwise). Return the text you would like to be passed back to the JS interpreter or None for JavaScript null. The default implementation just returns None.
Set whether callbacks passed to functions such as
Browser.load or
dom.Document.visit will have a reference to this browser object passed as a parameter. Default is True.
=== modified file 'pykhtml/__init__.py'
--- pykhtml/__init__.py 2007-07-03 00:39:39 +0000
+++ pykhtml/__init__.py 2007-07-14 19:27:31 +0000
@@ -401,6 +401,14 @@
// fortunately, dispatchEvent is synchronous
return window._promptResponse || null;
}
+ window.confirm = function(text) {
+ window._confirmEventMessage = text;
+ var event = document.createEvent('Event');
+ event.initEvent('ConfirmEvent', true, true);
+ document.dispatchEvent(event);
+ // fortunately, dispatchEvent is synchronous
+ return window._confirmResponse || null;
+ }
""")
# which we can listen to from here
node = dom.Node(self.document._d, self)
@@ -413,10 +421,21 @@
# in the javascript docs indicates this should happen.
node.addEvent("AlertEvent", self._handleAlert, True)
node.addEvent("PromptEvent", self._handlePrompt, False)
+ node.addEvent("ConfirmEvent", self._handleConfirm, False)
+
+ def _handleConfirm(self, e):
+ if str(e.type().string()) == "ConfirmEvent":
+ message = self.eval("window._confirmEventMessage")
+ ret = self.onPrompt(message, defaultText)
+ if ret:
+ ret = "true"
+ else:
+ ret = "false"
+ self.eval("window._confirmResponse = %s" % ret)
def _handlePrompt(self, e):
if str(e.type().string()) == "PromptEvent":
- message = self.eval("window._promptEventMessage")
+ message = self.eval("window._promptEventMessage")
defaultText = self.eval("window._promptDefaultValue")
ret = self.onPrompt(message, defaultText)
if ret:
@@ -438,6 +457,10 @@
""" Set this to any callable that you want to receive JavaScript prompt messages. It will be called passing the message and the defaultText (if specified; None otherwise). Return the text you would like to be passed back to the JS interpreter or None for JavaScript null. The default implementation just returns None. """
pass
+ def onConfirm(self, message):
+ """ Set this to any callable that you want to receive JavaScript confirm messages. It will be called with the message as its only argument. Return True to confirm (OK) or False to cancel. The default implementation does nothing and returns None, which is a false value. """
+ pass
+
def _slotDocCreated(self):
self._installJavaScriptOverrides()
self.disconnect(self.part, qt.SIGNAL("docCreated()"), self._slotDocCreated)