=== modified file 'doc/pykhtml.dom.htm' --- doc/pykhtml.dom.htm 2007-02-13 21:13:05 +0000 +++ doc/pykhtml.dom.htm 2007-03-05 16:00:30 +0000 @@ -24,14 +24,14 @@

serialized (read-only property)

Return a string that represents the DOM structure of this document, much like what is returned via innerHTML in JavaScript.

visit(self, text=True, callback=None, attributes=None, stripSpace=None)

Visit a page pointed to by a certain link. This function searches for all links in the document that either:
If the `stripSpace` parameter is True, all whitespace is stripped from the item being matched against when searching for a string match.

class Node (inherits object)

-
Node and all of its subclasses provide you with read-only access to the page's DOM. Instantiating the classes themselves won't do you much good.
-

__init__(self, cNode)

+
Node and all of its subclasses provide you with access to the page's DOM. Instantiating the classes themselves won't do you much good.
+

__init__(self, cNode, browser)

childNodes(self)

For those that can't live without JavaScript DOM-compatible method names.

children (read-only property)

Get the children nodes of this node.

isA(self, klass)

Syntactic sugar for isinstance.

class Element (inherits Node)

An HTML element. Instances of it provide methods for doing things with the element – traversing it, adding events, etc.
-

__init__(self, cElement)

+

__init__(self, cElement, browser)

addEvent(self, eventName, func, capture=False)

This lets you listen for certain events as they occur on the current element. In practice it is mostly useful for listening for load events.

childNodes(self)

Inherited from Node
For those that can't live without JavaScript DOM-compatible method names.

children (read-only property)

Inherited from Node
Get the children nodes of this node.
@@ -46,7 +46,7 @@

text (read-only property)

If the next child of this element is a text node, this will return the text value of that node.

class Anchor (inherits Element)

Anchor elements with an Anchor.href property.
-

__init__(self, cAnchor)

+

__init__(self, cAnchor, browser)

addEvent(self, eventName, func, capture=False)

Inherited from Element
This lets you listen for certain events as they occur on the current element. In practice it is mostly useful for listening for load events.

childNodes(self)

Inherited from Node
For those that can't live without JavaScript DOM-compatible method names.

children (read-only property)

Inherited from Node
Get the children nodes of this node.
@@ -60,17 +60,50 @@

removeEvent(self, eventName, func, capture=False)

Inherited from Element
Removes events that you've added with Element.addEvent.

tagName (read-only property)

Inherited from Element
Get the lowercase name of this tag.

text (read-only property)

Inherited from Element
If the next child of this element is a text node, this will return the text value of that node.
+

class Form (inherits Element)

+
Form elements contain input elements. You can submit a form with Form.submit.
+

__init__(self, cForm, browser)

+

action (property)

The method with which this form is to be submitted (GET, POST, etc).
+

addEvent(self, eventName, func, capture=False)

Inherited from Element
This lets you listen for certain events as they occur on the current element. In practice it is mostly useful for listening for load events.
+

childNodes(self)

Inherited from Node
For those that can't live without JavaScript DOM-compatible method names.
+

children (read-only property)

Inherited from Node
Get the children nodes of this node.
+

getElementById(self, id)

Inherited from Element
Get a reference to an element in the page by its id attribute.
+

getElementsByClass(self, className, tagName='*')

Inherited from Element
Get elements in the document (optionally with a given tag name) that have a certain class.
+

getElementsByTagName(self, name)

Inherited from Element
Get elements by tag name. Returns a generator that you can loop over or flatten into a list with list().
+

getElementsByTagNameNS(self, ns, name)

Inherited from Element
Get elements by tag name given a certain namespace.
+

isA(self, klass)

Inherited from Node
Syntactic sugar for isinstance.
+

originalTagName (read-only property)

Inherited from Element
Like tagName, but the name is not converted to lowercase. Only really useful if you're dealing with XML.
+

removeEvent(self, eventName, func, capture=False)

Inherited from Element
Removes events that you've added with Element.addEvent.
+

submit(self)

Submit the form to the page specified in the action.
+

tagName (read-only property)

Inherited from Element
Get the lowercase name of this tag.
+

text (read-only property)

Inherited from Element
If the next child of this element is a text node, this will return the text value of that node.
+

class Input (inherits Element)

+
Input elements that you can set the value/name of.
+

__init__(self, cInput, browser)

+

addEvent(self, eventName, func, capture=False)

Inherited from Element
This lets you listen for certain events as they occur on the current element. In practice it is mostly useful for listening for load events.
+

childNodes(self)

Inherited from Node
For those that can't live without JavaScript DOM-compatible method names.
+

children (read-only property)

Inherited from Node
Get the children nodes of this node.
+

form (read-only property)

Get the form element this input resides in.
+

getElementById(self, id)

Inherited from Element
Get a reference to an element in the page by its id attribute.
+

getElementsByClass(self, className, tagName='*')

Inherited from Element
Get elements in the document (optionally with a given tag name) that have a certain class.
+

getElementsByTagName(self, name)

Inherited from Element
Get elements by tag name. Returns a generator that you can loop over or flatten into a list with list().
+

getElementsByTagNameNS(self, ns, name)

Inherited from Element
Get elements by tag name given a certain namespace.
+

isA(self, klass)

Inherited from Node
Syntactic sugar for isinstance.
+

originalTagName (read-only property)

Inherited from Element
Like tagName, but the name is not converted to lowercase. Only really useful if you're dealing with XML.
+

removeEvent(self, eventName, func, capture=False)

Inherited from Element
Removes events that you've added with Element.addEvent.
+

tagName (read-only property)

Inherited from Element
Get the lowercase name of this tag.
+

text (read-only property)

Inherited from Element
If the next child of this element is a text node, this will return the text value of that node.

class Text (inherits Node)

A text node lets you access the text in it using the Text.value attribute or by converting to a string with str().
-

__init__(self, cTextNode)

+

__init__(self, cTextNode, browser)

__repr__(self)

__str__(self)

childNodes(self)

Inherited from Node
For those that can't live without JavaScript DOM-compatible method names.

children (read-only property)

Inherited from Node
Get the children nodes of this node.

isA(self, klass)

Inherited from Node
Syntactic sugar for isinstance.

value (read-only property)

Equivalent to str(textNode). Get the string this node represents.
-

elementToObject(e)

-

nodeToClass(n)

+

elementCast(e, browser)

+

nodeCast(n, browser)

registerElement(elementName, klass)

registerNode(nodeType, klass)

=== modified file 'doc/pykhtml.htm' --- doc/pykhtml.htm 2007-02-13 21:13:05 +0000 +++ doc/pykhtml.htm 2007-03-05 16:00:30 +0000 @@ -27,9 +27,9 @@

stopEventLoop()

Stop the event loop and hence exit the scraper.

timer(time, func)

Call the given function after the allotted time. The PyKHTML event loop needs to be running.

init(display=1, _sleep=1)

Initiate the system if necessary (start Xvfb if it's not running, connect to it, start our program instance). This is called automatically when you create a Browser instance, so you shouldn't have to worry about it. You can specify use of a certain display by setting the `display` parameter.
-

class curry

+

class partial

Partial application of parameters. This is used internally but is also very useful with Browser.load as it allows you to pass data to other functions.
(Provide Example).
-

__init__(self, fun)

+

__init__(self, fun)

pathSearch(name)

Utility function to search for and get the full path of a file in $PATH.

running(name)

Check whether a process of the given name is running.
=== modified file 'examples/dynamicdom.py' --- examples/dynamicdom.py 2007-02-12 23:13:40 +0000 +++ examples/dynamicdom.py 2007-03-05 16:00:30 +0000 @@ -34,13 +34,11 @@ data = browser.document.serialized if data != previousData: print "DOM changed!" - print "New text: %s", repr(browser.document.getElementById("mydiv").text) - print len(pykhtml.application.children()) + print "New text:", repr(browser.document.getElementById("mydiv").text) pykhtml.stopEventLoop() else: print "DOM not changed. Checking in 1 second..." - pykhtml.timer(1, pykhtml.curry(checkForDomChanges, browser, data)) - print len(pykhtml.application.children()) + pykhtml.timer(1, pykhtml.partial(checkForDomChanges, browser, data)) def main(): browser = pykhtml.Browser() @@ -48,9 +46,9 @@ browser.setHtml(page) # to see if the DOM has changed we poll and access # browser.document.serialized and compare its to - # the previous calling. Note the use of pykhtml.curry + # the previous calling. Note the use of pykhtml.partial # to bind the serialised data to the function - pykhtml.timer(1, pykhtml.curry(checkForDomChanges, browser, browser.document.serialized)) + pykhtml.timer(1, pykhtml.partial(checkForDomChanges, browser, browser.document.serialized)) pykhtml.startEventLoop() if __name__ == "__main__": === modified file 'pykhtml/__init__.py' --- pykhtml/__init__.py 2007-02-12 23:13:40 +0000 +++ pykhtml/__init__.py 2007-03-05 16:00:30 +0000 @@ -20,7 +20,7 @@ -class curry: +class partial: """ Partial application of parameters. This is used internally but is also very useful with [[Browser.load]] as it allows you to pass data to other functions. (Provide Example). 
""" def __init__(self, fun, *args, **kwargs): @@ -187,6 +187,13 @@ self.connect(self.part, qt.SIGNAL("docCreated()"), self._slotDocCreated) self.location = uri + def _setOnLoadHandler(self, callback): + """ If a function is going to be called that changes the URL and we want a callback to be called when the page is loaded, this is the method for you. Does kinda what load does, without loading the page """ + if self.loadFunction: + self.disconnect(self.part, qt.SIGNAL("docCreated()"). self._slotDocCreated) + self.loadFunction = callback + self.connect(self.part, qt.SIGNAL("docCreated()"), self._slotDocCreated) + def _slotDocCreated(self): self.part.executeScript(DOM.Node(), "window.alert = function() {}") self.disconnect(self.part, qt.SIGNAL("docCreated()"), self._slotDocCreated) @@ -196,10 +203,10 @@ self.loadFunction = None # If _passReferenceToCallbacks, bind this browser to the function if self._passReferenceToCallbacks: - func = curry(func, self) + func = partial(func, self) # do this so the DOM loads fully. Cast to an Element -- not strictly correct, but we just want to get to addEvent. # XX why not just put addEvent in Node and make Document inherit from Node? Document IS meant to be a Node, after all. - docElement = dom.Element(self.document._d).addEvent("load", func) + dom.Element(self.document._d, self).addEvent("load", func) def setHtml(self, source, url=None): """ Set the HTML of the browser. Parses the HTML and generates the DOM tree so you can navigate it as usual. As well as the `source` parameter, a `url` parameter allows you to specify a URL with which this source code is linked so that e.g any scripts/images referenced in the HTML will be found. 
""" === modified file 'pykhtml/dom.py' --- pykhtml/dom.py 2007-02-11 15:16:45 +0000 +++ pykhtml/dom.py 2007-03-05 16:00:30 +0000 @@ -5,11 +5,11 @@ _nodeTypeToClass = {} -def nodeToClass(n): - return _nodeTypeToClass.get(n.nodeType(), Node) +def nodeCast(n, browser): + return _nodeTypeToClass.get(n.nodeType(), Node)(n, browser) _elementNameToClassMap = {} -def elementToObject(e): - e = _elementNameToClassMap.get(str(e.nodeName().string()).upper(), Element)(e) +def elementCast(e, browser): + e = _elementNameToClassMap.get(str(e.nodeName().string()).upper(), Element)(e, browser) return e def registerNode(nodeType, klass): @@ -20,9 +20,10 @@ class Node(object): - """ Node and all of its subclasses provide you with read-only access to the page's DOM. Instantiating the classes themselves won't do you much good. """ - def __init__(self, cNode): + """ Node and all of its subclasses provide you with access to the page's DOM. Instantiating the classes themselves won't do you much good. """ + def __init__(self, cNode, browser): self._ = cNode + self.browser = browser object.__init__(self) self.__children = None @@ -39,7 +40,7 @@ elements = self._.childNodes() for i in xrange(elements.length()): n = elements.item(i) - l.append(nodeToClass(n)(n)) + l.append(nodeCast(n, self.browser)) return self.__children def childNodes(self): @@ -49,8 +50,8 @@ class Text(Node): """ A text node lets you access the text in it using the [[Text.value]] attribute or by converting to a string with str() """ - def __init__(self, cTextNode): - Node.__init__(self, cTextNode) + def __init__(self, cTextNode, browser): + Node.__init__(self, cTextNode, browser) def __str__(self): return self.value @@ -67,8 +68,8 @@ class Element(Node): """ An HTML element. Instances of it provide methods for doing things with the element -- traversing it, adding events, etc. 
""" - def __init__(self, cElement): - Node.__init__(self, cElement) + def __init__(self, cElement, browser): + Node.__init__(self, cElement, browser) @property def text(self): @@ -83,21 +84,21 @@ def getElementById(self, id): """ Get a reference to an element in the page by its id attribute """ n = self._.getElementById(DOMString(id)) - return nodeToClass(n)(n) + return nodeCast(n, self.browser) def getElementsByTagName(self, name): """ Get elements by tag name. Returns a generator that you can loop over or flatten into a list with list() """ elements = self._.getElementsByTagName(DOMString(name)) for i in xrange(elements.length()): node = elements.item(i) - yield nodeToClass(node)(node) + yield nodeCast(node, self.browser) def getElementsByTagNameNS(self, ns, name): """ Get elements by tag name given a certain namespace """ elements = self._.getElementsByTagNameNS(DOMString(ns), DOMString(name)) for i in xrange(elements.length()): node = elements.item(i) - yield nodeToClass(node)(node) + yield nodeCast(node, self.browser) # convenience def getElementsByClass(self, className, tagName="*"): @@ -105,7 +106,7 @@ for element in self.getElementsByTagName(tagName): if className in str(element.getAttribute(DOMString("class")).string()).split(" "): e = elements.item(i) - yield nodeToClass(e)(e) + yield nodeCast(e, self.browser) @property def tagName(self): @@ -127,13 +128,13 @@ self._.removeEventListener(DOMString(eventName), _CallbackEventListener.getCallbackInstance(eventName, func), capture) _CallbackEventListener.remove(eventName, func) # -- important, we hook to the method not Element base class -registerNode(1, elementToObject) +registerNode(1, elementCast) class Anchor(Element): """ Anchor elements with an [[Anchor.href]] property """ - def __init__(self, cAnchor): - Element.__init__(self, cAnchor) + def __init__(self, cAnchor, browser): + Element.__init__(self, cAnchor, browser) self._ = sip.cast(self._, _DOM.HTMLAnchorElement) @property @@ -143,6 +144,49 @@ 
registerElement("A", Anchor) +class Form(Element): + """ Form elements contain input elements. You can submit a form with [[Form.submit]] """ + def __init__(self, cForm, browser): + Element.__init__(self, cForm, browser) + self._ = sip.cast(self._, _DOM.HTMLFormElement) + + def _getAction(self): + return str(self._.action().string()) + def _setAction(self, action): + self._.setAction(DOMString(action)) + action = property(_getAction, _setAction, None, "The action for this form (the page to which data is to be submited)") + + def _getMethod(self): + return str(self._.method().string()) + def _setMethod(self, method): + self._.setMethod(DOMString(method)) + action = property(_getMethod, _setMethod, None, "The method with which this form is to be submitted (GET, POST, etc)") + + def submit(self, callback): + """ Submit the form to the page specified in the action. The callback given is like one you would pass to [[pykthml.Browser.load]] """ + # load the callback + self.browser._setOnLoadHandler(callback) + # submit + self._.submit() + + def submit(self): + """ Submit the form to the page specified in the action """ + self._.submit() +registerElement("FORM", Form) + + +class Input(Element): + """ Input elements that you can set the value/name of """ + def __init__(self, cInput, browser): + Element.__init__(self, cInput, browser) + self._ = sip.cast(self._, _DOM.HTMLInputElement) + + @property + def form(self): + """ Get the form element this input resides in """ + return Form(self._.form()) +registerElement("INPUT", Input) + #class Title(Element): #def __init__(self, cTitle): #Node.__init__(self, cTitle) @@ -165,21 +209,21 @@ def getElementById(self, id): """ Get a reference to an element in the page by its id attribute """ n = self._d.getElementById(DOMString(id)) - return nodeToClass(n)(n) + return nodeCast(n, self.browser) def getElementsByTagName(self, name): """ Get elements by tag name. 
Returns a generator that you can loop over or flatten into a list with list() """ elements = self._d.getElementsByTagName(DOMString(name)) for i in xrange(elements.length()): node = elements.item(i) - yield nodeToClass(node)(node) + yield nodeCast(node, self.browser) def getElementsByTagNameNS(self, ns, name): """ Get elements by tag name given a certain namespace """ elements = self._d.getElementsByTagNameNS(DOMString(ns), DOMString(name)) for i in xrange(elements.length()): node = elements.item(i) - yield nodeToClass(node)(node) + yield nodeCast(node, self.browser) # convenience def getElementsByClass(self, className, tagName="*"): @@ -187,7 +231,7 @@ for element in self.getElementsByTagName(tagName): if className in str(element.getAttribute(DOMString("class")).string()).split(" "): e = elements.item(i) - yield nodeToClass(e)(e) + yield nodeCast(e, self.browser) def visit(self, text=None, callback=None, attributes=None, stripSpace=True): """ Visit a page pointed to by a certain link. This function searches for all links in the document that either: