=== added directory 'doc/images' === added file 'doc/images/rounded-bottom-left.png' Binary files doc/images/rounded-bottom-left.png 1970-01-01 00:00:00 +0000 and doc/images/rounded-bottom-left.png 2007-02-10 19:24:59 +0000 differ === added file 'doc/images/rounded-bottom-right.png' Binary files doc/images/rounded-bottom-right.png 1970-01-01 00:00:00 +0000 and doc/images/rounded-bottom-right.png 2007-02-10 19:24:59 +0000 differ === added file 'doc/pykhtml.dom.htm' --- doc/pykhtml.dom.htm 1970-01-01 00:00:00 +0000 +++ doc/pykhtml.dom.htm 2007-02-10 19:24:59 +0000 @@ -0,0 +1,79 @@ + + + + + pykhtml.dom module documentation + + + + + +
+
Modules:
+
+
+

pykhtml.dom

+

None

+

class Document (inherits object)

+
Document object for accessing the DOM tree. Don't keep a reference to this object: it is invalidated whenever the page changes. Instead, access it through pykhtml.Browser.document each time you need it.
+ +

getElementById(self, id)

Get a reference to an element in the page by its id attribute.
+

getElementsByClass(self, className, tagName='*')

Get elements in the document that have a certain class.
+

getElementsByTagName(self, name)

Get elements by tag name. Returns a generator that you can loop over or flatten into a list with list().
+

getElementsByTagNameNS(self, ns, name)

Get elements by tag name given a certain namespace.
+

visit(self, text=True, callback=None, attributes=None, stripSpace=None)

Visit a page pointed to by a certain link. This function searches for all links in the document that either:
  • Match the given text (as a string or regular expression object)
  • Match the attributes given (a dictionary mapping attribute name to attribute value, where the value is again either a string or regular expression object)
If the `stripSpace` parameter is True, then when searching for a string match, all whitespace is stripped from the item we are matching against.
+

class Node (inherits object)

+
Node and all of its subclasses provide you with read-only access to the page's DOM. Instantiating the classes themselves won't do you much good.
+ +

childNodes(self)

For those that can't live without JavaScript DOM-compatible method names.
+

children (read-only property)

Get the child nodes of this node.
+

isA(self, klass)

Syntactic sugar for isinstance.
+

class Element (inherits Node)

+
An HTML element. Instances of it provide methods for doing things with the element – traversing it, adding events, etc.
+ +

addEvent(self, eventName, func, capture=False)

This lets you listen for certain events as they occur on the current element. In practice it is mainly useful for listening for load events.
+

childNodes(self)

Inherited from Node
For those that can't live without JavaScript DOM-compatible method names.
+

children (read-only property)

Inherited from Node
Get the child nodes of this node.
+

getElementById(self, id)

Get a reference to an element in the page by its id attribute.
+

getElementsByClass(self, className, tagName='*')

Get elements in the document that have a certain class.
+

getElementsByTagName(self, name)

Get elements by tag name. Returns a generator that you can loop over or flatten into a list with list().
+

getElementsByTagNameNS(self, ns, name)

Get elements by tag name given a certain namespace.
+

isA(self, klass)

Inherited from Node
Syntactic sugar for isinstance.
+

originalTagName (read-only property)

Like tagName, but the name is not converted to lowercase. Only really useful if you're dealing with XML.
+ +

tagName (read-only property)

Get the lowercase name of this tag.
+

text (read-only property)

If the next child of this element is a text node, this will return the text value of that node.
+

class Anchor (inherits Element)

+
Anchor elements with an Anchor.href property.
+ +

addEvent(self, eventName, func, capture=False)

Inherited from Element
This lets you listen for certain events as they occur on the current element. In practice it is mainly useful for listening for load events.
+

childNodes(self)

Inherited from Node
For those that can't live without JavaScript DOM-compatible method names.
+

children (read-only property)

Inherited from Node
Get the child nodes of this node.
+

getElementById(self, id)

Inherited from Element
Get a reference to an element in the page by its id attribute.
+

getElementsByClass(self, className, tagName='*')

Inherited from Element
Get elements in the document that have a certain class.
+

getElementsByTagName(self, name)

Inherited from Element
Get elements by tag name. Returns a generator that you can loop over or flatten into a list with list().
+

getElementsByTagNameNS(self, ns, name)

Inherited from Element
Get elements by tag name given a certain namespace.
+

href (read-only property)

The anchor's 'href' value. Returns the full URL pointed to by the href attribute of this element.
+

isA(self, klass)

Inherited from Node
Syntactic sugar for isinstance.
+

originalTagName (read-only property)

Inherited from Element
Like tagName, but the name is not converted to lowercase. Only really useful if you're dealing with XML.
+

removeEvent(self, eventName, func, capture=False)

Inherited from Element
Removes events that you've added with Element.addEvent.
+

tagName (read-only property)

Inherited from Element
Get the lowercase name of this tag.
+

text (read-only property)

Inherited from Element
If the next child of this element is a text node, this will return the text value of that node.
+

class Text (inherits Node)

+
A text node lets you access the text in it using the Text.value attribute or by converting to a string with str().
+ + + +

childNodes(self)

Inherited from Node
For those that can't live without JavaScript DOM-compatible method names.
+

children (read-only property)

Inherited from Node
Get the child nodes of this node.
+

isA(self, klass)

Inherited from Node
Syntactic sugar for isinstance.
+

value (read-only property)

Equivalent to str(textNode). Get the string this node represents.
+ + + + + +
+
+ + === added file 'doc/pykhtml.htm' --- doc/pykhtml.htm 1970-01-01 00:00:00 +0000 +++ doc/pykhtml.htm 2007-02-10 19:24:59 +0000 @@ -0,0 +1,34 @@ + + + + + pykhtml module documentation + + + + + +
+
Modules:
+
+
+

pykhtml

+

None

+

class Browser (inherits object)

+
A Browser is the main class you use to navigate around and visit different pages. Have a look at Browser.load and Browser.document for basic usage.
+

__init__(self)

Create a new Browser.
+

document (read-only property)

Get a reference to the document (see dom.Document) for the currently loaded page. It contains all the tasty methods for walking the DOM tree like getElementById / getElementsByTagName, and methods for browsing to other linked pages.
+

load(self, uri, callback)

Load a webpage in the browser. It takes as parameters the URI of the page to load, and a callable object to call when the page has loaded.
+

location (property)

Browse to a new location. You probably don't want to set this directly as you'll receive no notification when the page has loaded. Have a look at Browser.load instead.
+ +

startEventLoop()

Starts the PyKHTML event loop. PyKHTML works with an asynchronous callback mechanism – a little like Twisted does. Calls to open a new webpage aren't synchronous, as they are with urllib, for example.
+

stopEventLoop()

Stop the event loop and hence exit the scraper.
+

timer(time, func)

Call the given function after the allotted time. Requires that the PyKHTML event loop is running.
+

init(display=1, _sleep=1)

Initialise the system if necessary (start Xvfb if it's not running, connect to it, start our program instance). This is called automatically when you create a Browser instance, so you shouldn't have to worry about it. You can specify use of a certain display by setting the `display` parameter.
+

pathSearch(name)

Utility function to search for and get the full path of a file in $PATH.
+

running(name)

Check whether a process of the given name is running.
+ +
+
+ + === added file 'doc/script.js' --- doc/script.js 1970-01-01 00:00:00 +0000 +++ doc/script.js 2007-02-10 19:24:59 +0000 @@ -0,0 +1,114 @@ + +// + + +function createCookie(name,value,days) { + if (days) { + var date = new Date(); + date.setTime(date.getTime()+(days*24*60*60*1000)); + var expires = "; expires="+date.toGMTString(); + } + else var expires = ""; + document.cookie = name+"="+value+expires+"; path=/"; +} + +function readCookie(name) { + var nameEQ = name + "="; + var ca = document.cookie.split(';'); + for(var i=0;i < ca.length;i++) { + var c = ca[i]; + while (c.charAt(0)==' ') c = c.substring(1,c.length); + if (c.indexOf(nameEQ) == 0) return c.substring(nameEQ.length, c.length); + } + return null; +} + +function eraseCookie(name) { + createCookie(name, "", -1); +} + + + +function init() { + initOptions(); +// document.getElementBy +} + +function initOptions() { + var optionsContainer = document.getElementById('options'); + var markup = ''; + optionsContainer.innerHTML = markup; + readOptionsStateCookie(); + setTimeout(hookupOptions, 0); +} + +var optionsState = [1]; + +function hookupOptions() { + var checkbox, val; +// checkbox = document.getElementById('hide-inherited-methods'); + var checkboxes = [ + [document.getElementById('hide-inherited-methods'), hideOrShowInheritedMethods, hideInheritedMethods, showInheritedMethods] + ]; + for(var i = 0, j = checkboxes.length; i < j; ++i) { + checkbox = checkboxes[i][0]; + val = optionsState[i]; + checkbox.onclick = checkboxes[i][1]; + checkbox.checked = val; + if(val) + checkboxes[i][2](); + else + checkboxes[i][3](); + } + +// document.getElementById('hide-inherited-methods').onclick = hideOrShowInheritedMethods; + // it may be checked already +// setTimeout(hideOrShowInheritedMethods, 100); +} + +function updateOptionsStateCookie() { + optionsState = [ + document.getElementById('hide-inherited-methods').checked ? 
1 : 0 + ]; + createCookie('options', optionsState.join(','), 365); +} +function readOptionsStateCookie() { + var cook = readCookie('options'); + if(cook) { + optionsState = cook.split(","); + for(var i = 0, j = optionsState.length; i < j; ++i) { + optionsState[i] = parseInt(optionsState[i]); + } + } +} + +function hideOrShowInheritedMethods() { + if(this.checked) + hideInheritedMethods(); + else + showInheritedMethods(); + updateOptionsStateCookie(); +} + +function hideInheritedMethods() { + var divs = document.getElementsByTagName('div'), div; + for(var i = 0, j = divs.length; i < j; ++i) { + div = divs[i]; + if(div.className.indexOf("inherited") != -1) { + div.style.display = "none"; + } + } +} +function showInheritedMethods() { + var divs = document.getElementsByTagName('div'), div; + for(var i = 0, j = divs.length; i < j; ++i) { + div = divs[i]; + if(div.className.indexOf("inherited") != -1) { + div.style.display = "block"; + } + } +} + + +window.onload = init; + === added file 'doc/styles.css' --- doc/styles.css 1970-01-01 00:00:00 +0000 +++ doc/styles.css 2007-02-10 19:24:59 +0000 @@ -0,0 +1,129 @@ + +html, body { +margin: 0; +padding: 0; +text-align: center; +font-family: sans-serif; +} + +#container { +text-align: left; +margin: 0 auto; +width: 39em; +border: 1px solid #ccc; +border-top: 0; +} + +#docs { +padding: 0 1em 1em 1em; +background: url("../images/content-background.png") white repeat-x top left; +} + +h1 { +text-align: center; +margin: 0; +color: #aba; +font-size: 2.5em; +text-shadow: white 1px 1px 4px; +padding-top: .5em; +/* color: #65a042; */ +/* border: 1px solid blue; */ +} + +.method, .property { +margin-left: 2em; +} +.property-label, .inherits-label { +color: #aaa; +margin-left: .5em; +} + +#modules { +padding: .3em 0; +border-top: .3em solid #ccc; +cursor: default; +border-bottom: 1px solid #c4d7b9; +background: #c7e2b6 url("../images/navigation-background.png") repeat-x; +/* border: 1px solid blue; */ +} + +#modules-label { +float: left; 
+padding-left: 1em; +font-size: .8em; +margin-top: .2em; +display: none; +color: #666; +/* border: 1px solid green; */ +} +#modules-links { +text-align: right; +padding-right: .8em; +/* border: 1px solid pink; */ +} +#modules-links .sep { +color: #ccc; +font-size: .7em; +position: relative; +bottom: .1em; +cursor: default; +/* padding: */ +} +#modules-links a { +padding: 1px 6px; +text-shadow: #bbb 2px 2px 3px; +color: #65a042; +text-decoration: none; +} +#modules-links a.here { +color: darkgreen; +} +#modules-links a:hover { +padding: 1px 6px; +color: darkgreen; +} + +#options { +/* border-top: 1px solid #cdc; */ +font-size: .8em; +background-color: #d7e8d6; +padding: .2em .5em .3em .8em; +position: relative; +} +#rounded-bottom-left, #rounded-bottom-right { +position: absolute; +bottom: 0; +} +#rounded-bottom-left { +left: 0; +} +#rounded-bottom-right { +right: 0; +} +#options input { +position: relative; +top: .1em; +/* border: 1px solid #ccc; */ +/* padding: .2em 1em .2em .5em; */ +} +#options label { +cursor: default; +color: #666; +padding: .2em 1em .2em .5em; +} +#options label:hover { +color: #444; +} + +.clear { +clear: both; +} + +a.reference { +color: #337825; +text-decoration: none; +} +a.reference:hover { +color: darkgreen; +text-shadow: #ddd 1px 1px 2px; +} \ No newline at end of file === added file 'doc/text-shadow.js' --- doc/text-shadow.js 1970-01-01 00:00:00 +0000 +++ doc/text-shadow.js 2007-02-10 19:24:59 +0000 @@ -0,0 +1,365 @@ +if(window.addEventListener)window.addEventListener('load',textShadows,false); +else if(window.attachEvent)window.attachEvent('onload',textShadows); +function setStyles(o,s){ + var i; + s=s.split(';'); + for(i in s){ + var p=s[i].split(':'); + o.style[p[0]]=p[1]; + } +} +function textShadows(){ + var ua=navigator.userAgent; + if(ua.indexOf('KHTML')>=0&&!(ua.indexOf('Safari')>=0))return; + var ss=document.styleSheets,a; + for(a in ss){ + var theRules=[],b; + if(ss[a].cssRules)theRules=ss[a].cssRules; + else 
if(ss[a].rules)theRules=ss[a].rules; + for(b in theRules){ + var selector=theRules[b].selectorText,r=theRules[b].style.cssText; + if(/text-shadow/.test(r)){ + r=r.replace(/([ ,]) /g,'$1').replace(/.*text-shadow[ :]+/,'').replace(/[ ]*;.*/,''); + var shadows=r.split(','),k,els=cssQuery(selector),l; + for(l in els){ + var x=parseInt(els[l].offsetLeft),y=parseInt(els[l].offsetTop),el3=els[l].cloneNode(true); + setStyles(el3,'position:absolute;zIndex:50;margin:0'); + for(k in shadows){ + var parts=shadows[k].split(' '); + var newX=x+parseInt(parts[1]),newY=y+parseInt(parts[2]),rad=parseInt(parts[3]); + for(m=0-rad;m<=rad;++m)for(n=0-rad;n<=rad;++n)showShadow(els[l],newX+m,newY+n,parts[0]); + var el2=el3.cloneNode(true); + setStyles(el2,'left:'+x+'px;top:'+y+'px'); + els[l].parentNode.appendChild(el2); + } + } + } + } + } +} +function showShadow(el,x,y,color){ + var el2=el.cloneNode(true); + setStyles(el2,'position:absolute;color:'+color+';left:'+x+'px;top:'+y+'px;margin:0;textShadow:none;zIndex:49'); + el2.style.opacity='.08'; + el2.style.filter='alpha(opacity=8)'; + el.parentNode.appendChild(el2); +} + + + +/* + This work is licensed under a Creative Commons License. + + License: http://creativecommons.org/licenses/by/1.0/ + + You are free: + + to copy, distribute, display, and perform the work + to make derivative works + to make commercial use of the work + + Under the following conditions: + + Attribution. You must give the original author credit + + Author: Dean Edwards/2004 + Web: http://dean.edwards.name/ +*/ + +/* keeping code tidy! */ + +/* extendible css query function for common platforms + + tested on IE5.0/5.5/6.0, Mozilla 1.6/Firefox 0.8, Opera 7.23/7.5 + (all windows platforms - somebody buy me a mac!) 
+*/ + +// ----------------------------------------------------------------------- +// css query engine +// ----------------------------------------------------------------------- + +var cssQuery=function() { + var version="1.0.1"; // timestamp: 2004/05/25 + + // constants + var STANDARD_SELECT=/^[^>\+~\s]/; + var STREAM=/[\s>\+~:@#\.]|[^\s>\+~:@#\.]+/g; + var NAMESPACE=/\|/; + var IMPLIED_SELECTOR=/([\s>\+~\,]|^)([\.:#@])/g; + var ASTERISK ="$1*$2"; + var WHITESPACE=/^\s+|\s*([\+\,>\s;:])\s*|\s+$/g; + var TRIM="$1"; + var NODE_ELEMENT=1; + var NODE_TEXT=3; + var NODE_DOCUMENT=9; + + // sniff for explorer (cos of one little bug) + var isMSIE=/MSIE/.test(navigator.appVersion), isXML; + + // cache results for faster processing + var cssCache={}; + + // this is the query function + function cssQuery(selector, from) { + if (!selector) return []; + var useCache=arguments.callee.caching && !from; + from=(from) ? (from.constructor == Array) ? from : [from] : [document]; + isXML=checkXML(from[0]); + // process comma separated selectors + var selectors=parseSelector(selector).split(","); + var match=[]; + for (var i in selectors) { + // convert the selector to a stream + selector=toStream(selectors[i]); + // process the stream + var j=0, token, filter, cacheSelector="", filtered=from; + while (j < selector.length) { + token=selector[j++]; + filter=selector[j++]; + cacheSelector += token + filter; + // process a token/filter pair + filtered=(useCache && cssCache[cacheSelector]) ? cssCache[cacheSelector] : select(filtered, token, filter); + if (useCache) cssCache[cacheSelector]=filtered; + } + match=match.concat(filtered); + } + // return the filtered selection + return match; + }; + cssQuery.caching=false; + cssQuery.reset=function() { + cssCache={}; + }; + cssQuery.toString=function () { + return "function cssQuery() {\n [version " + version + "]\n}"; + }; + + var checkXML=(isMSIE) ? 
function(node) { + if (node.nodeType != NODE_DOCUMENT) node=node.document; + return node.mimeType == "XML Document"; + } : function(node) { + if (node.nodeType == NODE_DOCUMENT) node=node.documentElement; + return node.localName != "HTML"; + }; + + function parseSelector(selector) { + return selector + // trim whitespace + .replace(WHITESPACE, TRIM) + // encode attribute selectors + .replace(attributeSelector.ALL, attributeSelector.ID) + // e.g. ".class1" --> "*.class1" + .replace(IMPLIED_SELECTOR, ASTERISK); + }; + + // convert css selectors to a stream of tokens and filters + // it's not a real stream. it's just an array of strings. + function toStream(selector) { + if (STANDARD_SELECT.test(selector)) selector=" " + selector; + return selector.match(STREAM) || []; + }; + + var pseudoClasses={ // static + // CSS1 + "link": function(element) { + for (var i=0; i < document.links; i++) { + if (document.links[i] == element) return true; + } + }, + "visited": function(element) { + // can't do this without jiggery-pokery + }, + // CSS2 + "first-child": function(element) { + return !previousElement(element); + }, + // CSS3 + "last-child": function(element) { + return !nextElement(element); + }, + "root": function(element) { + var document=element.ownerDocument || element.document; + return Boolean(element == document.documentElement); + }, + "empty": function(element) { + for (var i=0; i < element.childNodes.length; i++) { + if (isElement(element.childNodes[i]) || element.childNodes[i].nodeType == NODE_TEXT) return false; + } + return true; + } + // add your own... + }; + + var QUOTED=/([\'\"])[^\1]*\1/; + function quote(value) {return (QUOTED.test(value)) ? value : "'" + value + "'"}; + function unquote(value) {return (QUOTED.test(value)) ? 
value.slice(1, -1) : value}; + + var attributeSelectors=[]; + + function attributeSelector(attribute, compare, value) { + // properties + this.id=attributeSelectors.length; + // build the test expression + var test="element."; + switch (attribute.toLowerCase()) { + case "id": + test += "id"; + break; + case "class": + test += "className"; + break; + default: + test += "getAttribute('" + attribute + "')"; + } + // continue building the test expression + switch (compare) { + case "=": + test += "==" + quote(value); + break; + case "~=": + test="/(^|\\s)" + unquote(value) + "(\\s|$)/.test(" + test + ")"; + break; + case "|=": + test="/(^|-)" + unquote(value) + "(-|$)/.test(" + test + ")"; + break; + } + push(attributeSelectors, new Function("element", "return " + test)); + }; + attributeSelector.prototype.toString=function() { + return attributeSelector.PREFIX + this.id; + }; + // constants + attributeSelector.PREFIX="@"; + attributeSelector.ALL=/\[([^~|=\]]+)([~|]?=?)([^\]]+)?\]/g; + // class methods + attributeSelector.ID=function(match, attribute, compare, value) { + return new attributeSelector(attribute, compare, value); + }; + + // select a set of matching elements. + // "from" is an array of elements. + // "token" is a character representing the type of filter + // e.g. 
">" means child selector + // "filter" represents the tag name, id or class name that is being selected + // the function returns an array of matching elements + function select(from, token, filter) { + //alert("token="+token+",filter="+filter); + var namespace=""; + if (NAMESPACE.test(filter)) { + filter=filter.split("|"); + namespace=filter[0]; + filter=filter[1]; + } + var filtered=[], i; + switch (token) { + case " ": // descendant + for (i in from) { + var subset=getElementsByTagNameNS(from[i], filter, namespace); + for (var j=0; j < subset.length; j++) { + if (isElement(subset[j]) && (!namespace || compareNamespace(subset[j], namespace))) + push(filtered, subset[j]); + } + } + break; + case ">": // child + for (i in from) { + var subset=from[i].childNodes; + for (var j=0; j < subset.length; j++) + if (compareTagName(subset[j], filter, namespace)) push(filtered, subset[j]); + } + break; + case "+": // adjacent (direct) + for (i in from) { + var adjacent=nextElement(from[i]); + if (adjacent && compareTagName(adjacent, filter, namespace)) push(filtered, adjacent); + } + break; + case "~": // adjacent (indirect) + for (i in from) { + var adjacent=from[i]; + while (adjacent=nextElement(adjacent)) { + if (adjacent && compareTagName(adjacent, filter, namespace)) push(filtered, adjacent); + } + } + break; + case ".": // class + filter=new RegExp("(^|\\s)" + filter + "(\\s|$)"); + for (i in from) if (filter.test(from[i].className)) push(filtered, from[i]); + break; + case "#": // id + for (i in from) if (from[i].id == filter) push(filtered, from[i]); + break; + case "@": // attribute selector + filter=attributeSelectors[filter]; + for (i in from) if (filter(from[i])) push(filtered, from[i]); + break; + case ":": // pseudo-class (static) + filter=pseudoClasses[filter]; + for (i in from) if (filter(from[i])) push(filtered, from[i]); + break; + } + return filtered; + }; + + var getElementsByTagNameNS=(isMSIE) ? 
function(from, tagName) { + return (tagName == "*" && from.all) ? from.all : from.getElementsByTagName(tagName); + } : function(from, tagName, namespace) { + return (namespace) ? from.getElementsByTagNameNS("*", tagName) : from.getElementsByTagName(tagName); + }; + + function compareTagName(element, tagName, namespace) { + if (namespace && !compareNamespace(element, namespace)) return false; + return (tagName == "*") ? isElement(element) : (isXML) ? (element.tagName == tagName) : (element.tagName == tagName.toUpperCase()); + }; + + var PREFIX=(isMSIE) ? "scopeName" : "prefix"; + function compareNamespace(element, namespace) { + return element[PREFIX] == namespace; + }; + + // return the previous element to the supplied element + // previousSibling is not good enough as it might return a text or comment node + function previousElement(element) { + while ((element=element.previousSibling) && !isElement(element)) continue; + return element; + }; + + // return the next element to the supplied element + function nextElement(element) { + while ((element=element.nextSibling) && !isElement(element)) continue; + return element; + }; + + function isElement(node) { + return Boolean(node.nodeType == NODE_ELEMENT && node.tagName != "!"); + }; + + + // use a baby push function because IE5.0 doesn't support Array.push + function push(array, item) { + array[array.length]=item; + }; + + // fix IE5.0 String.replace + if ("i".replace(/i/,function(){return""})) { + // preserve String.replace + var string_replace=String.prototype.replace; + // create String.replace for handling functions + var function_replace=function(regexp, replacement) { + var match, newString="", string=this; + while ((match=regexp.exec(string))) { + // five string replacement arguments is sufficent for cssQuery + newString += string.slice(0, match.index) + replacement(match[0], match[1], match[2], match[3], match[4]); + string=string.slice(match.lastIndex); + } + return newString + string; + }; + // replace 
String.replace + String.prototype.replace=function (regexp, replacement) { + this.replace=(typeof replacement == "function") ? function_replace : string_replace; + return this.replace(regexp, replacement); + }; + } + + return cssQuery; +}();