#!/usr/bin/perl -w
#
# domExtensions -- useful methods on top of XML::DOM.
#
# Written 2010-04-01~23 by Steven J. DeRose, sderose@acm.org.
# 2010-10-08 sjd: Convert to a real package, normalize naming.
#
# Integrate into:
#     xxx2html
#

use strict;
use XML::Parser;
use XML::DOM;
#use XML::XPath;

package DOMextensions;

my $version = "2010-10-08";

sub new {
    my ($class) = @_;
    my $self = {
        source      => "",
        theDOM      => undef,
    };
    bless $self, $class;
    return($self);
}

sub getVersion {
    return($version);
}


###############################################################################
# Methods to walk the XPath axes, selecting by gi, attr, value, and n.
#
#   XPath axis               DOM properties and methods()
#    ancestor
#    attribute                 attributes, getAttribute(), getAttributeNode()
#    child                     childNodes, firstChild, lastChild, 
#    descendant
#    following
#    following-sibling         nextSibling
#    namespace
#    parent                    parentNode
#    preceding
#    preceding-sibling         previousSibling
#    self
#
# Additional DOM Node methods:
#    ownerDocument, textContent, tagName
#    nodeName, nodeType, nodeValue, localName, prefix, namespaceURI
#    appendChild(), insertBefore(), removeChild(), replaceChild()
#    removeAttribute(), removeAttributeNode(),
#    setAttribute(), setAttributeNode()
#    hasAttribute(), hasAttributes(), hasChldNodes()
#    compareDocumentPosition(), isEqualNode, isSameNode()
#    getElementsByTagName()
#    normalize()
#
sub selectAncestor {
    my ($self,$n,$type,$aname,$avalue) = @_;
    if (!defined $type) { $type = ""; }
    if (!defined $n) { $n = 1; }
    while($self=getParentNode($self)) {
        if (nodeMatches($self,$type,$aname,$avalue)) {
            $n--;
            if ($n<=0) { return($self); }
        }
    }
    return(undef);
}

sub selectChild {
    my ($self,$n,$type,$aname,$avalue) = @_;
    if (!defined $type) { $type = ""; }
    if (!defined $n) { $n = 1; }
    $self = $self->getFirstChild();
    while(defined $self) {
        if (nodeMatches($self,$type,$aname,$avalue)) {
            $n--;
            if ($n<=0) { return($self); }
        }
        $self = $self->getNextSibling();
    }
    return(undef);
}

sub selectDescendant {
    my ($self,$n,$type,$aname,$avalue) = @_;
    setN((defined $n) ? $n:1);
    return(selectDescendantR($self,$n,$type,$aname,$avalue));
}

sub selectDescendantR { # XXX FIX ???
    my ($self,$n,$type,$aname,$avalue) = @_;
    if (nodeMatches($self,$type,$aname,$avalue)) {
        $n--;
        if ($n<=0) { return($self); }
    }
    for my $ch ($self->getChildNodes()) {
        my $node = getDescendantByAttributeR($ch,$n,$type,$aname,$avalue);
        if (defined $node) {
            $n--;
            if ($n<=0) { return($node); }
        }
    }
    return(undef);
}

sub selectPreceding {
    my ($self,$n,$type,$aname,$avalue) = @_;
    if (!defined $n) { $n = 1; }
    if (!defined $type) { $type = ""; }
    while($self=$self->getPreceding()) {
        if (nodeMatches($self,$type,$aname,$avalue)) {
            $n--;
            if ($n<=0) { return($self); }
        }
    }
    return(undef);
}
sub getPreceding {
    my ($self) = @_;
    my $node = $self->getPreviousSibling();
    if (defined $node) { 
        while(my $next = $node->getFirstChild()) {
            $node = $next;
        }
        return($node);
    }
    return($self->getParentNode());
}

sub selectFollowing {
    my ($self,$n,$type,$aname,$avalue) = @_;
    if (!defined $n) { $n = 1; }
    if (!defined $type) { $type = ""; }
    while($self=getFollowing($self)) {
        if (nodeMatches($self,$type,$aname,$avalue)) {
            $n--;
            if ($n<=0) { return($self); }
        }
    }
    return(undef);
}
sub getFollowing {
    my $self = $_[0];
    my $node = $self->getFirstChild();
    if (defined $node) { return($node); }
    $node = $self->getNextSibling();
    if (defined $node) { return($node); }
    while ($self = $self->getParentNode()) {
        if ($self->getNextSibling()) { return($self->getNextSibling()); }
    }
    return(undef);
}

sub selectPrecedingSibling {
    my ($self,$n,$type,$aname,$avalue) = @_;
    if (!defined $type) { $type = ""; }
    if (!defined $n) { $n = 1; }
    while($self=getPreviousSibling($self)) {
        if (nodeMatches($self,$type,$aname,$avalue)) {
            $n--;
            if ($n<=0) { return($self); }
        }
    }
    return(undef);
}

sub selectFollowingSibling {
    my ($self,$n,$type,$aname,$avalue) = @_;
    if (!defined $type) { $type = ""; }
    if (!defined $n) { $n = 1; }
    while($self=getNextSibling($self)) {
        if (nodeMatches($self,$type,$aname,$avalue)) {
            $n--;
            if ($n<=0) { return($self); }
        }
    }
    return(undef);
}


###############################################################################
#
sub getLeftBranch {
    my ($self) = @_;
    (defined $self) || return(undef);
    while(my $fc = $self->getFirstChild()) {
        $self = $fc;
    }
    return($self);
}

sub getRightBranch {
    my ($self) = @_;
    (defined $self) || return(undef);
    while(my $fc = $self->getLastChild()) {
        $self = $fc;
    }
    return($self);
}

sub getDepth {
    my ($self) = @_;
    my $d = 0;
    while(defined $self) {
        $d++;
        $self = $self->getParentNode();
    }
    return($d);
}

sub getFQGI {
    my ($self) = @_;
    my $f = "";
    while(defined $self) {
        $f = "/" . $self->getNodeName() . $f;
        $self = $self->getParentNode();
    }
    return($f);
}

sub getXPointer {
    my ($self) = @_;
    my $f = "";
    while(defined $self) {
        $f = "/" . $self->getParentNode()->getChildIndex($self) . $f;
        $self = $self->getParentNode();
    }
    return($f);
}
sub xPointerCompare {
    my ($self,$xp1,$xp2) = @_;
    my @t1 = split(/\//,$xp1);
    my @t2 = split(/\//,$xp2);
    for (my $i=0; $i<scalar @t1 && $i<scalar @t2; $i++) {
        if ($t1[$i] < $t2[$i]) { return(-1); }
        if ($t1[$i] > $t2[$i]) { return( 1); }
    }
    # At least one of them ran out...
    return (scalar @t1 <=> scalar @t2);
}

sub nodeCompare {
    my ($self,$other) = @_;
    return($self->xPointerCompare(
               $self->getXPointer(),$other->getXPointer()));
}

sub XPointerInterpret {
    my ($self, $xp) = @_;
    my $document = $self->ownerDocument();
    my $node = $document->getDocumentElement();
    my @t = split(/\//,$xp);
    for (my $i=0; $i<scalar @t; $i++) {
        my $node = $node->getChildAtIndex($t[$i]);
        (defined $node) || return(undef);
    }
    return($node);
}

# Check if a node matches the supported selection constraints:
#     If type is non-nil, "*" matches any *element*, else name must be right.
#     If aname is non-nil, attribute must exist
#     If avalue is non-nill, attribute aname must = avalue.
#
sub nodeMatches {
    my ($self,$type,$aname,$avalue) = @_;

    if (defined $type && $type ne "") {              # check type constraint
        if ($type eq "*") {
            if ($self->getNodeType!=1) { return(0); }
        }
        else {
            if ($self->getNodeName() ne $type) { return(0); }
        }
    }

    if (!defined $aname) {                           # No attribute constraint
        return(1);
    }
    my $thisvalue = $self->getAttribute($aname);
    if (!defined $thisvalue) {                       # attr specified, absent
        return(0);
    }
    if (!defined $avalue || $thisvalue eq $avalue) { # attr matches
        return(1);
    }
    return(0);
}


# Search upward to find an assignment to an attribute -- like xml:lang.
#
sub getInheritedAttribute {
    my ($self,$aname) = @_;
    do {
        my $avalue = $self->getAttribute($aname);
        if (defined $avalue) { return($avalue); }
        $self = $self->getParentNode();
    } while($self);
    return(undef);
}


# Assemble the entire attribute list, escaped as needed to write out as XML.
#
sub getEscapedAttributeList {
    my ($self) = @_;
    my $buf = "";
    (defined $self) || return(undef);
    my $alist = $self->getAttributes();
    (defined $alist) || return($buf);
    for (my $i=0; $i<$alist->getLength(); $i++) {
        my $anode = $alist->item($i);
        my $aname  = $anode->getName();
        my $avalue = escapeAttribute($anode->getValue());
        $buf .= " $aname=\"$avalue\"";
    }
    return($buf);
}

sub deleteWhiteSpaceNodes {
    my ($self) = @_;
    # print "running deleteWhiteSpaceNodesCB\n";
    forEachNode($self,\&deleteWhiteSpaceNodesCB,undef);
}
sub deleteWhiteSpaceNodesCB {
    my ($self) = @_;
    ($self->getNodeName eq "#text") || return;
    my $t = $self->getData();
    ($t =~ m/[^\s]/) && return;
    $self->getParentNode()->removeChild($self);
}


###############################################################################
#
sub normalizeSpace {
    my ($self) = @_;
    forEachNode($self,\&normalizeSpaceCB,undef);
}
sub normalizeSpaceCB {
    my ($self) = @_;
    ($self->getNodeName eq "#text") || return;
    $self->setData(normalizeSpace($self->getData()))
}

sub normalizeDashChars {
    my ($self) = @_;
    forEachNode($self,\&normalizeDashCharsCB,undef);
}
sub normalizeDashCharsCB {
    my ($self) = @_;
    ($self->getNodeName eq "#text") || return;
    $self->setData(normalize_DashChars($self->getData()))
}

sub normalizeDQuoteChars {
    my ($self) = @_;
    forEachNode($self,\&normalizeDQuoteCharsCB,undef);
}
sub normalizeDQuoteCharsCB {
    my ($self) = @_;
    ($self->getNodeName eq "#text") || return;
    $self->setData(normalize_DQuoteChars($self->getData()))
}

sub normalizeSQuoteChars {
    my ($self) = @_;
    forEachNode($self,\&normalizeSQuoteCharsCB,undef);
}
sub normalizeSQuoteCharsCB {
    my ($self) = @_;
    ($self->getNodeName eq "#text") || return;
    $self->setData(normalize_SQuoteChars($self->getData()))
}


###############################################################################
# Take a block of contiguous siblings and enclose them in a new intermediate
# parent node (which in inserted at the level they *used* to be at).
#
sub groupSiblings {
    my ($self, $first, $last, $newParentType) = @_;
    (defined $first) || return;
    my $oldParent = $first->getParentNode();
    my $newParent = $first->getOwnerDocument()->createElement($newParentType);
    $oldParent->insertChildBefore($newParent,$first);

    my $next;
    for (my $cur = $first; defined $cur; $cur=$next) {
        $next = $cur->getNextSibling();
        my $moving = $oldParent->removeChild($cur);
        $newParent->insertBefore($moving,undef);
    }
} # groupSiblings


###############################################################################
# Remove the given node, promoting all its children.
#
sub promoteChildren {
    my ($self) = @_;
    my $parent = $self->getParentNode();
    my $next;
    for (my $cur = $self->getFirstChild(); defined $cur; $cur=$next) {
        $next = $cur->getNextSibling();
        my $moving = $self->removeChild($cur);
        $parent->insertBefore($moving,$parent);
    }
    $parent->deleteChild($self);
}


###############################################################################
# Traverse a subtree given its root, and call separate callbacks before
# and after traversing each subtree. Callbacks might, for example,
# respectively generate the start- and end-tags for the element. Callbacks
# are allowed to be undef if not needed.
#
# If a callback returns true, we stop traversing.
# 
sub forEachNode {
    my ($self,$callbackA,$callbackB,$depth) = @_;
    if (!defined $depth) { $depth = 1; }
    (defined $self) || return(0);
    my $name = $self->getNodeName();
    if (defined $callbackA) {
        if ($callbackA->($self,$name,$depth)) { return(1); }
    }
    if ($self->hasChildNodes()) {
        #print "Recursing for child nodes at level " . ($depth+1) . "\n";
        for my $ch ($self->getChildNodes()) {
            my $rc = forEachNode($ch,$callbackA,$callbackB,$depth+1);
            if ($rc) { return(1); }
        }
    }
    if (defined $callbackB) {
        if ($callbackB->($self,$name,$depth)) { return(1); }
    }
    return(0); # succeed
} # forEachNode


###############################################################################
# Cat together all descendant text nodes, with spaces between.
#    (at the moment, it leaves a stray $delim at the start)
#
sub collectAllText {
    my ($self,$delim) = @_;
    (defined $self) || return("");
    if (!defined $delim) { $delim = " "; }
    my $name = $self->getNodeName();
    my $textBuf = "";
    if ($self->getNodeName() eq "#text") {
        $textBuf = $delim . $self->getData();
    }
    elsif ($self->hasChildNodes()) {
        for my $ch ($self->getChildNodes()) {
            $textBuf .= collectAllText($ch,$delim);
        }
    }
    return($textBuf);
} # collectAllText


###############################################################################
# Collect a subtree as WF XML, with appropriate escaping. Can also
# put a delimiter before each start-tag; if it contains '\n', then the
# XML will also be hierarchically indented. -color applies.
#
sub collectAllXML {
    my ($self,$delim,$depth,$ascii) = @_;
    if (!defined $delim) { $delim = " "; }
    if (!defined $depth) { $depth = 1; }
    if (!defined $ascii) { $ascii = 0; }
    my $indent = ($delim =~ m/\n/) ? ("  " x $depth) : "";
    my $name = $self->getNodeName();
    my $textBuf = "";
    if ($self->getNodeName() eq "#text") {
        if ($ascii) { $textBuf = escapeXML($self->getData()); }
        else        { $textBuf = escapeASCII($self->getData()); }
    }
    else {
        $textBuf = "$delim$indent<$name" .
            getEscapedAttributeList($self) . ">";
        if ($self->hasChildNodes()) {
            for my $ch ($self->getChildNodes()) {
                $textBuf .= collectAllXML($ch,$delim,$depth+1);
            }
        }
        $textBuf .= "</$name>";
    }
    return($textBuf);
} # collectAllXML


###############################################################################
###############################################################################
# Index should be a separate object.
#
package Dom::Index;

sub new {
    my ($class, $document, $aname) = @_;
    my %index = ();
    my $node = $document->getDocumentelement();
    do {
        if ($node->getNodeType==1) {  # elements
            my $key = $node->getAttribute($aname);
            if (defined $key) { $index{$key} = $node; }
        }
        $node = $node->getFolowing();
    } while($node);
    my $self = {
        document    => $document,
        attrName    => $aname,
        theIndex    => \%index,
    };
    bless $self, $class;
    return(\%index);
}

sub find {
    my ($self, $avalue) = @_;
    my $indexRef = $self->{theIndex};
    my %index = %$indexRef;
    return($index{$avalue});
}


###############################################################################
###############################################################################
#
package CharStuff;

sub new {
    my ($class) = @_;
    my $self = {
        spaceChars  => setupSpaces(),
        dashChars   => setupDashes(),
        sQuoteChars => setupSQuotes(),
        dQuoteChars => setupDQuotes(),
    };
    bless $self, $class;
    return($self);
}

sub escapeAttribute {
    my ($self,$s) = @_;
    $s =~ s/</&lt;/g;
    $s =~ s/"/&quo;/g;
    $s =~ s/&/&amp;/g;
    return($s);
}
sub escapeXML {
    my ($self,$s) = @_;
    $s =~ s/</&lt;/g;
    $s =~ s/"/&quo;/g;
    $s =~ s/]]>/]]&gt;/g;
    $s =~ s/&/&amp;/g;
    return($s);
}
sub escapeASCII {
    my ($self,$s) = @_;
    $s =~ s/(\P{IsASCII})/ { sprintf("&#x%04x;",ord($1)); }/ge;
    $s = xmlEscape($s);
    return($s);
}
sub unescapeXML {
    my ($self,$s) = @_;
    $s =~ s/&lt;/</g;
    $s =~ s/&gt;/>/g;
    $s =~ s/&quo;/"/g;
    $s =~ s/&apos;/'/g;
    $s =~ s/&#x([0-9a-f]+);/{ chr(hex($1)); }/gie;
    $s =~ s/&#([0-9]+);/{ chr($1); }/gie;
    $s =~ s/&amp;/&/g;
    return($s);
}

sub normalize_Space {
    my ($self,$s) = @_;
    $s =~ s/\s\s+/ /g;
    $s =~ s/^\s+//g;
    $s =~ s/\s+$//g;
    return($s);
}

sub normalize_DashChars {
    my ($self,$s) = @_;
    $s =~ s/[$self->{dashChars}]/-/g;
    return($s);
}

sub normalize_DQuoteChars {
    my ($self,$s) = @_;
    $s =~ s/[$self->{dQuoteChars}]/"/g;
    return($s);
}

sub normalize_SQuoteChars {
    my ($self,$s) = @_;
    $s =~ s/[$self->{sQuoteChars}]/'/g;
    return($s);
}

sub normalize_SpaceChars {
    my ($self,$s) = @_;
    $s =~ s/[$self->{spaceChars}]/ /g;
    return($s);
}

sub setupDashes {
    my ($self,$s) = @_;
    my $rc = 
        # Leave out regular hyphen so doesn't mess up regex, and since it's
        # what we normalize *to*.
        #
        chr(0x000ad) . # soft hyphen
        chr(0x0058a) . # armenian hyphen      
        chr(0x01806) . # mongolian todo soft hyphen
        chr(0x01b60) . # balinese pameneng (line-breaking hyphen)
        chr(0x02010) . # 008208) . hyphen) .        '-',
        chr(0x02011) . # non-breaking hyphen,
        chr(0x02012) . # figure dash
        chr(0x02013) . # 008211) . ndash,         '-',
        chr(0x02014) . # 008212, mdash,         '--',
        chr(0x02015) . # 008213, horbar,        '--',
        chr(0x02027) . # hyphenation point
        chr(0x02043) . # hyphen bullet
        chr(0x02053) . # swung dash
        #chr(0x21E0) .	# LEFTWARDS DASHED ARROW
        #chr(0x21E1) .	# UPWARDS DASHED ARROW
        #chr(0x21E2) .	# RIGHTWARDS DASHED ARROW
        #chr(0x21E3) .	# DOWNWARDS DASHED ARROW
        chr(0x0229d) . # circled dash
        chr(0x02448) . # ocr dash

        # Box-drawing dashes (maybe unlikely, but...):
        #
        #chr(0x2504) .	# ... LIGHT TRIPLE DASH HORIZONTAL
        #chr(0x2505) .	# ... HEAVY TRIPLE DASH HORIZONTAL
        #chr(0x2508) .	# ... LIGHT QUADRUPLE DASH HORIZONTAL
        #chr(0x2509) .	# ... HEAVY QUADRUPLE DASH HORIZONTAL
        #chr(0x254C) .	# ... LIGHT DOUBLE DASH HORIZONTAL
        #chr(0x254D) .	# ... HEAVY DOUBLE DASH HORIZONTAL

        chr(0x02e17) . # double oblique hyphen
        chr(0x02E1A) . # HYPHEN WITH DIAERESIS
        chr(0x0301c) . # wave dash
        chr(0x03030) . # wavy dash
        chr(0x030a0) . # katakana-hiragana double hyphen
        chr(0x0FE49) . # DASHED OVERLINE
        chr(0x0FE4D) . # DASHED LOW LINE
        chr(0x0FE58) . # small em dash
        chr(0x0fe63) . # small hyphen-minus
        chr(0x0ff0d) . # fullwidth hyphen-minus
        "";
    return $rc;
}

sub setupDQuotes {
    my ($self,$s) = @_;
    my $dQuotes = 
    chr(0x00AB) .	# LEFT-POINTING DOUBLE ANGLE QUOTATION MARK *
    chr(0x00BB) .	# RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK *
    chr(0x201C) .	# LEFT DOUBLE QUOTATION MARK
    chr(0x201D) .	# RIGHT DOUBLE QUOTATION MARK
    chr(0x201E) .	# DOUBLE LOW-9 QUOTATION MARK
    chr(0x201F) .	# DOUBLE HIGH-REVERSED-9 QUOTATION MARK
    chr(0x2358) .	# APL FUNCTIONAL SYMBOL QUOTE UNDERBAR
    chr(0x235E) .	# APL FUNCTIONAL SYMBOL QUOTE QUAD
    chr(0x275D) .	# HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT
    chr(0x275E) .	# HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
    chr(0x301D) .	# REVERSED DOUBLE PRIME QUOTATION MARK
    chr(0x301E) .	# DOUBLE PRIME QUOTATION MARK
    chr(0x301F) .	# LOW DOUBLE PRIME QUOTATION MARK
    "";
    return($dQuotes);
}

sub setupSQuotes {
    my ($self,$s) = @_;
    my $sQuotes =
    chr(0x2018) .	# LEFT SINGLE QUOTATION MARK
    chr(0x2019) .	# RIGHT SINGLE QUOTATION MARK
    chr(0x201A) .	# SINGLE LOW-9 QUOTATION MARK
    chr(0x201B) .	# SINGLE HIGH-REVERSED-9 QUOTATION MARK
    chr(0x2039) .	# SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    chr(0x203A) .	# SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    chr(0x275B) .	# HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT
    chr(0x275C) .	# HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT
    chr(0x276E) .	# HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT
    chr(0x276F) .	# HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT
    "";
    return($sQuotes);
}


sub setupSpaces {
    my ($self,$s) = @_;
    my $spaces = 
    chr(0x0009) .    # TAB
    chr(0x000A) .    # LINE FEED
    chr(0x000B) .    # VERTICAL TAB
    chr(0x000C) .    # FORM FEED
    chr(0x000D) .    # CARRIAGE RETURN
    chr(0x0020) .    # SPACE
    chr(0x00A0) .    # NO-BREAK SPACE

    chr(0x1680) .    # OGHAM SPACE MARK

    chr(0x2002) .    # EN SPACE
    chr(0x2003) .    # EM SPACE
    chr(0x2004) .    # THREE-PER-EM SPACE
    chr(0x2005) .    # FOUR-PER-EM SPACE
    chr(0x2006) .    # SIX-PER-EM SPACE
    chr(0x2007) .    # FIGURE SPACE
    chr(0x2008) .    # PUNCTUATION SPACE
    chr(0x2009) .    # THIN SPACE
    chr(0x200A) .    # HAIR SPACE
    chr(0x200B) .    # ZERO WIDTH SPACE
    chr(0x202F) .    # NARROW NO-BREAK SPACE
    chr(0x205F) .    # MEDIUM MATHEMATICAL SPACE

    chr(0x2409) .    # SYMBOL FOR HORIZONTAL TABULATION
    chr(0x240B) .    # SYMBOL FOR VERTICAL TABULATION
    chr(0x2420) .    # SYMBOL FOR SPACE

    chr(0x3000) .    # IDEOGRAPHIC SPACE
    chr(0x303F) .    # IDEOGRAPHIC HALF FILL SPACE

    chr(0xFEFF) .    # ZERO WIDTH NO-BREAK SPACE (= byte-order mark)
    "";
    return($spaces);
}


###############################################################################
#
sub showUsage {
    print "

=head1 Notes

An XML-manipulation package that sits on top of XML::DOM and provides
higher-level, XPath-like methods.

It's very often useful to walk around the XPath 'axes' in a DOM tree.
When doing so, it's often useful to consider only with element nodes,
or only #text nodes, or only element nodes of a certain element type, or
with a certain attribute or attribute-value pair.

Thus, for the relevant XPath axes, there are calls such as these (illustrated
for the Child axis, but you can substitute Descendant, PrecedingSibling,
FollowingSibling, Preceding, Following, or Ancestor):

    getChildByAttribute(node,n,type,attributeName,attributeValue)

This will return the n-th child of node, which is of the given element type,
and has the given attributeName=attributeValue pair.

=over 

=item * If type is undefined or '',
any node type is allowed; if it is '*', any *element* is allowed (in both 
cases, attribute constraints may still apply). 

=item * If attributeName is undef or '',
no attribute is required.

=item * If attributeValue is undef or '', the attribute
named by attributeName must be present, but may have any value (including '').

=back

=head3 Other methods

=over

=item getInheritedAttribute(name)

=item getLeftBranch() -- returns the leftmost descendant of node. This
is not the first descendant, which would be the first child, since nodes
are ordered before their descendants.

=item getRightBranch() -- returns the rightmost (last) descendant of node.

=item getDepth() -- returns how deeply nested the node is.

=item getFQGI() -- returns the list of element types of the node's
ancestors, from the root down, separated by '/'.

=item getXPointer() -- returns the XPointer child sequence to node.
That is, the list of child-numbers for all the ancestors of the node, from
the root down, separated by '/'. This is a fine unique name for the node's
location in the document.

=item XPointerCompare(x1,x2) -- takes two XPointer child-sequences as just
described, and compares them for document order, returning -1, 0, or 1.
This does not require actually looking at the tree at all.

=item nodeCompare(n1,n2) -- takes two XML::DOM nodes, and compares them
for document order, returning -1, 0, or 1.

=item XPointerInterpret(document,x) -- Interprets the XPointer child sequence
x, in the context of the given document, and returns the node it identifies,
or undef if there is no such node.

=item getEscapedAttributeList() -- returns the entire attribute list for 
node, escaped as needed within an XML start-tag.

=item deleteWhiteSpaceNodes() -- deletes all of them.

=item normalizeAllSpace() -- does the equivalent of XSLT normalize-space()
on all text nodes.

=item mergeWithFollowingSibling() -- 

=item groupSiblings(typeForNewParent) -- 

=item promoteChildren() -- 

=item forEachNode(preCallback,postCallback) -- traverse the subtree headed
at node, calling the callbacks before and after traversing each node's subtree. 

=item collectAllText(delimiter) -- concatenates together the content of
all the text nodes in a subtree, putting the delimiter in between.

=item collectAllXML() --  generates the XML representation for the
subtree headed at node. If the delimiter contains a newline, the subtree
will include newlines and indentation.

=back


=head2 Index package

=over

=item buildIndex(attributeName) -- Returns a hash table in which each 
entry has the value of the specified attribute as key, and the element on
which the attribute occurred as value. This is similar to the XSLT 'key'
feature.

=item find(value)

=back


=head2 Character stuff package

=over

=item escapeAttribute(string) -- escapes the string as needed for it to
fit in an attribute value.

=item escapeXML(string) -- escapes the string as needed for it to
fit in XML text content.

=item escapeASCII(string) -- escapes the string as needed for it to
fit in XML text content, *and* recodes and non-ASCII characters as XML
numeric characters references.

=item unescapeXML(string) -- Changes XML numeric characters references, as
well as references to the 5 pre-defined XML named entities, into the
corresponding literal characters.

=back


=head1 Ownership

This work by Steven J. DeRose is licensed under a Creative Commons 
Attribution-Share Alike 3.0 Unported License. For further information on
this license, see http://creativecommons.org/licenses/by-sa/3.0/.

The author's present email is sderose at acm.org.

For the most recent version, see http://www.derose.net/steve/utilities/.

=cut
";
}