Find nodes that match a group of CSS selectors in an XML tree.
querySelectorAll.Rd
The purpose of these functions is to mimic the functionality of the
querySelector and querySelectorAll functions present in
Internet browsers. This is so we can succinctly query an XML tree for
nodes matching a CSS selector.
Namespaced functions querySelectorNS and
querySelectorAllNS are also provided to search relative to a
given namespace.
Usage
querySelector(doc, selector, ns = NULL, ...)
querySelectorAll(doc, selector, ns = NULL, ...)
querySelectorNS(doc, selector, ns,
prefix = "descendant-or-self::", ...)
querySelectorAllNS(doc, selector, ns,
prefix = "descendant-or-self::", ...)
Arguments
- doc
The XML document or node to be evaluated against.
- selector
A selector used to query
doc. This must be a single character string.
- ns
The namespace that the query will be filtered to. This is a named list or vector whose names are namespace prefixes and whose values are the corresponding namespace URIs. This can be ignored for the un-namespaced functions.
- prefix
The prefix to apply to the resulting XPath expression. The default or
"" are most commonly used.
- ...
Parameters to be passed on to
css_to_xpath.
Details
The querySelectorNS and querySelectorAllNS functions are
convenience functions for working with namespaced documents. They
filter out all content that does not belong within the given
namespaces. Note that when searching for particular elements in a
selector, they must have a namespace prefix, e.g. "svg|g".
The namespace argument, ns, is simply passed on to
getNodeSet or xml_find_all if
it is necessary to use a namespace present within the document. This
can be ignored for content lacking a namespace, which is usually the
case when using querySelector or querySelectorAll.
Value
For querySelector, the result is a single node that represents
the first matched node from a selector. If no matching nodes are
found, NULL is returned.
For querySelectorAll, the result is a list of XML nodes. This
list may be empty in the case that no match is found.
The querySelectorNS and querySelectorAllNS functions
return the same type of content as their un-namespaced counterparts.
References
CSS Selectors Level 4 https://www.w3.org/TR/selectors-4/, XPath https://www.w3.org/TR/xpath/, querySelectorAll https://developer.mozilla.org/en-US/docs/DOM/Document.querySelectorAll and https://www.w3.org/TR/selectors-api/#interface-definitions.
Examples
hasXML <- require(XML)
#> Loading required package: XML
hasxml2 <- require(xml2)
#> Loading required package: xml2
if (!hasXML && !hasxml2)
return() # can't demo without XML or xml2 packages present
parseFn <- if (hasXML) xmlParse else read_xml
# Demo for working with the XML package (if present, otherwise xml2)
exdoc <- parseFn('<a><b class="aclass"/><c id="anid"/></a>')
querySelector(exdoc, "#anid") # Returns the matching node
#> <c id="anid"/>
querySelector(exdoc, ".aclass") # Returns the matching node
#> <b class="aclass"/>
querySelector(exdoc, "b, c") # First match from grouped selection
#> <b class="aclass"/>
querySelectorAll(exdoc, "b, c") # Grouped selection
#> [[1]]
#> <b class="aclass"/>
#>
#> [[2]]
#> <c id="anid"/>
#>
#> attr(,"class")
#> [1] "XMLNodeSet"
querySelectorAll(exdoc, "b") # A list of length one
#> [[1]]
#> <b class="aclass"/>
#>
#> attr(,"class")
#> [1] "XMLNodeSet"
querySelector(exdoc, "d") # No match
#> NULL
querySelectorAll(exdoc, "d") # No match
#> list()
#> attr(,"class")
#> [1] "XMLNodeSet"
# Read in a document where two namespaces are being set:
# SVG and MathML
svgdoc <- parseFn(system.file("demos/svg-mathml.svg",
package = "selectr"))
# Search for <script/> elements in the SVG namespace
querySelectorNS(svgdoc, "svg|script",
c(svg = "http://www.w3.org/2000/svg"))
#> <script type="application/ecmascript" xlink:href="test2.svg.coords.js"/>
querySelectorAllNS(svgdoc, "svg|script",
c(svg = "http://www.w3.org/2000/svg"))
#> [[1]]
#> <script type="application/ecmascript" xlink:href="test2.svg.coords.js"/>
#>
#> [[2]]
#> <script type="application/ecmascript" xlink:href="test2.svg.convert.js"/>
#>
#> attr(,"class")
#> [1] "XMLNodeSet"
# MathML content is *within* SVG content,
# search for <mtext> elements within the MathML namespace
querySelectorNS(svgdoc, "math|mtext",
c(math = "http://www.w3.org/1998/Math/MathML"))
#> <mtext mathvariant="sans-serif">a</mtext>
querySelectorAllNS(svgdoc, "math|mtext",
c(math = "http://www.w3.org/1998/Math/MathML"))
#> [[1]]
#> <mtext mathvariant="sans-serif">a</mtext>
#>
#> [[2]]
#> <mtext mathvariant="sans-serif">b</mtext>
#>
#> attr(,"class")
#> [1] "XMLNodeSet"
# Search for *both* SVG and MathML content
querySelectorAllNS(svgdoc, "svg|script, math|mo",
c(svg = "http://www.w3.org/2000/svg",
math = "http://www.w3.org/1998/Math/MathML"))
#> [[1]]
#> <mo>+</mo>
#>
#> [[2]]
#> <script type="application/ecmascript" xlink:href="test2.svg.coords.js"/>
#>
#> [[3]]
#> <script type="application/ecmascript" xlink:href="test2.svg.convert.js"/>
#>
#> attr(,"class")
#> [1] "XMLNodeSet"
if (!hasXML)
return() # already demo'd xml2
# Demo for working with the xml2 package
exdoc <- read_xml('<a><b class="aclass"/><c id="anid"/></a>')
querySelector(exdoc, "#anid") # Returns the matching node
#> {xml_node}
#> <c id="anid">
querySelector(exdoc, ".aclass") # Returns the matching node
#> {xml_node}
#> <b class="aclass">
querySelector(exdoc, "b, c") # First match from grouped selection
#> {xml_node}
#> <b class="aclass">
querySelectorAll(exdoc, "b, c") # Grouped selection
#> {xml_nodeset (2)}
#> [1] <b class="aclass"/>
#> [2] <c id="anid"/>
querySelectorAll(exdoc, "b") # A list of length one
#> {xml_nodeset (1)}
#> [1] <b class="aclass"/>
querySelector(exdoc, "d") # No match
#> NULL
querySelectorAll(exdoc, "d") # No match
#> {xml_nodeset (0)}
# Read in a document where two namespaces are being set:
# SVG and MathML
svgdoc <- read_xml(system.file("demos/svg-mathml.svg",
package = "selectr"))
# Search for <script/> elements in the SVG namespace
querySelectorNS(svgdoc, "svg|script",
c(svg = "http://www.w3.org/2000/svg"))
#> {xml_node}
#> <script type="application/ecmascript" href="test2.svg.coords.js">
querySelectorAllNS(svgdoc, "svg|script",
c(svg = "http://www.w3.org/2000/svg"))
#> {xml_nodeset (2)}
#> [1] <script type="application/ecmascript" xlink:href="test2.svg.coords.js"/>
#> [2] <script type="application/ecmascript" xlink:href="test2.svg.convert.js"/>
# MathML content is *within* SVG content,
# search for <mtext> elements within the MathML namespace
querySelectorNS(svgdoc, "math|mtext",
c(math = "http://www.w3.org/1998/Math/MathML"))
#> {xml_node}
#> <mtext mathvariant="sans-serif">
querySelectorAllNS(svgdoc, "math|mtext",
c(math = "http://www.w3.org/1998/Math/MathML"))
#> {xml_nodeset (2)}
#> [1] <mtext mathvariant="sans-serif">a</mtext>
#> [2] <mtext mathvariant="sans-serif">b</mtext>
# Search for *both* SVG and MathML content
querySelectorAllNS(svgdoc, "svg|script, math|mo",
c(svg = "http://www.w3.org/2000/svg",
math = "http://www.w3.org/1998/Math/MathML"))
#> {xml_nodeset (3)}
#> [1] <mo>+</mo>
#> [2] <script type="application/ecmascript" xlink:href="test2.svg.coords.js"/>
#> [3] <script type="application/ecmascript" xlink:href="test2.svg.convert.js"/>