You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
243 lines
11 KiB
Cython
243 lines
11 KiB
Cython
# XSLT extension elements
|
|
|
|
cdef class XSLTExtension:
    u"""Base class of an XSLT extension element.

    Subclasses override `execute()`.  From within that method they may call
    `apply_templates()` or `process_children()` to let the XSLT processor
    generate further content.
    """
    def execute(self, context, self_node, input_node, output_parent):
        u"""execute(self, context, self_node, input_node, output_parent)

        Execute this extension element.

        Subclasses must override this method.  They may append
        elements to the `output_parent` element here, or set its text
        content.  To this end, the `input_node` provides read-only
        access to the current node in the input document, and the
        `self_node` points to the extension element in the stylesheet.

        Note that the `output_parent` parameter may be `None` if there
        is no parent element in the current context (e.g. no content
        was added to the output tree yet).

        The base class implementation does nothing.
        """
        pass

    def apply_templates(self, _XSLTContext context not None, node, output_parent=None,
                        *, elements_only=False, remove_blank_text=False):
        u"""apply_templates(self, context, node, output_parent=None, elements_only=False, remove_blank_text=False)

        Call this method to retrieve the result of applying templates
        to an element.

        The return value is a list of elements or text strings that
        were generated by the XSLT processor.  If you pass
        ``elements_only=True``, strings will be discarded from the result
        list.  The option ``remove_blank_text=True`` will only discard
        strings that consist entirely of whitespace (e.g. formatting).
        These options do not apply to Elements, only to bare string results.

        If you pass an Element as `output_parent` parameter, the result
        will instead be appended to the element (including attributes
        etc.) and the return value will be `None`.  This is a safe way
        to generate content into the output document directly, without
        having to take care of special values like text or attributes.
        Note that the string discarding options will be ignored in this
        case.

        Raises `MemoryError` if the temporary result parent node cannot
        be allocated, and `TypeError` if the processor generated a result
        node that is neither text nor an element.
        """
        cdef xmlNode* c_parent
        cdef xmlNode* c_old_insert
        cdef xmlNode* c_context_node
        assert context._xsltCtxt is not NULL, "XSLT context not initialised"
        c_context_node = _roNodeOf(node)
        # NOTE: there is no check that `node` belongs to the current input
        # document.  A previously disabled assertion here stated that
        # switching input documents during transformation is not currently
        # supported.

        if output_parent is not None:
            c_parent = _nonRoNodeOf(output_parent)
        else:
            # temporary container that receives the generated content
            c_parent = tree.xmlNewDocNode(
                context._xsltCtxt.output, NULL, <unsigned char*>"fake-parent", NULL)
            if c_parent is NULL:
                # allocation failed; the node would otherwise be installed
                # as insert node and dereferenced when collecting results
                raise MemoryError()

        # redirect the processor output into c_parent for this one call
        c_old_insert = context._xsltCtxt.insert
        context._xsltCtxt.insert = c_parent
        xslt.xsltProcessOneNode(
            context._xsltCtxt, c_context_node, NULL)
        context._xsltCtxt.insert = c_old_insert

        if output_parent is not None:
            return None

        try:
            return self._collectXSLTResultContent(
                context, c_parent, elements_only, remove_blank_text)
        finally:
            # free all intermediate nodes that will not be freed by proxies
            tree.xmlFreeNode(c_parent)

    def process_children(self, _XSLTContext context not None, output_parent=None,
                         *, elements_only=False, remove_blank_text=False):
        u"""process_children(self, context, output_parent=None, elements_only=False, remove_blank_text=False)

        Call this method to process the XSLT content of the extension
        element itself.

        The return value is a list of elements or text strings that
        were generated by the XSLT processor.  If you pass
        ``elements_only=True``, strings will be discarded from the result
        list.  The option ``remove_blank_text=True`` will only discard
        strings that consist entirely of whitespace (e.g. formatting).
        These options do not apply to Elements, only to bare string results.

        If you pass an Element as `output_parent` parameter, the result
        will instead be appended to the element (including attributes
        etc.) and the return value will be `None`.  This is a safe way
        to generate content into the output document directly, without
        having to take care of special values like text or attributes.
        Note that the string discarding options will be ignored in this
        case.

        Raises `MemoryError` if the temporary result parent node cannot
        be allocated, and `TypeError` if the processor generated a result
        node that is neither text nor an element.
        """
        cdef xmlNode* c_parent
        cdef xmlNode* c_old_output_parent
        cdef xslt.xsltTransformContext* c_ctxt = context._xsltCtxt
        # check the context pointer *before* reading any of its fields
        assert c_ctxt is not NULL, "XSLT context not initialised"
        c_old_output_parent = c_ctxt.insert

        # output_parent node is used for adding results instead of
        # elements list used in apply_templates, that's easier and allows to
        # use attributes added to extension element with <xsl:attribute>.

        if output_parent is not None:
            c_parent = _nonRoNodeOf(output_parent)
        else:
            # temporary container that receives the generated content
            c_parent = tree.xmlNewDocNode(
                c_ctxt.output, NULL, <unsigned char*>"fake-parent", NULL)
            if c_parent is NULL:
                # allocation failed; the node would otherwise be installed
                # as insert node and dereferenced when collecting results
                raise MemoryError()

        # redirect the processor output into c_parent while the children of
        # the extension element (c_ctxt.inst) are instantiated
        c_ctxt.insert = c_parent
        xslt.xsltApplyOneTemplate(c_ctxt,
            c_ctxt.node, c_ctxt.inst.children, NULL, NULL)
        c_ctxt.insert = c_old_output_parent

        if output_parent is not None:
            return None

        try:
            return self._collectXSLTResultContent(
                context, c_parent, elements_only, remove_blank_text)
        finally:
            # free all intermediate nodes that will not be freed by proxies
            tree.xmlFreeNode(c_parent)

    cdef _collectXSLTResultContent(self, _XSLTContext context, xmlNode* c_parent,
                                   bint elements_only, bint remove_blank_text):
        # Build the Python result list from the direct children of c_parent.
        #
        # Text nodes become strings (skipped entirely if elements_only is
        # set, and skipped when blank if remove_blank_text is set).  Element
        # nodes are unlinked from c_parent and wrapped in read-only proxies
        # that are marked to free the node after use.  Any other node type
        # raises TypeError.
        cdef xmlNode* c_node
        cdef xmlNode* c_next
        cdef _ReadOnlyProxy proxy
        # TODO(review): the original author wondered whether this should be
        # seeded with _collectAttributes(c_parent, 2) instead.
        cdef list results = []
        c_node = c_parent.children
        while c_node is not NULL:
            # fetch the successor first: element nodes get unlinked from the
            # sibling chain below
            c_next = c_node.next
            if c_node.type == tree.XML_TEXT_NODE:
                if not elements_only:
                    s = funicode(c_node.content)
                    if not remove_blank_text or s.strip():
                        results.append(s)
            elif c_node.type == tree.XML_ELEMENT_NODE:
                proxy = _newReadOnlyProxy(
                    context._extension_element_proxy, c_node)
                results.append(proxy)
                # unlink node and make sure it will be freed later on
                tree.xmlUnlinkNode(c_node)
                proxy.free_after_use()
            else:
                raise TypeError(
                    f"unsupported XSLT result type: {c_node.type}")
            c_node = c_next
        return results
|
|
|
|
|
|
cdef _registerXSLTExtensions(xslt.xsltTransformContext* c_ctxt,
                             extension_dict):
    # Register the shared callback `_callExtensionElement` with the given
    # transform context for every key of `extension_dict`.
    #
    # Iterating the mapping yields its keys, each of which is unpacked as a
    # (namespace, element name) pair.
    # NOTE(review): presumably both parts are UTF-8 encoded byte strings,
    # as the `_xcstr()` conversion suggests -- confirm against the caller.
    for key in extension_dict:
        namespace_utf, tag_utf = key
        xslt.xsltRegisterExtElement(
            c_ctxt,
            _xcstr(tag_utf),
            _xcstr(namespace_utf),
            <xslt.xsltTransformFunction>_callExtensionElement)
|
|
|
|
cdef void _callExtensionElement(xslt.xsltTransformContext* c_ctxt,
                                xmlNode* c_context_node,
                                xmlNode* c_inst_node,
                                void* dummy) with gil:
    # Callback that gets registered for extension elements (see
    # _registerXSLTExtensions).  It looks up the XSLTExtension instance for
    # the (namespace, name) of the stylesheet instruction `c_inst_node`,
    # wraps the involved C nodes in proxies and calls `execute()` on it.
    #
    # No exception ever leaves this function: failures are reported through
    # xsltTransformError() and recorded with context._exc._store_raised().
    cdef _XSLTContext context
    cdef XSLTExtension extension
    cdef python.PyObject* c_entry
    cdef xmlNode* c_root
    cdef _ReadOnlyProxy context_node = None
    cdef _ReadOnlyProxy self_node = None
    cdef object output_parent  # not restricted to ro-nodes

    c_href = _getNs(c_inst_node)
    if c_href is NULL or c_ctxt.xpathCtxt.userData is NULL:
        # A missing namespace is not allowed and a missing user data
        # pointer is checked just for safety; neither should ever happen.
        return
    context = <_XSLTContext>c_ctxt.xpathCtxt.userData

    try:
        try:
            c_entry = python.PyDict_GetItem(
                context._extension_elements, (c_href, c_inst_node.name))
            if c_entry is NULL:
                raise KeyError(
                    f"extension element {funicode(c_inst_node.name)} not found")
            extension = <object>c_entry

            try:
                # build the context proxy nodes
                self_node = _newReadOnlyProxy(None, c_inst_node)

                if _isElement(c_ctxt.insert):
                    output_parent = _newAppendOnlyProxy(self_node, c_ctxt.insert)
                else:
                    # may be the document node or other stuff
                    output_parent = _newOpaqueAppendOnlyNodeWrapper(c_ctxt.insert)

                if (c_context_node.type == tree.XML_DOCUMENT_NODE or
                        c_context_node.type == tree.XML_HTML_DOCUMENT_NODE):
                    # for a document, hand out its root element (if any)
                    c_root = tree.xmlDocGetRootElement(<xmlDoc*>c_context_node)
                    if c_root is NULL:
                        context_node = None
                    else:
                        context_node = _newReadOnlyProxy(self_node, c_root)
                elif (c_context_node.type == tree.XML_ATTRIBUTE_NODE or
                        c_context_node.type == tree.XML_TEXT_NODE or
                        c_context_node.type == tree.XML_CDATA_SECTION_NODE):
                    # this isn't easy to support using read-only
                    # nodes, as the smart-string factory must
                    # instantiate the parent proxy somehow...
                    raise TypeError(f"Unsupported element type: {c_context_node.type}")
                else:
                    context_node = _newReadOnlyProxy(self_node, c_context_node)

                # run the XSLT extension
                context._extension_element_proxy = self_node
                extension.execute(context, self_node, context_node, output_parent)
            finally:
                # always reset the context and release the proxies that
                # were created for this call
                context._extension_element_proxy = None
                if self_node is not None:
                    _freeReadOnlyProxies(self_node)
        except Exception as exc:
            # prefer the exception's text, fall back to its repr()
            try:
                detail = unicode(exc).encode(u"UTF-8")
            except:
                detail = repr(exc).encode(u"UTF-8")
            message = python.PyBytes_FromFormat(
                "Error executing extension element '%s': %s",
                c_inst_node.name, _cstr(detail))
            xslt.xsltTransformError(c_ctxt, NULL, c_inst_node, "%s", message)
            context._exc._store_raised()
        except:
            # just in case
            message = python.PyBytes_FromFormat(
                "Error executing extension element '%s'", c_inst_node.name)
            xslt.xsltTransformError(c_ctxt, NULL, c_inst_node, "%s", message)
            context._exc._store_raised()
    except:
        # no Python functions here - everything can fail...
        xslt.xsltTransformError(c_ctxt, NULL, c_inst_node,
                                "Error during XSLT extension element evaluation")
        context._exc._store_raised()
    finally:
        return  # swallow any further exceptions
|