You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

213 lines
7.9 KiB
Cython

# support for XMLSchema validation
from lxml.includes cimport xmlschema
cdef class XMLSchemaError(LxmlError):
"""Base class of all XML Schema errors
"""
cdef class XMLSchemaParseError(XMLSchemaError):
"""Error while parsing an XML document as XML Schema.
"""
cdef class XMLSchemaValidateError(XMLSchemaError):
"""Error while validating an XML document with an XML Schema.
"""
################################################################################
# XMLSchema
cdef XPath _check_for_default_attributes = XPath(
u"boolean(//xs:attribute[@default or @fixed][1])",
namespaces={u'xs': u'http://www.w3.org/2001/XMLSchema'})
cdef class XMLSchema(_Validator):
u"""XMLSchema(self, etree=None, file=None)
Turn a document into an XML Schema validator.
Either pass a schema as Element or ElementTree, or pass a file or
filename through the ``file`` keyword argument.
Passing the ``attribute_defaults`` boolean option will make the
schema insert default/fixed attributes into validated documents.
"""
cdef xmlschema.xmlSchema* _c_schema
cdef _Document _doc
cdef bint _has_default_attributes
cdef bint _add_attribute_defaults
def __cinit__(self):
self._has_default_attributes = True # play it safe
self._add_attribute_defaults = False
def __init__(self, etree=None, *, file=None, bint attribute_defaults=False):
cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt
cdef xmlDoc* c_doc
self._add_attribute_defaults = attribute_defaults
_Validator.__init__(self)
c_doc = NULL
if etree is not None:
doc = _documentOrRaise(etree)
root_node = _rootNodeOrRaise(etree)
c_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
self._doc = _documentFactory(c_doc, doc._parser)
parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(c_doc)
elif file is not None:
file = _getFSPathOrObject(file)
if _isString(file):
filename = _encodeFilename(file)
parser_ctxt = xmlschema.xmlSchemaNewParserCtxt(_cstr(filename))
else:
self._doc = _parseDocument(file, None, None)
parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(self._doc._c_doc)
else:
raise XMLSchemaParseError, u"No tree or file given"
if parser_ctxt is NULL:
raise MemoryError()
xmlschema.xmlSchemaSetParserStructuredErrors(
parser_ctxt, _receiveError, <void*>self._error_log)
if self._doc is not None:
# calling xmlSchemaParse on a schema with imports or
# includes will cause libxml2 to create an internal
# context for parsing, so push an implied context to route
# resolve requests to the document's parser
__GLOBAL_PARSER_CONTEXT.pushImpliedContextFromParser(self._doc._parser)
with nogil:
orig_loader = _register_document_loader()
self._c_schema = xmlschema.xmlSchemaParse(parser_ctxt)
_reset_document_loader(orig_loader)
if self._doc is not None:
__GLOBAL_PARSER_CONTEXT.popImpliedContext()
xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt)
if self._c_schema is NULL:
raise XMLSchemaParseError(
self._error_log._buildExceptionMessage(
u"Document is not valid XML Schema"),
self._error_log)
if self._doc is not None:
self._has_default_attributes = _check_for_default_attributes(self._doc)
self._add_attribute_defaults = attribute_defaults and self._has_default_attributes
def __dealloc__(self):
xmlschema.xmlSchemaFree(self._c_schema)
def __call__(self, etree):
u"""__call__(self, etree)
Validate doc using XML Schema.
Returns true if document is valid, false if not.
"""
cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt
cdef _Document doc
cdef _Element root_node
cdef xmlDoc* c_doc
cdef int ret
assert self._c_schema is not NULL, "Schema instance not initialised"
doc = _documentOrRaise(etree)
root_node = _rootNodeOrRaise(etree)
valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(self._c_schema)
if valid_ctxt is NULL:
raise MemoryError()
try:
if self._add_attribute_defaults:
xmlschema.xmlSchemaSetValidOptions(
valid_ctxt, xmlschema.XML_SCHEMA_VAL_VC_I_CREATE)
self._error_log.clear()
xmlschema.xmlSchemaSetValidStructuredErrors(
valid_ctxt, _receiveError, <void*>self._error_log)
c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
with nogil:
ret = xmlschema.xmlSchemaValidateDoc(valid_ctxt, c_doc)
_destroyFakeDoc(doc._c_doc, c_doc)
finally:
xmlschema.xmlSchemaFreeValidCtxt(valid_ctxt)
if ret == -1:
raise XMLSchemaValidateError(
u"Internal error in XML Schema validation.",
self._error_log)
if ret == 0:
return True
else:
return False
cdef _ParserSchemaValidationContext _newSaxValidator(
self, bint add_default_attributes):
cdef _ParserSchemaValidationContext context
context = _ParserSchemaValidationContext.__new__(_ParserSchemaValidationContext)
context._schema = self
context._add_default_attributes = (self._has_default_attributes and (
add_default_attributes or self._add_attribute_defaults))
return context
@cython.final
@cython.internal
cdef class _ParserSchemaValidationContext:
cdef XMLSchema _schema
cdef xmlschema.xmlSchemaValidCtxt* _valid_ctxt
cdef xmlschema.xmlSchemaSAXPlugStruct* _sax_plug
cdef bint _add_default_attributes
def __cinit__(self):
self._valid_ctxt = NULL
self._sax_plug = NULL
self._add_default_attributes = False
def __dealloc__(self):
self.disconnect()
if self._valid_ctxt:
xmlschema.xmlSchemaFreeValidCtxt(self._valid_ctxt)
cdef _ParserSchemaValidationContext copy(self):
assert self._schema is not None, "_ParserSchemaValidationContext not initialised"
return self._schema._newSaxValidator(
self._add_default_attributes)
cdef void inject_default_attributes(self, xmlDoc* c_doc):
# we currently need to insert default attributes manually
# after parsing, as libxml2 does not support this at parse
# time
if self._add_default_attributes:
with nogil:
xmlschema.xmlSchemaValidateDoc(self._valid_ctxt, c_doc)
cdef int connect(self, xmlparser.xmlParserCtxt* c_ctxt, _BaseErrorLog error_log) except -1:
if self._valid_ctxt is NULL:
self._valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(
self._schema._c_schema)
if self._valid_ctxt is NULL:
raise MemoryError()
if self._add_default_attributes:
xmlschema.xmlSchemaSetValidOptions(
self._valid_ctxt, xmlschema.XML_SCHEMA_VAL_VC_I_CREATE)
if error_log is not None:
xmlschema.xmlSchemaSetValidStructuredErrors(
self._valid_ctxt, _receiveError, <void*>error_log)
self._sax_plug = xmlschema.xmlSchemaSAXPlug(
self._valid_ctxt, &c_ctxt.sax, &c_ctxt.userData)
cdef void disconnect(self):
if self._sax_plug is not NULL:
xmlschema.xmlSchemaSAXUnplug(self._sax_plug)
self._sax_plug = NULL
if self._valid_ctxt is not NULL:
xmlschema.xmlSchemaSetValidStructuredErrors(
self._valid_ctxt, NULL, NULL)
cdef bint isvalid(self):
if self._valid_ctxt is NULL:
return 1 # valid
return xmlschema.xmlSchemaIsValid(self._valid_ctxt)