Source code for xsdata.formats.dataclass.parsers.handlers.lxml
from typing import Any
from typing import Iterable
from lxml import etree
from xsdata.exceptions import XmlHandlerError
from xsdata.formats.dataclass.parsers.mixins import SaxHandler
from xsdata.formats.dataclass.parsers.mixins import XmlHandler
from xsdata.models.enums import EventType
# Event types requested from lxml's iterparse/iterwalk in LxmlEventHandler.parse.
EVENTS = (EventType.START, EventType.END, EventType.START_NS)
class LxmlEventHandler(XmlHandler):
    """
    Event handler built on top of the :class:`lxml.etree.iterparse` api.

    :param parser: The parser instance to feed with events
    :param clazz: The target binding model, auto located if omitted.
    """

    __slots__ = ()

    def parse(self, source: Any) -> Any:
        """
        Parse an XML document from a system identifier or an InputSource.

        When xinclude processing is disabled the document is streamed
        through ``iterparse``, ignoring comments and recovering from
        errors. When it is enabled the whole document is parsed first,
        the includes are expanded and the resulting tree is walked.

        :param source: Anything accepted by the lxml parse functions
        :return: The result of :meth:`process_context`
        """
        config = self.parser.config

        if not config.process_xinclude:
            stream = etree.iterparse(
                source,
                EVENTS,
                recover=True,
                remove_comments=True,
            )
            return self.process_context(stream)

        # XInclude needs the complete tree before the includes can be expanded.
        document = etree.parse(source, base_url=config.base_url)  # nosec
        document.xinclude()
        return self.process_context(etree.iterwalk(document, EVENTS))

    def process_context(self, context: Iterable) -> Any:
        """
        Iterate the context and forward every event to the main parser.

        :param context: Iterable of ``(event, element)`` pairs; for
            namespace events the second item is a ``(prefix, uri)`` pair
        :return: The value of the last entry in ``self.objects`` or None
            if nothing was collected
        :raises XmlHandlerError: If an unknown event type is encountered
        """
        for kind, node in context:
            if kind == EventType.START:
                self.parser.start(
                    self.clazz,
                    self.queue,
                    self.objects,
                    node.tag,
                    node.attrib,
                    node.nsmap,
                )
                continue

            if kind == EventType.END:
                self.parser.end(
                    self.queue,
                    self.objects,
                    node.tag,
                    node.text,
                    node.tail,
                )
                # The element has been handed over, drop its content.
                node.clear()
                continue

            if kind == EventType.START_NS:
                prefix, uri = node
                # Falsy prefixes (e.g. the empty string) are registered as None.
                self.parser.register_namespace(prefix or None, uri)
                continue

            raise XmlHandlerError(f"Unhandled event: `{kind}`.")

        if not self.objects:
            return None

        return self.objects[-1][1]
class LxmlSaxHandler(SaxHandler):
    """
    Sax content handler built on top of the :class:`lxml.etree.XMLParser` api.

    :param parser: The parser instance to feed with events
    :param clazz: The target binding model, auto located if omitted.
    """

    __slots__ = ()

    def parse(self, source: Any) -> Any:
        """
        Parse an XML document from a system identifier or an InputSource.

        The underlying lxml parser uses this handler as its target. It
        is configured to ignore comments, recover from errors, clean
        duplicate namespace prefixes, leave entities unresolved and
        avoid network access.

        :param source: Anything accepted by :func:`lxml.etree.parse`
        :return: Whatever :func:`lxml.etree.parse` returns for the
            target parser
        :raises XmlHandlerError: If xinclude processing is enabled in
            the parser config
        """
        xinclude_requested = self.parser.config.process_xinclude
        if xinclude_requested:
            raise XmlHandlerError(
                f"{type(self).__name__} doesn't support xinclude elements."
            )

        options = {
            "target": self,
            "recover": True,
            "remove_comments": True,
            "ns_clean": True,
            "resolve_entities": False,
            "no_network": True,
        }
        sax_parser = etree.XMLParser(**options)

        return etree.parse(source, parser=sax_parser)  # nosec