Skip to content

lxml

xsdata.formats.dataclass.parsers.handlers.lxml

LxmlEventHandler

Bases: XmlHandler

An lxml event handler.

Source code in xsdata/formats/dataclass/parsers/handlers/lxml.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
class LxmlEventHandler(XmlHandler):
    """Event handler that drives the bound parser from lxml events."""

    def parse(self, source: Any, ns_map: Dict[Optional[str], str]) -> Any:
        """Build an lxml event stream for the source and process it.

        Three cases are handled: an already built lxml tree/element is
        walked directly, a source with xinclude processing enabled is
        fully parsed and expanded before being walked, and anything else
        is parsed incrementally.

        Args:
            source: The xml source; a file resource, an input stream,
                or an lxml tree/element.
            ns_map: A namespace prefix-URI recorder map

        Returns:
            Whatever `process_context` returns for the event stream.
        """
        # An lxml tree or element needs no parsing, only walking.
        if isinstance(source, (etree._ElementTree, etree._Element)):
            return self.process_context(etree.iterwalk(source, EVENTS), ns_map)

        config = self.parser.config
        if config.process_xinclude:
            # xinclude() operates on a complete tree, so parse it whole
            # first and walk the expanded result afterwards.
            document = etree.parse(source, base_url=config.base_url)  # nosec
            document.xinclude()
            return self.process_context(etree.iterwalk(document, EVENTS), ns_map)

        events = etree.iterparse(
            source,
            EVENTS,
            recover=True,
            remove_comments=True,
            load_dtd=config.load_dtd,
        )
        return self.process_context(events, ns_map)

    def process_context(
        self,
        context: Iterable[Tuple[str, Any]],
        ns_map: Dict[Optional[str], str],
    ) -> Any:
        """Forward every event of the context to the main parser.

        Args:
            context: The iterable lxml context, yielding (event, payload)
                pairs
            ns_map: A namespace prefix-URI recorder map

        Returns:
            The second item of the last entry in the objects list, or
            None if the objects list is empty.

        Raises:
            XmlHandlerError: If the context yields an event other than
                start, end or start-ns.
        """
        for event, node in context:
            if event == EventType.START:
                self.parser.start(
                    self.clazz,
                    self.queue,
                    self.objects,
                    node.tag,
                    node.attrib,
                    node.nsmap,
                )
                continue

            if event == EventType.END:
                self.parser.end(
                    self.queue,
                    self.objects,
                    node.tag,
                    node.text,
                    node.tail,
                )
                # The element has been handed over; drop its content.
                node.clear()
                continue

            if event == EventType.START_NS:
                # For namespace events the payload is a (prefix, uri) pair;
                # a falsy prefix is recorded as None.
                prefix, uri = node
                self.parser.register_namespace(ns_map, prefix or None, uri)
                continue

            raise XmlHandlerError(f"Unhandled event: `{event}`.")

        if not self.objects:
            return None

        return self.objects[-1][1]

parse(source, ns_map)

Parse the source XML document.

Parameters:

Name Type Description Default
source Any

The XML source: a file resource, an input stream, or an lxml tree/element.

required
ns_map Dict[Optional[str], str]

A namespace prefix-URI recorder map

required

Returns:

Type Description
Any

An instance of the class type representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/handlers/lxml.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def parse(self, source: Any, ns_map: Dict[Optional[str], str]) -> Any:
    """Build an lxml event stream for the source and process it.

    Three cases are handled: an already built lxml tree/element is
    walked directly, a source with xinclude processing enabled is
    fully parsed and expanded before being walked, and anything else
    is parsed incrementally.

    Args:
        source: The xml source; a file resource, an input stream,
            or an lxml tree/element.
        ns_map: A namespace prefix-URI recorder map

    Returns:
        Whatever `process_context` returns for the event stream.
    """
    # An lxml tree or element needs no parsing, only walking.
    if isinstance(source, (etree._ElementTree, etree._Element)):
        return self.process_context(etree.iterwalk(source, EVENTS), ns_map)

    config = self.parser.config
    if config.process_xinclude:
        # xinclude() operates on a complete tree, so parse it whole
        # first and walk the expanded result afterwards.
        document = etree.parse(source, base_url=config.base_url)  # nosec
        document.xinclude()
        return self.process_context(etree.iterwalk(document, EVENTS), ns_map)

    events = etree.iterparse(
        source,
        EVENTS,
        recover=True,
        remove_comments=True,
        load_dtd=config.load_dtd,
    )
    return self.process_context(events, ns_map)

process_context(context, ns_map)

Iterate context and push events to main parser.

Parameters:

Name Type Description Default
context Iterable[Tuple[str, Any]]

The iterable lxml context

required
ns_map Dict[Optional[str], str]

A namespace prefix-URI recorder map

required

Returns:

Type Description
Any

An instance of the class type representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/handlers/lxml.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def process_context(
    self,
    context: Iterable[Tuple[str, Any]],
    ns_map: Dict[Optional[str], str],
) -> Any:
    """Forward every event of the context to the main parser.

    Args:
        context: The iterable lxml context, yielding (event, payload)
            pairs
        ns_map: A namespace prefix-URI recorder map

    Returns:
        The second item of the last entry in the objects list, or
        None if the objects list is empty.

    Raises:
        XmlHandlerError: If the context yields an event other than
            start, end or start-ns.
    """
    for event, node in context:
        if event == EventType.START:
            self.parser.start(
                self.clazz,
                self.queue,
                self.objects,
                node.tag,
                node.attrib,
                node.nsmap,
            )
            continue

        if event == EventType.END:
            self.parser.end(
                self.queue,
                self.objects,
                node.tag,
                node.text,
                node.tail,
            )
            # The element has been handed over; drop its content.
            node.clear()
            continue

        if event == EventType.START_NS:
            # For namespace events the payload is a (prefix, uri) pair;
            # a falsy prefix is recorded as None.
            prefix, uri = node
            self.parser.register_namespace(ns_map, prefix or None, uri)
            continue

        raise XmlHandlerError(f"Unhandled event: `{event}`.")

    if not self.objects:
        return None

    return self.objects[-1][1]