Skip to content

native

xsdata.formats.dataclass.parsers.handlers.native

XmlEventHandler

Bases: XmlHandler

A native xml event handler.

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
class XmlEventHandler(XmlHandler):
    """Native xml event handler that feeds etree events to the main parser."""

    def parse(self, source: Any) -> Any:
        """Build an event stream for the source and process it.

        Args:
            source: A file resource, an input stream, an etree
                ElementTree or an etree Element.

        Returns:
            Whatever `process_context` returns for the generated events.
        """
        # A full tree is reduced to its root element first.
        if isinstance(source, etree.ElementTree):
            source = source.getroot()

        # Already parsed input: walk the existing element directly.
        if isinstance(source, etree.Element):
            return self.process_context(iterwalk(source, {}))

        config = self.parser.config
        if not config.process_xinclude:
            # No xinclude processing requested: stream the events.
            return self.process_context(etree.iterparse(source, EVENTS))  # nosec

        # XInclude needs the whole tree loaded before it can be expanded.
        document_root = etree.parse(source).getroot()  # nosec
        include_loader = functools.partial(
            xinclude_loader,
            base_url=get_base_url(config.base_url, source),
        )
        xinclude.include(document_root, loader=include_loader)
        return self.process_context(iterwalk(document_root, {}))

    def process_context(self, context: Iterable[Tuple[str, Any]]) -> Any:
        """Forward every event of the context to the main parser.

        Args:
            context: An iterable of (event, payload) pairs

        Returns:
            The second item of the last entry in `self.objects`, or None
            when `self.objects` is empty.

        Raises:
            XmlHandlerError: If an event is not a start, end or start-ns event.
        """
        pending_ns: Dict = {}
        for event, node in context:
            if event == EventType.START:
                self.parser.start(
                    self.clazz,
                    self.queue,
                    self.objects,
                    node.tag,
                    node.attrib,
                    self.merge_parent_namespaces(pending_ns),
                )
                # The collected declarations were handed over with this
                # element, start a fresh mapping for the next one.
                pending_ns = {}
                continue

            if event == EventType.END:
                self.parser.end(
                    self.queue,
                    self.objects,
                    node.tag,
                    node.text,
                    node.tail,
                )
                # NOTE(review): this also clears elements of a tree passed
                # in by the caller -- confirm that is intended.
                node.clear()
                continue

            if event == EventType.START_NS:
                prefix, uri = node
                # An empty prefix is stored under the None key.
                pending_ns[prefix or None] = uri
                continue

            raise XmlHandlerError(f"Unhandled event: `{event}`.")

        if not self.objects:
            return None

        return self.objects[-1][1]

parse(source)

Parse the source XML document.

Parameters:

Name Type Description Default
source Any

The XML source: a file resource, an input stream, or an XML tree/element.

required

Returns:

Type Description
Any

An instance of the class type representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def parse(self, source: Any) -> Any:
    """Build an event stream for the source and process it.

    Args:
        source: A file resource, an input stream, an etree
            ElementTree or an etree Element.

    Returns:
        Whatever `process_context` returns for the generated events.
    """
    # A full tree is reduced to its root element first.
    if isinstance(source, etree.ElementTree):
        source = source.getroot()

    # Already parsed input: walk the existing element directly.
    if isinstance(source, etree.Element):
        return self.process_context(iterwalk(source, {}))

    config = self.parser.config
    if not config.process_xinclude:
        # No xinclude processing requested: stream the events.
        return self.process_context(etree.iterparse(source, EVENTS))  # nosec

    # XInclude needs the whole tree loaded before it can be expanded.
    document_root = etree.parse(source).getroot()  # nosec
    include_loader = functools.partial(
        xinclude_loader,
        base_url=get_base_url(config.base_url, source),
    )
    xinclude.include(document_root, loader=include_loader)
    return self.process_context(iterwalk(document_root, {}))

process_context(context)

Iterate context and push events to main parser.

Parameters:

Name Type Description Default
context Iterable[Tuple[str, Any]]

The iterable xml context

required

Returns:

Type Description
Any

An instance of the class type representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def process_context(self, context: Iterable[Tuple[str, Any]]) -> Any:
    """Forward every event of the context to the main parser.

    Args:
        context: An iterable of (event, payload) pairs

    Returns:
        The second item of the last entry in `self.objects`, or None
        when `self.objects` is empty.

    Raises:
        XmlHandlerError: If an event is not a start, end or start-ns event.
    """
    pending_ns: Dict = {}
    for event, node in context:
        if event == EventType.START:
            self.parser.start(
                self.clazz,
                self.queue,
                self.objects,
                node.tag,
                node.attrib,
                self.merge_parent_namespaces(pending_ns),
            )
            # The collected declarations were handed over with this
            # element, start a fresh mapping for the next one.
            pending_ns = {}
            continue

        if event == EventType.END:
            self.parser.end(
                self.queue,
                self.objects,
                node.tag,
                node.text,
                node.tail,
            )
            # Release the element content once the parser has consumed it.
            node.clear()
            continue

        if event == EventType.START_NS:
            prefix, uri = node
            # An empty prefix is stored under the None key.
            pending_ns[prefix or None] = uri
            continue

        raise XmlHandlerError(f"Unhandled event: `{event}`.")

    if not self.objects:
        return None

    return self.objects[-1][1]

iterwalk(element, ns_map)

Walk over the element tree and emit events.

The ElementTree doesn't preserve the original namespace prefixes, so new ones have to be generated.

Parameters:

Name Type Description Default
element Element

The etree element instance

required
ns_map Dict

The namespace prefix-URI mapping

required

Yields:

Type Description
str

An iterator of events

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def iterwalk(element: etree.Element, ns_map: Dict) -> Iterator[Tuple[str, Any]]:
    """Recursively turn an element tree into a stream of parser events.

    The ElementTree doesn't preserve the original namespace prefixes,
    so a prefix is loaded through ``namespaces.load_prefix`` for every
    element whose tag has a target uri.

    Args:
        element: The etree element instance
        ns_map: The namespace prefix-URI mapping, shared by the whole walk

    Yields:
        ``(event, payload)`` pairs: an optional start-ns event with a
        ``(prefix, uri)`` payload, the start event of the element, the
        events of all its children in order, and finally its end event.
    """
    namespace = namespaces.target_uri(element.tag)
    if namespace is not None:
        # Announce the namespace before the element that uses it.
        declaration = (namespaces.load_prefix(namespace, ns_map), namespace)
        yield EventType.START_NS, declaration

    yield EventType.START, element

    # Depth first: each child emits its complete subtree before the next.
    for node in element:
        yield from iterwalk(node, ns_map)

    yield EventType.END, element

get_base_url(base_url, source)

Return the base url of the source.

Parameters:

Name Type Description Default
base_url Optional[str]

The base url from the parser config

required
source Any

The xml source input

required

Returns:

Type Description
Optional[str]

A base url str or None, if no base url is provided

Optional[str]

and the source is not a string path.

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
def get_base_url(base_url: Optional[str], source: Any) -> Optional[str]:
    """Pick the base url to use for the source.

    Args:
        base_url: The base url from the parser config
        source: The xml source input

    Returns:
        The given base url when it is truthy, otherwise the source
        itself when it is a string, otherwise None.
    """
    if not base_url:
        # Fall back to the source, but only a string can act as a url/path.
        if isinstance(source, str):
            return source

        return None

    return base_url

xinclude_loader(href, parse, encoding=None, base_url=None)

Custom loader for xinclude parsing.

The base_url argument was added in python >= 3.9.

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
127
128
129
130
131
132
133
134
135
136
137
def xinclude_loader(
    href: str,
    parse: str,
    encoding: Optional[str] = None,
    base_url: Optional[str] = None,
) -> Any:
    """Load an xinclude resource, resolving href against the base url.

    The href is joined onto the base url, an empty string is used when
    no base url is given, and the result is passed to the default
    xinclude loader together with the parse mode and encoding.

    The base_url argument was added in python >= 3.9.
    """
    location = urljoin(base_url or "", href)
    return xinclude.default_loader(location, parse, encoding)