Skip to content

native

xsdata.formats.dataclass.parsers.handlers.native

XmlEventHandler

Bases: XmlHandler

A native xml event handler.

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
class XmlEventHandler(XmlHandler):
    """Native xml handler that turns parse events into parser calls."""

    def parse(self, source: Any, ns_map: Dict[Optional[str], str]) -> Any:
        """Build an event stream for the source and process it.

        Args:
            source: A file resource, an input stream, or an already
                parsed xml tree/element.
            ns_map: A namespace prefix-URI recorder map

        Returns:
            Whatever `process_context` returns for the event stream.
        """
        # A tree is reduced to its root element first.
        if isinstance(source, etree.ElementTree):
            source = source.getroot()

        # An in-memory element is walked directly, there is nothing to parse.
        if isinstance(source, etree.Element):
            return self.process_context(iterwalk(source, {}), ns_map)

        if not self.parser.config.process_xinclude:
            # Default path: let iterparse produce the events.
            events = etree.iterparse(source, EVENTS)  # nosec
            return self.process_context(events, ns_map)

        # XInclude: load the full tree, expand the includes, then walk it.
        root = etree.parse(source).getroot()  # nosec
        loader = functools.partial(
            xinclude_loader,
            base_url=get_base_url(self.parser.config.base_url, source),
        )
        xinclude.include(root, loader=loader)
        return self.process_context(iterwalk(root, {}), ns_map)

    def process_context(
        self, context: Iterable[Tuple[str, Any]], ns_map: Dict[Optional[str], str]
    ) -> Any:
        """Forward every event of the context to the main parser.

        Args:
            context: An iterable of (event, payload) pairs
            ns_map: A namespace prefix-URI recorder map

        Returns:
            The second item of the last entry in `self.objects`, or None
            when no objects were collected.

        Raises:
            XmlHandlerError: If the context yields an unknown event type.
        """
        # Declarations collected since the last start event; they are
        # handed to the next element that opens and then reset.
        declared: Dict = {}
        for event, payload in context:
            if event == EventType.START_NS:
                prefix, uri = payload
                prefix = prefix or None  # an empty prefix is stored as None
                declared[prefix] = uri
                self.parser.register_namespace(ns_map, prefix, uri)
            elif event == EventType.START:
                self.parser.start(
                    self.clazz,
                    self.queue,
                    self.objects,
                    payload.tag,
                    payload.attrib,
                    self.merge_parent_namespaces(declared),
                )
                declared = {}
            elif event == EventType.END:
                self.parser.end(
                    self.queue,
                    self.objects,
                    payload.tag,
                    payload.text,
                    payload.tail,
                )
                # Drop the element's content once it has been reported.
                payload.clear()
            else:
                raise XmlHandlerError(f"Unhandled event: `{event}`.")

        if not self.objects:
            return None

        return self.objects[-1][1]

    def merge_parent_namespaces(self, ns_map: Dict[Optional[str], str]) -> Dict:
        """Combine the element's prefix-URI map with its parent's map.

        The parent map is read from the last node in `self.queue`. When
        the element declares nothing, the parent map object itself is
        returned; otherwise a new map is built where the element's own
        declarations override the inherited ones. Nothing is registered
        with the parser here.

        Args:
            ns_map: The current element namespace prefix-URI map

        Returns:
            The namespace prefix-URI map that applies to the element.
        """
        if not self.queue:
            # No parent node: the result is a fresh copy of the input.
            return dict(ns_map)

        inherited = self.queue[-1].ns_map
        if not ns_map:
            return inherited

        merged = inherited.copy() if inherited else {}
        merged.update(ns_map)
        return merged

parse(source, ns_map)

Parse the source XML document.

Parameters:

Name Type Description Default
source Any

The XML source; can be a file resource, an input stream, or an XML tree/element.

required
ns_map Dict[Optional[str], str]

A namespace prefix-URI recorder map

required

Returns:

Type Description
Any

An instance of the class type representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def parse(self, source: Any, ns_map: Dict[Optional[str], str]) -> Any:
    """Build an event stream for the source and process it.

    Args:
        source: A file resource, an input stream, or an already
            parsed xml tree/element.
        ns_map: A namespace prefix-URI recorder map

    Returns:
        Whatever `process_context` returns for the event stream.
    """
    # A tree is reduced to its root element first.
    if isinstance(source, etree.ElementTree):
        source = source.getroot()

    # An in-memory element is walked directly, there is nothing to parse.
    if isinstance(source, etree.Element):
        return self.process_context(iterwalk(source, {}), ns_map)

    if not self.parser.config.process_xinclude:
        # Default path: let iterparse produce the events.
        events = etree.iterparse(source, EVENTS)  # nosec
        return self.process_context(events, ns_map)

    # XInclude: load the full tree, expand the includes, then walk it.
    root = etree.parse(source).getroot()  # nosec
    loader = functools.partial(
        xinclude_loader,
        base_url=get_base_url(self.parser.config.base_url, source),
    )
    xinclude.include(root, loader=loader)
    return self.process_context(iterwalk(root, {}), ns_map)

process_context(context, ns_map)

Iterate context and push events to main parser.

Parameters:

Name Type Description Default
context Iterable[Tuple[str, Any]]

The iterable xml context

required
ns_map Dict[Optional[str], str]

A namespace prefix-URI recorder map

required

Returns:

Type Description
Any

An instance of the class type representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def process_context(
    self, context: Iterable[Tuple[str, Any]], ns_map: Dict[Optional[str], str]
) -> Any:
    """Forward every event of the context to the main parser.

    Args:
        context: An iterable of (event, payload) pairs
        ns_map: A namespace prefix-URI recorder map

    Returns:
        The second item of the last entry in `self.objects`, or None
        when no objects were collected.

    Raises:
        XmlHandlerError: If the context yields an unknown event type.
    """
    # Declarations collected since the last start event; they are
    # handed to the next element that opens and then reset.
    declared: Dict = {}
    for event, payload in context:
        if event == EventType.START_NS:
            prefix, uri = payload
            prefix = prefix or None  # an empty prefix is stored as None
            declared[prefix] = uri
            self.parser.register_namespace(ns_map, prefix, uri)
        elif event == EventType.START:
            self.parser.start(
                self.clazz,
                self.queue,
                self.objects,
                payload.tag,
                payload.attrib,
                self.merge_parent_namespaces(declared),
            )
            declared = {}
        elif event == EventType.END:
            self.parser.end(
                self.queue,
                self.objects,
                payload.tag,
                payload.text,
                payload.tail,
            )
            # Drop the element's content once it has been reported.
            payload.clear()
        else:
            raise XmlHandlerError(f"Unhandled event: `{event}`.")

    if not self.objects:
        return None

    return self.objects[-1][1]

merge_parent_namespaces(ns_map)

Merge the given prefix-URI map with the parent node map.

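When there is a parent node and the element declares no namespaces, the parent map is returned as-is; otherwise a new merged map is returned, with the element's own declarations taking precedence.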

Parameters:

Name Type Description Default
ns_map Dict[Optional[str], str]

The current element namespace prefix-URI map

required

Returns:

Type Description
Dict

The new merged namespace prefix-URI map.

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def merge_parent_namespaces(self, ns_map: Dict[Optional[str], str]) -> Dict:
    """Combine the element's prefix-URI map with its parent's map.

    The parent map is read from the last node in `self.queue`. When
    the element declares nothing, the parent map object itself is
    returned; otherwise a new map is built where the element's own
    declarations override the inherited ones. Nothing is registered
    with the parser here.

    Args:
        ns_map: The current element namespace prefix-URI map

    Returns:
        The namespace prefix-URI map that applies to the element.
    """
    if not self.queue:
        # No parent node: the result is a fresh copy of the input.
        return dict(ns_map)

    inherited = self.queue[-1].ns_map
    if not ns_map:
        return inherited

    merged = inherited.copy() if inherited else {}
    merged.update(ns_map)
    return merged

iterwalk(element, ns_map)

Walk over the element tree and emit events.

ElementTree doesn't preserve the original namespace prefixes, so new ones have to be generated.

Parameters:

Name Type Description Default
element Element

The etree element instance

required
ns_map Dict

The namespace prefix-URI mapping

required

Yields:

Type Description
str

An iterator of events

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def iterwalk(element: etree.Element, ns_map: Dict) -> Iterator[Tuple[str, Any]]:
    """Emit parse events for an element and, recursively, its children.

    For each element the order is: an optional start-ns event when the
    element tag has a namespace uri, the start event, the events of
    every child in document order, and finally the end event.

    The original prefixes are not available on the element, so the
    prefix for each uri is obtained through `namespaces.load_prefix`.

    Args:
        element: The etree element instance
        ns_map: The namespace prefix-URI mapping

    Yields:
        An iterator of events
    """
    namespace = namespaces.target_uri(element.tag)
    if namespace is not None:
        generated = namespaces.load_prefix(namespace, ns_map)
        yield EventType.START_NS, (generated, namespace)

    yield EventType.START, element

    for node in element:
        yield from iterwalk(node, ns_map)

    yield EventType.END, element

get_base_url(base_url, source)

Return the base url of the source.

Parameters:

Name Type Description Default
base_url Optional[str]

The base url from the parser config

required
source Any

The xml source input

required

Returns:

Type Description
Optional[str]

A base url str, or None if no base url is provided and the source is not a string path.

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
def get_base_url(base_url: Optional[str], source: Any) -> Optional[str]:
    """Pick the base url to resolve relative references against.

    A non-empty configured base url always wins. Without one, a string
    source is used as the base url.

    Args:
        base_url: The base url from the parser config
        source: The xml source input

    Returns:
        The configured base url, else the source when it is a string,
        else None.
    """
    if not base_url and isinstance(source, str):
        return source

    # Either the configured value, or None when it is empty/missing.
    return base_url or None

xinclude_loader(href, parse, encoding=None, base_url=None)

Custom loader for xinclude parsing.

The base_url argument was added in Python 3.9.

Source code in xsdata/formats/dataclass/parsers/handlers/native.py
160
161
162
163
164
165
166
167
168
169
170
def xinclude_loader(
    href: str,
    parse: str,
    encoding: Optional[str] = None,
    base_url: Optional[str] = None,
) -> Any:
    """Load an xinclude resource, resolving href against the base url.

    The href is joined with the base url (an empty string when none is
    given) and the result is passed to the default xinclude loader.

    The base_url argument was added in python >= 3.9.
    """
    location = urljoin(base_url or "", href)
    return xinclude.default_loader(location, parse, encoding)