Skip to content

mixins

xsdata.formats.dataclass.parsers.mixins

PushParser dataclass

A generic interface for event based content handlers like sax.

Parameters:

Name Type Description Default
config ParserConfig

The parser configuration instance

ParserConfig()

Attributes:

Name Type Description
ns_map Dict[Optional[str], str]

The parsed namespace prefix-URI map

Source code in xsdata/formats/dataclass/parsers/mixins.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
@dataclass
class PushParser:
    """Common interface for parsers fed by sax-like content handler events.

    Args:
        config: The parser configuration instance

    Attributes:
        ns_map: The parsed namespace prefix-URI map
    """

    config: ParserConfig = field(default_factory=ParserConfig)
    ns_map: Dict[Optional[str], str] = field(default_factory=dict, init=False)

    def from_path(
        self,
        path: pathlib.Path,
        clazz: Optional[Type[T]] = None,
        ns_map: Optional[Dict[Optional[str], str]] = None,
    ) -> T:
        """Parse the file at the given path into the target class type.

        When clazz is omitted, the binding context will try to
        locate it from the imported dataclasses.

        Args:
            path: The path to the input file
            clazz: The target class type to parse the file into
            ns_map: A namespace prefix-URI map to record prefixes during parsing

        Returns:
            An instance of the specified class representing the parsed content.
        """
        # The resolved path is forwarded to ``parse`` as a plain string.
        resolved = str(path.resolve())
        return self.parse(resolved, clazz, ns_map)

    def from_string(
        self,
        source: str,
        clazz: Optional[Type[T]] = None,
        ns_map: Optional[Dict[Optional[str], str]] = None,
    ) -> T:
        """Parse a source string into the target class type.

        When clazz is omitted, the binding context will try to
        locate it from the imported dataclasses.

        Args:
            source: The source string to parse
            clazz: The target class type to parse the source string into
            ns_map: A namespace prefix-URI map to record prefixes during parsing

        Returns:
            An instance of the specified class representing the parsed content.
        """
        # Encode with the default codec and reuse the bytes entry point.
        encoded = source.encode()
        return self.from_bytes(encoded, clazz, ns_map)

    def from_bytes(
        self,
        source: bytes,
        clazz: Optional[Type[T]] = None,
        ns_map: Optional[Dict[Optional[str], str]] = None,
    ) -> T:
        """Parse a source bytes object into the target class type.

        When clazz is omitted, the binding context will try to
        locate it from the imported dataclasses.

        Args:
            source: The source bytes object to parse
            clazz: The target class type to parse the source bytes object into
            ns_map: A namespace prefix-URI map to record prefixes during parsing

        Returns:
            An instance of the specified class representing the parsed content.
        """
        # Wrap the raw bytes in an in-memory stream before parsing.
        stream = io.BytesIO(source)
        return self.parse(stream, clazz, ns_map)

    @abc.abstractmethod
    def parse(
        self,
        source: Any,
        clazz: Optional[Type[T]] = None,
        ns_map: Optional[Dict[Optional[str], str]] = None,
    ) -> T:
        """Parse an input file or stream into the target class type.

        When clazz is omitted, the binding context will try to
        locate it from the imported dataclasses.

        Args:
            source: The source stream object to parse
            clazz: The target class type to parse the source into
            ns_map: A namespace prefix-URI map to record prefixes during parsing

        Returns:
            An instance of the specified class representing the parsed content.
        """

    @abc.abstractmethod
    def start(
        self,
        clazz: Optional[Type],
        queue: List[Any],
        objects: List[Any],
        qname: str,
        attrs: Dict[str, str],
        ns_map: Dict[Optional[str], str],
    ):
        """Create the XmlNode for a starting element and add it to the queue.

        Args:
            clazz: The target class type, auto locate if omitted
            queue: The XmlNode queue list
            objects: The list of all intermediate parsed objects
            qname: The element qualified name
            attrs: The element attributes
            ns_map: The element namespace prefix-URI map
        """

    @abc.abstractmethod
    def end(
        self,
        queue: List,
        objects: List,
        qname: str,
        text: Optional[str],
        tail: Optional[str],
    ) -> bool:
        """Parse the last xml node and bind any intermediate objects.

        Args:
            queue: The XmlNode queue list
            objects: The list of all intermediate parsed objects
            qname: The element qualified name
            text: The element text content
            tail: The element tail content

        Returns:
            Whether the binding process was successful.
        """

    def register_namespace(
        self, ns_map: Dict[Optional[str], str], prefix: Optional[str], uri: str
    ):
        """Record the uri under its prefix in the namespace prefix-URI map.

        A prefix that is already present in the map keeps its current uri.

        Args:
            ns_map: The namespace prefix-URI map
            prefix: The namespace prefix
            uri: The namespace uri
        """
        ns_map.setdefault(prefix, uri)

from_path(path, clazz=None, ns_map=None)

Parse the input file into the target class type.

If no clazz is provided, the binding context will try to locate it from imported dataclasses.

Parameters:

Name Type Description Default
path Path

The path to the input file

required
clazz Optional[Type[T]]

The target class type to parse the file into

None
ns_map Optional[Dict[Optional[str], str]]

A namespace prefix-URI map to record prefixes during parsing

None

Returns:

Type Description
T

An instance of the specified class representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/mixins.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def from_path(
    self,
    path: pathlib.Path,
    clazz: Optional[Type[T]] = None,
    ns_map: Optional[Dict[Optional[str], str]] = None,
) -> T:
    """Parse the file at the given path into the target class type.

    When clazz is omitted, the binding context will try to
    locate it from the imported dataclasses.

    Args:
        path: The path to the input file
        clazz: The target class type to parse the file into
        ns_map: A namespace prefix-URI map to record prefixes during parsing

    Returns:
        An instance of the specified class representing the parsed content.
    """
    # The resolved path is forwarded to ``parse`` as a plain string.
    resolved = str(path.resolve())
    return self.parse(resolved, clazz, ns_map)

from_string(source, clazz=None, ns_map=None)

Parse the input source string into the target class type.

If no clazz is provided, the binding context will try to locate it from imported dataclasses.

Parameters:

Name Type Description Default
source str

The source string to parse

required
clazz Optional[Type[T]]

The target class type to parse the source string into

None
ns_map Optional[Dict[Optional[str], str]]

A namespace prefix-URI map to record prefixes during parsing

None

Returns:

Type Description
T

An instance of the specified class representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/mixins.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def from_string(
    self,
    source: str,
    clazz: Optional[Type[T]] = None,
    ns_map: Optional[Dict[Optional[str], str]] = None,
) -> T:
    """Parse a source string into the target class type.

    When clazz is omitted, the binding context will try to
    locate it from the imported dataclasses.

    Args:
        source: The source string to parse
        clazz: The target class type to parse the source string into
        ns_map: A namespace prefix-URI map to record prefixes during parsing

    Returns:
        An instance of the specified class representing the parsed content.
    """
    # Encode with the default codec and reuse the bytes entry point.
    encoded = source.encode()
    return self.from_bytes(encoded, clazz, ns_map)

from_bytes(source, clazz=None, ns_map=None)

Parse the input source bytes object into the target class type.

If no clazz is provided, the binding context will try to locate it from imported dataclasses.

Parameters:

Name Type Description Default
source bytes

The source bytes object to parse

required
clazz Optional[Type[T]]

The target class type to parse the source bytes object into

None
ns_map Optional[Dict[Optional[str], str]]

A namespace prefix-URI map to record prefixes during parsing

None

Returns:

Type Description
T

An instance of the specified class representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/mixins.py
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
def from_bytes(
    self,
    source: bytes,
    clazz: Optional[Type[T]] = None,
    ns_map: Optional[Dict[Optional[str], str]] = None,
) -> T:
    """Parse a source bytes object into the target class type.

    When clazz is omitted, the binding context will try to
    locate it from the imported dataclasses.

    Args:
        source: The source bytes object to parse
        clazz: The target class type to parse the source bytes object into
        ns_map: A namespace prefix-URI map to record prefixes during parsing

    Returns:
        An instance of the specified class representing the parsed content.
    """
    # Wrap the raw bytes in an in-memory stream before parsing.
    stream = io.BytesIO(source)
    return self.parse(stream, clazz, ns_map)

parse(source, clazz=None, ns_map=None) abstractmethod

Parse the input file or stream into the target class type.

If no clazz is provided, the binding context will try to locate it from imported dataclasses.

Parameters:

Name Type Description Default
source Any

The source stream object to parse

required
clazz Optional[Type[T]]

The target class type to parse the source into

None
ns_map Optional[Dict[Optional[str], str]]

A namespace prefix-URI map to record prefixes during parsing

None

Returns:

Type Description
T

An instance of the specified class representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/mixins.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
@abc.abstractmethod
def parse(
    self,
    source: Any,
    clazz: Optional[Type[T]] = None,
    ns_map: Optional[Dict[Optional[str], str]] = None,
) -> T:
    """Parse an input file or stream into the target class type.

    When clazz is omitted, the binding context will try to
    locate it from the imported dataclasses.

    Args:
        source: The source stream object to parse
        clazz: The target class type to parse the source into
        ns_map: A namespace prefix-URI map to record prefixes during parsing

    Returns:
        An instance of the specified class representing the parsed content.
    """

start(clazz, queue, objects, qname, attrs, ns_map) abstractmethod

Build and queue the XmlNode for the starting element.

Parameters:

Name Type Description Default
clazz Optional[Type]

The target class type, auto locate if omitted

required
queue List[Any]

The XmlNode queue list

required
objects List[Any]

The list of all intermediate parsed objects

required
qname str

The element qualified name

required
attrs Dict[str, str]

The element attributes

required
ns_map Dict[Optional[str], str]

The element namespace prefix-URI map

required
Source code in xsdata/formats/dataclass/parsers/mixins.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
@abc.abstractmethod
def start(
    self,
    clazz: Optional[Type],
    queue: List[Any],
    objects: List[Any],
    qname: str,
    attrs: Dict[str, str],
    ns_map: Dict[Optional[str], str],
):
    """Create the XmlNode for a starting element and add it to the queue.

    Args:
        clazz: The target class type, auto locate if omitted
        queue: The XmlNode queue list
        objects: The list of all intermediate parsed objects
        qname: The element qualified name
        attrs: The element attributes
        ns_map: The element namespace prefix-URI map
    """

end(queue, objects, qname, text, tail) abstractmethod

Parse the last xml node and bind any intermediate objects.

Parameters:

Name Type Description Default
queue List

The XmlNode queue list

required
objects List

The list of all intermediate parsed objects

required
qname str

The element qualified name

required
text Optional[str]

The element text content

required
tail Optional[str]

The element tail content

required

Returns:

Type Description
bool

Whether the binding process was successful.

Source code in xsdata/formats/dataclass/parsers/mixins.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
@abc.abstractmethod
def end(
    self,
    queue: List,
    objects: List,
    qname: str,
    text: Optional[str],
    tail: Optional[str],
) -> bool:
    """Parse the last xml node and bind any intermediate objects.

    Args:
        queue: The XmlNode queue list
        objects: The list of all intermediate parsed objects
        qname: The element qualified name
        text: The element text content
        tail: The element tail content

    Returns:
        Whether the binding process was successful.
    """

register_namespace(ns_map, prefix, uri)

Register the uri prefix in the namespace prefix-URI map.

Parameters:

Name Type Description Default
ns_map Dict[Optional[str], str]

The namespace prefix-URI map

required
prefix Optional[str]

The namespace prefix

required
uri str

The namespace uri

required
Source code in xsdata/formats/dataclass/parsers/mixins.py
154
155
156
157
158
159
160
161
162
163
164
165
def register_namespace(
    self, ns_map: Dict[Optional[str], str], prefix: Optional[str], uri: str
):
    """Record the uri under its prefix in the namespace prefix-URI map.

    A prefix that is already present in the map keeps its current uri.

    Args:
        ns_map: The namespace prefix-URI map
        prefix: The namespace prefix
        uri: The namespace uri
    """
    ns_map.setdefault(prefix, uri)

XmlNode

Bases: ABC

The xml node interface.

The nodes are responsible for finding and queuing the child nodes when a new element starts, and for building the resulting object tree when the element ends. The parser needs to maintain a queue for these nodes and a list of all the intermediate objects.

Source code in xsdata/formats/dataclass/parsers/mixins.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
class XmlNode(abc.ABC):
    """The xml node interface.

    A node is responsible for finding and queuing its child nodes
    when a new element starts, and for building the resulting object
    tree when the element ends. The parser needs to maintain a queue
    for these nodes and a list of all the intermediate objects.
    """

    @abc.abstractmethod
    def child(self, qname: str, attrs: Dict, ns_map: Dict, position: int) -> "XmlNode":
        """Create the next child node to be queued, when an element starts.

        This entry point is responsible for creating the next node
        type with all the information needed to bind the incoming
        input data.

        Args:
            qname: The element qualified name
            attrs: The element attributes
            ns_map: The element namespace prefix-URI map
            position: The current length of the intermediate objects

        Returns:
            The child xml node instance.
        """

    @abc.abstractmethod
    def bind(
        self,
        qname: str,
        text: Optional[str],
        tail: Optional[str],
        objects: List[Any],
    ) -> bool:
        """Bind the parsed data into an object for the ending element.

        This entry point is called when an xml element ends. It is
        responsible for parsing the current element attributes/text,
        binding any children objects and initializing the new object.

        Args:
            qname: The element qualified name
            text: The element text content
            tail: The element tail content
            objects: The list of intermediate parsed objects

        Returns:
            Whether the binding process was successful or not.
        """

child(qname, attrs, ns_map, position) abstractmethod

Initialize the next child node to be queued, when an element starts.

This entry point is responsible for creating the next node type with all the information needed to bind the incoming input data.

Parameters:

Name Type Description Default
qname str

The element qualified name

required
attrs Dict

The element attributes

required
ns_map Dict

The element namespace prefix-URI map

required
position int

The current length of the intermediate objects

required

Returns:

Type Description
XmlNode

The child xml node instance.

Source code in xsdata/formats/dataclass/parsers/mixins.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
@abc.abstractmethod
def child(self, qname: str, attrs: Dict, ns_map: Dict, position: int) -> "XmlNode":
    """Create the next child node to be queued, when an element starts.

    This entry point is responsible for creating the next node
    type with all the information needed to bind the incoming
    input data.

    Args:
        qname: The element qualified name
        attrs: The element attributes
        ns_map: The element namespace prefix-URI map
        position: The current length of the intermediate objects

    Returns:
        The child xml node instance.
    """

bind(qname, text, tail, objects) abstractmethod

Bind the parsed data into an object for the ending element.

This entry point is called when an XML element ends. It is responsible for parsing the current element attributes/text, binding any children objects and initializing the new object.

Parameters:

Name Type Description Default
qname str

The element qualified name

required
text Optional[str]

The element text content

required
tail Optional[str]

The element tail content

required
objects List[Any]

The list of intermediate parsed objects

required

Returns:

Type Description
bool

Whether the binding process was successful or not.

Source code in xsdata/formats/dataclass/parsers/mixins.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
@abc.abstractmethod
def bind(
    self,
    qname: str,
    text: Optional[str],
    tail: Optional[str],
    objects: List[Any],
) -> bool:
    """Bind the parsed data into an object for the ending element.

    This entry point is called when an xml element ends. It is
    responsible for parsing the current element attributes/text,
    binding any children objects and initializing the new object.

    Args:
        qname: The element qualified name
        text: The element text content
        tail: The element tail content
        objects: The list of intermediate parsed objects

    Returns:
        Whether the binding process was successful or not.
    """

XmlHandler

Abstract content handler.

Parameters:

Name Type Description Default
parser PushParser

The parser instance to feed with events

required
clazz Optional[Type]

The target class type, auto locate if omitted

required

Attributes:

Name Type Description
queue List

The XmlNode queue list

objects List

The list of intermediate parsed objects

Source code in xsdata/formats/dataclass/parsers/mixins.py
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
class XmlHandler:
    """Base class for content handlers.

    Args:
        parser: The parser instance to feed with events
        clazz: The target class type, auto locate if omitted

    Attributes:
        queue: The XmlNode queue list
        objects: The list of intermediate parsed objects
    """

    __slots__ = ("parser", "clazz", "queue", "objects")

    def __init__(self, parser: PushParser, clazz: Optional[Type]):
        # Both working lists start out empty for every handler instance.
        self.queue: List = []
        self.objects: List = []
        self.clazz = clazz
        self.parser = parser

    def parse(self, source: Any, ns_map: Dict[Optional[str], str]) -> Any:
        """Parse the source XML document.

        This base implementation always raises; subclasses must override it.

        Args:
            source: The xml source, can be a file resource or an input stream.
            ns_map: A dictionary to capture namespace prefixes.

        Returns:
            An instance of the class type representing the parsed content.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("This method must be implemented!")

parse(source, ns_map)

Parse the source XML document.

Parameters:

Name Type Description Default
source Any

The xml source, can be a file resource or an input stream.

required
ns_map Dict[Optional[str], str]

A dictionary to capture namespace prefixes.

required

Returns:

Type Description
Any

An instance of the class type representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/mixins.py
240
241
242
243
244
245
246
247
248
249
250
def parse(self, source: Any, ns_map: Dict[Optional[str], str]) -> Any:
    """Parse the source XML document.

    This base implementation always raises; subclasses must override it.

    Args:
        source: The xml source, can be a file resource or an input stream.
        ns_map: A dictionary to capture namespace prefixes.

    Returns:
        An instance of the class type representing the parsed content.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError("This method must be implemented!")

EventsHandler

Bases: XmlHandler

Sax content handler for pre-recorded events.

Source code in xsdata/formats/dataclass/parsers/mixins.py
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
class EventsHandler(XmlHandler):
    """Sax content handler for pre-recorded events."""

    def parse(self, source: List[Tuple], ns_map: Dict[Optional[str], str]) -> Any:
        """Replay the pre-recorded events against the main parser.

        Args:
            source: A list of event data
            ns_map: The map passed to the parser's ``register_namespace``
                for every namespace start event

        Returns:
            The second item of the last entry in the intermediate
            objects list, or None if that list is empty.

        Raises:
            XmlHandlerError: If an event type is not start, end or start-ns.
        """
        for kind, *payload in source:
            if kind == EventType.START:
                qname, attrs, element_ns_map = payload
                self.parser.start(
                    self.clazz,
                    self.queue,
                    self.objects,
                    qname,
                    attrs,
                    element_ns_map,
                )
                continue

            if kind == EventType.END:
                qname, text, tail = payload
                self.parser.end(self.queue, self.objects, qname, text, tail)
                continue

            if kind == EventType.START_NS:
                prefix, uri = payload
                # A falsy prefix is registered under the None key.
                self.parser.register_namespace(ns_map, prefix or None, uri)
                continue

            raise XmlHandlerError(f"Unhandled event: `{kind}`.")

        if not self.objects:
            return None

        return self.objects[-1][1]

parse(source, ns_map)

Forward the pre-recorded events to the main parser.

Parameters:

Name Type Description Default
source List[Tuple]

A list of event data

required
ns_map Dict[Optional[str], str]

A dictionary to capture namespace prefixes.

required

Returns:

Type Description
Any

An instance of the class type representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/mixins.py
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
def parse(self, source: List[Tuple], ns_map: Dict[Optional[str], str]) -> Any:
    """Replay the pre-recorded events against the main parser.

    Args:
        source: A list of event data
        ns_map: The map passed to the parser's ``register_namespace``
            for every namespace start event

    Returns:
        The second item of the last entry in the intermediate
        objects list, or None if that list is empty.

    Raises:
        XmlHandlerError: If an event type is not start, end or start-ns.
    """
    for kind, *payload in source:
        if kind == EventType.START:
            qname, attrs, element_ns_map = payload
            self.parser.start(
                self.clazz,
                self.queue,
                self.objects,
                qname,
                attrs,
                element_ns_map,
            )
            continue

        if kind == EventType.END:
            qname, text, tail = payload
            self.parser.end(self.queue, self.objects, qname, text, tail)
            continue

        if kind == EventType.START_NS:
            prefix, uri = payload
            # A falsy prefix is registered under the None key.
            self.parser.register_namespace(ns_map, prefix or None, uri)
            continue

        raise XmlHandlerError(f"Unhandled event: `{kind}`.")

    if not self.objects:
        return None

    return self.objects[-1][1]