Skip to content

mixins

xsdata.formats.dataclass.parsers.mixins

PushParser dataclass

Bases: AbstractParser

A generic interface for event based content handlers like sax.

Parameters:

Name Type Description Default
config ParserConfig

The parser configuration instance

field(default_factory=ParserConfig)

Attributes:

Name Type Description
ns_map Dict[Optional[str], str]

The parsed namespace prefix-URI map

Source code in xsdata/formats/dataclass/parsers/mixins.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
@dataclass
class PushParser(AbstractParser):
    """Common interface for event driven content handlers, e.g. sax.

    Args:
        config: The parser configuration instance

    Attributes:
        ns_map: The namespace prefix-URI map built from the parsed input
    """

    config: ParserConfig = field(default_factory=ParserConfig)
    ns_map: Dict[Optional[str], str] = field(init=False, default_factory=dict)

    @abc.abstractmethod
    def start(
        self,
        clazz: Optional[Type],
        queue: List[Any],
        objects: List[Any],
        qname: str,
        attrs: Dict[str, str],
        ns_map: Dict[Optional[str], str],
    ):
        """Handle an element start event.

        Implementations build the XmlNode for the starting element and
        add it to the queue.

        Args:
            clazz: The target class type, auto locate if omitted
            queue: The XmlNode queue list
            objects: The list of all intermediate parsed objects
            qname: The qualified name of the element
            attrs: The attributes of the element
            ns_map: The namespace prefix-URI map of the element
        """

    @abc.abstractmethod
    def end(
        self,
        queue: List,
        objects: List,
        qname: str,
        text: NoneStr,
        tail: NoneStr,
    ) -> bool:
        """Handle an element end event.

        Implementations parse the last xml node and bind any
        intermediate objects.

        Args:
            queue: The XmlNode queue list
            objects: The list of all intermediate parsed objects
            qname: The qualified name of the element
            text: The text content of the element
            tail: The tail content of the element

        Returns:
            Whether the binding process was successful.
        """

    def register_namespace(self, prefix: NoneStr, uri: str):
        """Record a namespace prefix in the parser namespace map.

        A prefix that is already registered keeps its current uri.

        Args:
            prefix: Namespace prefix
            uri: Namespace uri
        """
        # setdefault only writes when the prefix is not a key yet.
        self.ns_map.setdefault(prefix, uri)

start(clazz, queue, objects, qname, attrs, ns_map) abstractmethod

Build and queue the XmlNode for the starting element.

Parameters:

Name Type Description Default
clazz Optional[Type]

The target class type, auto locate if omitted

required
queue List[Any]

The XmlNode queue list

required
objects List[Any]

The list of all intermediate parsed objects

required
qname str

The element qualified name

required
attrs Dict[str, str]

The element attributes

required
ns_map Dict[Optional[str], str]

The element namespace prefix-URI map

required
Source code in xsdata/formats/dataclass/parsers/mixins.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
@abc.abstractmethod
def start(
    self,
    clazz: Optional[Type],
    queue: List[Any],
    objects: List[Any],
    qname: str,
    attrs: Dict[str, str],
    ns_map: Dict[Optional[str], str],
):
    """Handle an element start event.

    Implementations build the XmlNode for the starting element and
    add it to the queue.

    Args:
        clazz: The target class type, auto locate if omitted
        queue: The XmlNode queue list
        objects: The list of all intermediate parsed objects
        qname: The qualified name of the element
        attrs: The attributes of the element
        ns_map: The namespace prefix-URI map of the element
    """

end(queue, objects, qname, text, tail) abstractmethod

Parse the last xml node and bind any intermediate objects.

Parameters:

Name Type Description Default
queue List

The XmlNode queue list

required
objects List

The list of all intermediate parsed objects

required
qname str

The element qualified name

required
text NoneStr

The element text content

required
tail NoneStr

The element tail content

required

Returns:

Type Description
bool

Whether the binding process was successful.

Source code in xsdata/formats/dataclass/parsers/mixins.py
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
@abc.abstractmethod
def end(
    self,
    queue: List,
    objects: List,
    qname: str,
    text: NoneStr,
    tail: NoneStr,
) -> bool:
    """Handle an element end event.

    Implementations parse the last xml node and bind any
    intermediate objects.

    Args:
        queue: The XmlNode queue list
        objects: The list of all intermediate parsed objects
        qname: The qualified name of the element
        text: The text content of the element
        tail: The tail content of the element

    Returns:
        Whether the binding process was successful.
    """

register_namespace(prefix, uri)

Register the uri prefix in the namespace registry.

Parameters:

Name Type Description Default
prefix NoneStr

Namespace prefix

required
uri str

Namespace uri

required
Source code in xsdata/formats/dataclass/parsers/mixins.py
70
71
72
73
74
75
76
77
78
def register_namespace(self, prefix: NoneStr, uri: str):
    """Record a namespace prefix in the parser namespace map.

    A prefix that is already registered keeps its current uri.

    Args:
        prefix: Namespace prefix
        uri: Namespace uri
    """
    # setdefault only writes when the prefix is not a key yet.
    self.ns_map.setdefault(prefix, uri)

XmlNode

Bases: ABC

The xml node interface.

Nodes are responsible for finding and queuing their child nodes when a new element starts, and for building the resulting object tree when the element ends. The parser needs to maintain a queue for these nodes and a list of all the intermediate objects.

Source code in xsdata/formats/dataclass/parsers/mixins.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
class XmlNode(abc.ABC):
    """Interface implemented by every xml node type.

    A node finds and queues its child nodes when a new element starts
    and builds the resulting object tree when the element ends. The
    parser needs to maintain a queue for these nodes and a list of all
    the intermediate objects.
    """

    @abc.abstractmethod
    def child(self, qname: str, attrs: Dict, ns_map: Dict, position: int) -> "XmlNode":
        """Create the next child node to be queued, when an element starts.

        Implementations pick the next node type and give it all the
        information needed to bind the incoming input data.

        Args:
            qname: The qualified name of the element
            attrs: The attributes of the element
            ns_map: The namespace prefix-URI map of the element
            position: The current length of the intermediate objects

        Returns:
            The child xml node instance.
        """

    @abc.abstractmethod
    def bind(
        self,
        qname: str,
        text: NoneStr,
        tail: NoneStr,
        objects: List[Any],
    ) -> bool:
        """Turn the parsed data of an ending element into an object.

        Called when an xml element ends. Implementations parse the
        current element attributes/text, bind any children objects and
        initialize the new object.

        Args:
            qname: The qualified name of the element
            text: The text content of the element
            tail: The tail content of the element
            objects: The list of intermediate parsed objects

        Returns:
            Whether the binding process was successful or not.
        """

child(qname, attrs, ns_map, position) abstractmethod

Initialize the next child node to be queued, when an element starts.

This entry point is responsible for creating the next node type with all the information needed to bind the incoming input data.

Parameters:

Name Type Description Default
qname str

The element qualified name

required
attrs Dict

The element attributes

required
ns_map Dict

The element namespace prefix-URI map

required
position int

The current length of the intermediate objects

required

Returns:

Type Description
XmlNode

The child xml node instance.

Source code in xsdata/formats/dataclass/parsers/mixins.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
@abc.abstractmethod
def child(self, qname: str, attrs: Dict, ns_map: Dict, position: int) -> "XmlNode":
    """Create the next child node to be queued, when an element starts.

    Implementations pick the next node type and give it all the
    information needed to bind the incoming input data.

    Args:
        qname: The qualified name of the element
        attrs: The attributes of the element
        ns_map: The namespace prefix-URI map of the element
        position: The current length of the intermediate objects

    Returns:
        The child xml node instance.
    """

bind(qname, text, tail, objects) abstractmethod

Bind the parsed data into an object for the ending element.

This entry point is called when an XML element ends and is responsible for parsing the current element attributes/text, binding any child objects and initializing the new object.

Parameters:

Name Type Description Default
qname str

The element qualified name

required
text NoneStr

The element text content

required
tail NoneStr

The element tail content

required
objects List[Any]

The list of intermediate parsed objects

required

Returns:

Type Description
bool

Whether the binding process was successful or not.

Source code in xsdata/formats/dataclass/parsers/mixins.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
@abc.abstractmethod
def bind(
    self,
    qname: str,
    text: NoneStr,
    tail: NoneStr,
    objects: List[Any],
) -> bool:
    """Turn the parsed data of an ending element into an object.

    Called when an xml element ends. Implementations parse the
    current element attributes/text, bind any children objects and
    initialize the new object.

    Args:
        qname: The qualified name of the element
        text: The text content of the element
        tail: The tail content of the element
        objects: The list of intermediate parsed objects

    Returns:
        Whether the binding process was successful or not.
    """

XmlHandler

Abstract content handler.

Parameters:

Name Type Description Default
parser PushParser

The parser instance to feed with events

required
clazz Optional[Type]

The target class type, auto locate if omitted

required

Attributes:

Name Type Description
queue List

The XmlNode queue list

objects List

The list of intermediate parsed objects

Source code in xsdata/formats/dataclass/parsers/mixins.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
class XmlHandler:
    """Base class for content handlers.

    Args:
        parser: The parser instance to feed with events
        clazz: The target class type, auto locate if omitted

    Attributes:
        queue: The XmlNode queue list
        objects: The list of intermediate parsed objects
    """

    __slots__ = ("parser", "clazz", "queue", "objects")

    def __init__(self, parser: PushParser, clazz: Optional[Type]):
        self.parser = parser
        self.clazz = clazz
        self.queue: List = []
        self.objects: List = []

    def parse(self, source: Any) -> Any:
        """Parse the source XML document.

        This base implementation always raises; subclasses provide the
        actual parsing.

        Args:
            source: The xml source, can be a file resource or an input stream.

        Returns:
            An instance of the class type representing the parsed content.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError("This method must be implemented!")

    def merge_parent_namespaces(self, ns_map: Dict[Optional[str], str]) -> Dict:
        """Combine the element prefix-URI map with the parent node map.

        Every prefix of the given map is also registered with the
        parser. The element entries override the parent entries.

        Args:
            ns_map: The current element namespace prefix-URI map

        Returns:
            The new merged namespace prefix-URI map.
        """
        merged: Dict = {}

        if self.queue:
            inherited = self.queue[-1].ns_map

            # No declarations on this element: hand back the parent
            # map itself, without copying it.
            if not ns_map:
                return inherited

            if inherited:
                merged = inherited.copy()

        for prefix, uri in ns_map.items():
            self.parser.register_namespace(prefix, uri)
            merged[prefix] = uri

        return merged

parse(source)

Parse the source XML document.

Parameters:

Name Type Description Default
source Any

The xml source, can be a file resource or an input stream.

required

Returns:

Type Description
Any

An instance of the class type representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/mixins.py
153
154
155
156
157
158
159
160
161
162
def parse(self, source: Any) -> Any:
    """Parse the source XML document.

    This base implementation always raises; subclasses provide the
    actual parsing.

    Args:
        source: The xml source, can be a file resource or an input stream.

    Returns:
        An instance of the class type representing the parsed content.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError("This method must be implemented!")

merge_parent_namespaces(ns_map)

Merge the given prefix-URI map with the parent node map.

This method also registers new prefixes with the parser.

Parameters:

Name Type Description Default
ns_map Dict[Optional[str], str]

The current element namespace prefix-URI map

required

Returns:

Type Description
Dict

The new merged namespace prefix-URI map.

Source code in xsdata/formats/dataclass/parsers/mixins.py
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
def merge_parent_namespaces(self, ns_map: Dict[Optional[str], str]) -> Dict:
    """Combine the element prefix-URI map with the parent node map.

    Every prefix of the given map is also registered with the
    parser. The element entries override the parent entries.

    Args:
        ns_map: The current element namespace prefix-URI map

    Returns:
        The new merged namespace prefix-URI map.
    """
    merged: Dict = {}

    if self.queue:
        inherited = self.queue[-1].ns_map

        # No declarations on this element: hand back the parent
        # map itself, without copying it.
        if not ns_map:
            return inherited

        if inherited:
            merged = inherited.copy()

    for prefix, uri in ns_map.items():
        self.parser.register_namespace(prefix, uri)
        merged[prefix] = uri

    return merged

EventsHandler

Bases: XmlHandler

Sax content handler for pre-recorded events.

Source code in xsdata/formats/dataclass/parsers/mixins.py
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
class EventsHandler(XmlHandler):
    """Content handler that replays pre-recorded sax events."""

    def parse(self, source: List[Tuple]) -> Any:
        """Replay the pre-recorded events on the main parser.

        Each entry starts with the event type, followed by the
        arguments of that event.

        Args:
            source: A list of event data

        Returns:
            An instance of the class type representing the parsed content.

        Raises:
            XmlHandlerError: If an event type is not start, end or start-ns.
        """
        for event, *payload in source:
            if event == EventType.START:
                qname, attrs, ns_map = payload
                self.parser.start(
                    self.clazz,
                    self.queue,
                    self.objects,
                    qname,
                    attrs,
                    ns_map,
                )
                continue

            if event == EventType.END:
                qname, text, tail = payload
                self.parser.end(self.queue, self.objects, qname, text, tail)
                continue

            if event == EventType.START_NS:
                prefix, uri = payload
                # Falsy prefixes, e.g. an empty string, are stored as None.
                self.parser.register_namespace(prefix or None, uri)
                continue

            raise XmlHandlerError(f"Unhandled event: `{event}`.")

        if not self.objects:
            return None

        # The result is the second item of the last intermediate entry.
        return self.objects[-1][1]

parse(source)

Forward the pre-recorded events to the main parser.

Parameters:

Name Type Description Default
source List[Tuple]

A list of event data

required

Returns:

Type Description
Any

An instance of the class type representing the parsed content.

Source code in xsdata/formats/dataclass/parsers/mixins.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
def parse(self, source: List[Tuple]) -> Any:
    """Replay the pre-recorded events on the main parser.

    Each entry starts with the event type, followed by the
    arguments of that event.

    Args:
        source: A list of event data

    Returns:
        An instance of the class type representing the parsed content.

    Raises:
        XmlHandlerError: If an event type is not start, end or start-ns.
    """
    for event, *payload in source:
        if event == EventType.START:
            qname, attrs, ns_map = payload
            self.parser.start(
                self.clazz,
                self.queue,
                self.objects,
                qname,
                attrs,
                ns_map,
            )
            continue

        if event == EventType.END:
            qname, text, tail = payload
            self.parser.end(self.queue, self.objects, qname, text, tail)
            continue

        if event == EventType.START_NS:
            prefix, uri = payload
            # Falsy prefixes, e.g. an empty string, are stored as None.
            self.parser.register_namespace(prefix or None, uri)
            continue

        raise XmlHandlerError(f"Unhandled event: `{event}`.")

    if not self.objects:
        return None

    # The result is the second item of the last intermediate entry.
    return self.objects[-1][1]