Source code for xsdata.parser
import io
import pathlib
from dataclasses import dataclass, field
from typing import List, Optional
from lxml import etree
from xsdata.models import elements
from xsdata.models.elements import Attribute, Choice, Element, Schema
from xsdata.models.enums import EventType, FormType, TagType
from xsdata.models.mixins import BaseModel
from xsdata.utils.text import snake_case
[docs]@dataclass
class SchemaParser:
"""
A simple parser to convert an xsd schema to an easy to handle data
structure based on dataclasses.
The parser is a dummy as possible but it will try to normalize
certain things like apply parent properties to children.
"""
context: etree.iterparse
elements: List[BaseModel] = field(default_factory=list)
element_form: Optional[FormType] = field(init=False, default=None)
attribute_form: Optional[FormType] = field(init=False, default=None)
[docs] @classmethod
def create(cls, source: object) -> Schema:
"""A shortcut class method to initialize the parser, parse the given
source and return the generated Schema instance."""
ctx = etree.iterparse(source, events=(EventType.START, EventType.END))
return cls(context=ctx).parse()
[docs] @classmethod
def from_file(cls, path: pathlib.Path) -> Schema:
"""A shortcut class method for file path sources."""
if isinstance(path, str):
path = pathlib.Path(path).resolve()
schema = cls.create(str(path))
schema.location = path
return schema
[docs] @classmethod
def from_bytes(cls, source: bytes) -> Schema:
"""A shortcut class method for bytes source."""
return cls.create(io.BytesIO(source))
[docs] @classmethod
def from_string(cls, source: str) -> Schema:
"""A shortcut class method for string source."""
return cls.from_bytes(source=source.encode())
[docs] def parse(self) -> Schema:
"""
Main parse procedure which depends heavily on binding data classes that
have all the necessary attributes to match all the possible xsd
elements and attributes.
Elements are initialized on the start event of the parser and assigned
to the parent element on the end event. The procedure all includes
start/end hooks for each element type.
Elements with no attributes and text are ignored.
"""
methods = TagType.qnames()
index = 0
for event, elem in self.context:
tag = methods.get(elem.tag)
if tag is None:
raise NotImplementedError(
"Unsupported tag `{}`".format(elem.tag)
)
if event == EventType.START:
builder = getattr(elements, tag.cname)
element = builder.from_element(elem, index=index)
self.elements.append(element)
index += 1
elif event == EventType.END:
element = self.elements.pop()
if len(elem.attrib) == 0 and elem.text is None:
continue
if len(self.elements) > 0:
self.assign_to_parent(element)
method = getattr(self, "{}_{}".format(event, tag.value), None)
if method:
method(element, elem)
return element
[docs] def start_schema(self, schema: Schema, *args):
"""Collect the schema's default form for attributes and elements for
later usage."""
if isinstance(schema, Schema):
self.element_form = schema.element_form_default
self.attribute_form = schema.attribute_form_default
[docs] def start_element(self, element: Element, *args):
"""Assign the schema's default form for elements if the given element
form is None."""
if isinstance(element, Element) and element.form is None:
element.form = self.element_form
[docs] def start_attribute(self, attribute: Attribute, *args):
"""Assign the schema's default form for attributes if the given
attribute form is None."""
if isinstance(attribute, Attribute) and attribute.form is None:
attribute.form = self.attribute_form
[docs] def end_choice(self, choice: Choice, *args):
"""Elements inside a choice are by definition optional, reset their min
occurs counter."""
if isinstance(choice, Choice):
for child in choice.elements:
child.min_occurs = 0
[docs] def assign_to_parent(self, element):
"""
Assign an element to its parent either in a list of same type objects
or directly as an attribute.
:raise ValueError when we can't assign or append the element to the
correct place. This practically will mean that we encountered a
new not implemented element.
"""
name = snake_case(type(element).__name__)
parent = self.elements[-1]
if hasattr(parent, name):
return setattr(parent, name, element)
else:
plural_name = "{}s".format(name)
if hasattr(parent, plural_name):
children = getattr(parent, plural_name)
if type(children) == list:
return children.append(element)
raise ValueError(
"Class {} missing attribute `{}`".format(
type(parent).__name__, name
)
)