Skip to content

downloader

xsdata.utils.downloader

Downloader

Remote recursive resource downloader.

Helper class to download a schema or a definitions document, together with all of its imports, to a local directory. The import paths will be adjusted if necessary.

Parameters:

Name Type Description Default
output Path

The output path

required

Attributes:

Name Type Description
base_path Optional[Path]

The base path for the resources

downloaded Dict

A cache of the downloaded resources

Source code in xsdata/utils/downloader.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
class Downloader:
    """Remote recursive resource downloader.

    Helper class that downloads a schema or a definitions document, together
    with all of its imports, to a local directory. The import locations
    inside the saved files are adjusted to the local copies if necessary.

    Args:
        output: The output path

    Attributes:
        base_path: The base path for the resources
        downloaded: A cache of the downloaded resources
    """

    __slots__ = ("output", "base_path", "downloaded")

    def __init__(self, output: Path):
        self.output = output
        self.base_path: Optional[Path] = None
        # Maps every fetched uri / import location to the written file path;
        # the value is None while that resource is still being processed.
        self.downloaded: Dict = {}

    def wget(self, uri: str, location: Optional[str] = None):
        """Download handler for any uri input with circular protection.

        Args:
            uri: The resource URI to fetch
            location: The import location of the resource, if any
        """
        if uri in self.downloaded or (location and location in self.downloaded):
            return

        # Reserve the keys before recursing into the imports, so that a
        # circular import is stopped by the guard above.
        self.downloaded[uri] = None
        if location:
            # Only real locations are cached; a None key would be useless.
            self.downloaded[location] = None
        self.adjust_base_path(uri)

        logger.info("Fetching %s", uri)

        response = opener.open(uri)  # nosec
        try:
            input_stream = response.read()
        finally:
            # Release the underlying connection/file handle right away.
            response.close()

        if uri.endswith("wsdl"):
            self.parse_definitions(uri, input_stream)
        else:
            self.parse_schema(uri, input_stream)

        # The imports are processed first, so their local paths are known
        # by the time this document's import locations are rewritten.
        self.write_file(uri, location, input_stream.decode())

    def parse_schema(self, uri: str, content: bytes):
        """Convert content to a schema instance and process all sub imports.

        Args:
            uri: The schema URI, passed to the parser as its location
            content: The raw schema bytes
        """
        parser = SchemaParser(location=uri)
        schema = parser.from_bytes(content, Schema)
        self.wget_included(schema)

    def parse_definitions(self, uri: str, content: bytes):
        """Convert content to a definitions instance and process all sub imports.

        Args:
            uri: The definitions URI, passed to the parser as its location
            content: The raw definitions bytes
        """
        parser = DefinitionsParser(location=uri)
        definitions = parser.from_bytes(content, Definitions)
        self.wget_included(definitions)

        # The schemas exposed by the definitions may include resources too.
        for schema in definitions.schemas:
            self.wget_included(schema)

    def wget_included(self, definition: Union[Schema, Definitions]):
        """Download the definitions included resources.

        Args:
            definition: A schema or definitions instance
        """
        for included in definition.included():
            if included.location:
                # Not every included element has a schema_location attribute.
                schema_location = getattr(included, "schema_location", None)
                self.wget(included.location, schema_location)

    def adjust_base_path(self, uri: str):
        """Adjust base path for every new uri loaded.

        Example runs:
            - file:///schemas/air_v48_0/Air.wsdl -> file:///schemas/air_v48_0
            - file:///schemas/common_v48_0/CommonReqRsp.xsd -> file:///schemas

        Args:
            uri: A resource location URI
        """
        if not self.base_path:
            self.base_path = Path(uri).parent
            logger.info("Setting base path to %s", self.base_path)
        else:
            common_path = os.path.commonpath((str(self.base_path) or "", uri))

            if common_path:
                common_path_path = Path(common_path)
                # Only ever move the base path up towards the common ancestor.
                if common_path_path < self.base_path:
                    self.base_path = common_path_path
                    logger.info("Adjusting base path to %s", self.base_path)

    def adjust_imports(self, path: Path, content: str) -> str:
        """Update the location of the imports to point to the downloaded files.

        Every double-quoted attribute ending in ``ocation`` whose value is a
        key of the download cache mapped to a written file is replaced by
        the relative path from ``path`` to that file, using forward slashes.

        Args:
            path: The directory the content is going to be written to
            content: The raw content string

        Returns:
            The content with the known import locations rewritten.
        """

        def relocate(match):
            local_file = self.downloaded.get(match.group(1))
            if not isinstance(local_file, Path):
                # Unknown or still in-progress resource, leave it untouched.
                return match.group(0)

            relative = os.path.relpath(local_file, path).replace("\\", "/")
            return f'ocation="{relative}"'

        # ``[^"]*`` stops at the closing quote of the attribute itself; a
        # greedy ``.*`` would swallow any attributes following on the line.
        return re.sub(r'ocation="([^"]*)"', relocate, content)

    def write_file(self, uri: str, location: Optional[str], content: str):
        """Write the downloaded uri to a local file.

        Keep track of all the written file paths, in case we have to
        modify the location attribute in an upcoming schema/definition
        import.

        Args:
            uri: The resource URI
            location: The import location of the resource
            content: The raw content string
        """
        common_path = os.path.commonpath((self.base_path or "", uri))
        if common_path:
            # Keep the directory layout below the common path.
            file_path = self.output.joinpath(Path(uri).relative_to(common_path))
        else:
            file_path = self.output.joinpath(Path(uri).name)

        content = self.adjust_imports(file_path.parent, content)
        file_path.parent.mkdir(parents=True, exist_ok=True)
        file_path.write_text(content, encoding="utf-8")

        logger.info("Writing %s", file_path)
        self.downloaded[uri] = file_path

        if location:
            self.downloaded[location] = file_path

wget(uri, location=None)

Download handler for any uri input with circular protection.

Source code in xsdata/utils/downloader.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def wget(self, uri: str, location: Optional[str] = None):
    """Download handler for any uri input with circular protection.

    Args:
        uri: The resource URI to fetch
        location: The import location of the resource, if any
    """
    if uri in self.downloaded or (location and location in self.downloaded):
        return

    # Reserve the keys before recursing into the imports, so that a
    # circular import is stopped by the guard above.
    self.downloaded[uri] = None
    if location:
        # Only real locations are cached; a None key would be useless.
        self.downloaded[location] = None
    self.adjust_base_path(uri)

    logger.info("Fetching %s", uri)

    response = opener.open(uri)  # nosec
    try:
        input_stream = response.read()
    finally:
        # Release the underlying connection/file handle right away.
        response.close()

    if uri.endswith("wsdl"):
        self.parse_definitions(uri, input_stream)
    else:
        self.parse_schema(uri, input_stream)

    # The imports are processed first, so their local paths are known by
    # the time this document's import locations are rewritten.
    self.write_file(uri, location, input_stream.decode())

parse_schema(uri, content)

Convert content to a schema instance and process all sub imports.

Source code in xsdata/utils/downloader.py
51
52
53
54
55
def parse_schema(self, uri: str, content: bytes):
    """Parse the raw bytes into a schema and fetch everything it includes.

    Args:
        uri: The schema URI, passed to the parser as its location
        content: The raw schema bytes
    """
    parsed = SchemaParser(location=uri).from_bytes(content, Schema)
    self.wget_included(parsed)

parse_definitions(uri, content)

Convert content to a definitions instance and process all sub imports.

Source code in xsdata/utils/downloader.py
57
58
59
60
61
62
63
64
def parse_definitions(self, uri: str, content: bytes):
    """Parse the raw bytes into definitions and fetch everything they include.

    Args:
        uri: The definitions URI, passed to the parser as its location
        content: The raw definitions bytes
    """
    document = DefinitionsParser(location=uri).from_bytes(content, Definitions)

    # First the resources included by the definitions themselves ...
    self.wget_included(document)

    # ... then the ones included by each schema the definitions expose.
    for embedded in document.schemas:
        self.wget_included(embedded)

wget_included(definition)

Download the definitions included resources.

Source code in xsdata/utils/downloader.py
66
67
68
69
70
71
def wget_included(self, definition: Union[Schema, Definitions]):
    """Fetch every resource the given schema or definitions includes.

    Entries without a location are skipped.

    Args:
        definition: A schema or definitions instance
    """
    for entry in definition.included():
        if not entry.location:
            continue

        # Not every included element has a schema_location attribute.
        declared = getattr(entry, "schema_location", None)
        self.wget(entry.location, declared)

adjust_base_path(uri)

Adjust base path for every new uri loaded.

Example runs
  • file:///schemas/air_v48_0/Air.wsdl -> file:///schemas/air_v48_0
  • file:///schemas/common_v48_0/CommonReqRsp.xsd -> file:///schemas

Parameters:

Name Type Description Default
uri str

A resource location URI

required
Source code in xsdata/utils/downloader.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def adjust_base_path(self, uri: str):
    """Widen the base path so that it also covers the given uri.

    The first uri sets the base path to its parent. Every later uri can
    only move the base path up to the path it has in common with that uri.

    Example runs:
        - file:///schemas/air_v48_0/Air.wsdl -> file:///schemas/air_v48_0
        - file:///schemas/common_v48_0/CommonReqRsp.xsd -> file:///schemas

    Args:
        uri: A resource location URI
    """
    current = self.base_path
    if not current:
        self.base_path = Path(uri).parent
        logger.info("Setting base path to %s", self.base_path)
        return

    shared = os.path.commonpath((str(current) or "", uri))
    if not shared:
        # Nothing in common, keep the base path as it is.
        return

    if Path(shared) < current:
        self.base_path = Path(shared)
        logger.info("Adjusting base path to %s", self.base_path)

adjust_imports(path, content)

Update the location of the imports to point to the downloaded files.

Source code in xsdata/utils/downloader.py
 95
 96
 97
 98
 99
100
101
102
103
104
def adjust_imports(self, path: Path, content: str) -> str:
    """Update the location of the imports to point to the downloaded files.

    Every double-quoted attribute ending in ``ocation`` whose value is a key
    of ``self.downloaded`` mapped to a written file is replaced by the
    relative path from ``path`` to that file, using forward slashes.

    Args:
        path: The directory the content is going to be written to
        content: The raw content string

    Returns:
        The content with the known import locations rewritten.
    """

    def relocate(match):
        local_file = self.downloaded.get(match.group(1))
        if not isinstance(local_file, Path):
            # Unknown or still in-progress resource, leave it untouched.
            return match.group(0)

        relative = os.path.relpath(local_file, path).replace("\\", "/")
        return f'ocation="{relative}"'

    # ``[^"]*`` stops at the closing quote of the attribute itself; a greedy
    # ``.*`` would swallow any attributes following on the same line.
    return re.sub(r'ocation="([^"]*)"', relocate, content)

write_file(uri, location, content)

Write the downloaded uri to a local file.

Keep track of all the written file paths, in case we have to modify the location attribute in an upcoming schema/definition import.

Parameters:

Name Type Description Default
uri str

The resource URI

required
location Optional[str]

The import location of the resource

required
content str

The raw content string

required
Source code in xsdata/utils/downloader.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
def write_file(self, uri: str, location: Optional[str], content: str):
    """Store the downloaded content under the output directory.

    The written path is recorded in the download cache for the uri and,
    when given, for the import location, so that the location attribute
    of an upcoming schema/definition import can be modified.

    Args:
        uri: The resource URI
        location: The import location of the resource
        content: The raw content string
    """
    shared = os.path.commonpath((self.base_path or "", uri))
    source = Path(uri)

    # Keep the directory layout below the common path, when there is one,
    # otherwise fall back to the bare file name.
    tail = source.relative_to(shared) if shared else source.name
    target = self.output.joinpath(tail)
    folder = target.parent

    rewritten = self.adjust_imports(folder, content)
    folder.mkdir(parents=True, exist_ok=True)
    target.write_text(rewritten, encoding="utf-8")

    logger.info("Writing %s", target)

    cache_keys = [uri]
    if location:
        cache_keys.append(location)
    for key in cache_keys:
        self.downloaded[key] = target