Skip to content

Writers¤

The following Writers are available

HDF5Writer ¤

HDF5Writer(
    file: PathLike | WriteLike | None = None,
    **metadata: Any,
)

Bases: Writer

Writer for the HDF5 file format.

You can use this Writer as a context manager, for example,

with HDF5Writer("my_file.h5") as root:
    root.create_dataset("dset", data=[1, 2, 3])

This will automatically write root to the specified file when the with block exits.

Info

This Writer requires the h5py package to be installed.

Source code in src/msl/io/base.py
75
76
77
78
79
80
81
82
83
84
def __init__(self, file: PathLike | WriteLike | None = None, **metadata: Any) -> None:
    """Abstract base class for a [Writer][msl-io-writers].

    Args:
        file: Where the data gets written. It may be omitted here and supplied
            later to the [write][msl.io.base.Writer.write] method instead.
        metadata: Every keyword argument is used as [Metadata][msl.io.metadata.Metadata].
    """
    super().__init__(file, **metadata)

    # Starts out empty. Presumably filled by update_context_kwargs() and consumed
    # when the Writer is used as a context manager -- confirm in the Writer class.
    self._context_kwargs: dict[str, Any] = {}

    # Keep the target that was given at construction time (may be None).
    self._file: PathLike | WriteLike | None = file

write ¤

write(
    file: PathLike | WriteLike | None = None,
    root: Group | None = None,
    **kwargs: Any,
) -> None

Write to an HDF5 file.

Parameters:

Name Type Description Default
file PathLike | WriteLike | None

The file to write a root to. If None then uses the value of file that was specified when HDF5Writer was instantiated. If a file-like object, it must be open for writing in binary I/O and it must have read, write, seek, tell, truncate and flush methods.

None
root Group | None

Write root in HDF5 format. If None then write the Groups and Datasets in the HDF5Writer instance. This argument is useful when converting between different file formats.

None
kwargs Any

All additional keyword arguments are passed to h5py.File.

{}
Source code in src/msl/io/writers/hdf5.py
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
def write(  # noqa: C901, PLR0912, PLR0915
    self, file: PathLike | WriteLike | None = None, root: Group | None = None, **kwargs: Any
) -> None:
    """Write to an [HDF5](https://www.hdfgroup.org/){:target="_blank"} file.

    Args:
        file: The file to write a *root* to. If `None` then uses the value of
            `file` that was specified when [HDF5Writer][msl.io.writers.hdf5.HDF5Writer] was instantiated.
            If a file-like object, it must be open for writing in binary I/O and it must have `read`, `write`,
            `seek`, `tell`, `truncate` and `flush` methods.
        root: Write `root` in [HDF5](https://www.hdfgroup.org/){:target="_blank"} format.
            If `None` then write the [Group][msl.io.node.Group]s and [Dataset][msl.io.node.Dataset]s
            in the [HDF5Writer][msl.io.writers.hdf5.HDF5Writer] instance. This argument is useful when
            converting between different file formats.
        kwargs: All additional keyword arguments are passed to [h5py.File][]{:target="_blank"}.
            If `mode` is not given, `"x"` is used (create the file, fail if it exists).

    Raises:
        ImportError: If h5py is not installed.
        ValueError: If no file was specified, or if `file` is a path and `mode` is not one of
            `"x"`, `"w-"`, `"r+"`, `"w"` or `"a"`.
        TypeError: If `root` is not a [Group][msl.io.node.Group].
        FileExistsError: If `file` is a path that already exists and `mode` is `"x"` or `"w-"`.
        FileNotFoundError: If `file` is a path that does not exist and `mode` is `"r+"`.
    """
    if h5py is None:
        msg = "You must install h5py to write HDF5 files, run\n  pip install h5py"
        raise ImportError(msg)

    # Fall back to the file given at construction time.
    if file is None:
        file = self.file
    if not file:
        msg = "You must specify a file to write the root to"
        raise ValueError(msg)

    # Reject a bad root first, then default to writing this instance.
    if root is not None and not isinstance(root, Group):  # pyright: ignore[reportUnnecessaryIsInstance]
        msg = "The root parameter must be a Group object"  # type: ignore[unreachable]  # pyright: ignore[reportUnreachable]
        raise TypeError(msg)
    if root is None:
        root = self

    kwargs.setdefault("mode", "x")  # Create file, fail if exists

    @no_type_check
    def check_ndarray_dtype(obj: Any) -> Any:  # noqa: C901, PLR0911, PLR0912
        # Only numpy arrays are converted; every other value is passed through untouched.
        if not isinstance(obj, np.ndarray):
            return obj

        # h5py variable-length string
        v_str = h5py.special_dtype(vlen=str)

        field_names = obj.dtype.names
        if field_names is not None:
            # Structured array: a field whose first element is a str is re-typed
            # as a variable-length string, all other fields keep their dtype.
            new_dtype = []
            needs_cast = False
            for field in field_names:
                if isinstance(obj[field].item(0), str):
                    new_dtype.append((field, v_str))
                    needs_cast = True
                else:
                    new_dtype.append((field, obj.dtype.fields[field][0]))
            return obj.astype(dtype=new_dtype) if needs_cast else obj

        type_char = obj.dtype.char
        if type_char == "U":
            return obj.astype(dtype=v_str)
        if type_char != "O":
            return obj

        # Object array: the first str or None element decides the outcome
        # immediately, otherwise cast to complex or float.
        found_complex = False
        for element in obj.flat:
            if isinstance(element, str):
                return obj.astype(dtype="S")
            if isinstance(element, np.complexfloating):
                found_complex = True
            elif element is None:
                return obj  # let h5py raise the error that HDF5 does not support NULL
        return obj.astype(dtype=complex if found_complex else float)

    def meta_to_dict(metadata: Metadata) -> dict[str, dict[str, Any] | Any]:
        # Recursively turn (nested) Metadata into plain dicts, converting array dtypes on the way.
        converted: dict[str, dict[str, Any] | Any] = {}
        for key, val in metadata.items():
            converted[key] = meta_to_dict(val) if isinstance(val, Metadata) else check_ndarray_dtype(val)
        return converted

    @no_type_check
    def h5_open(f: BufferedIOBase) -> None:
        with h5py.File(f, **kwargs) as h5:
            h5.attrs.update(**meta_to_dict(root.metadata))
            for name, value in root.items():
                if not self.is_dataset(value):
                    vertex = h5.create_group(name)
                else:
                    # Try the data as-is first; convert the dtype only if h5py rejects it.
                    try:
                        vertex = h5.create_dataset(name, data=value.data)
                    except TypeError:
                        vertex = h5.create_dataset(name, data=check_ndarray_dtype(value.data))
                vertex.attrs.update(**meta_to_dict(value.metadata))

    # Anything that is not a path is handed to h5py unchanged.
    if not isinstance(file, (bytes, str, os.PathLike)):
        h5_open(file)
        return

    # Calling h5py.File to write to a file on a mapped drive could raise
    # an OSError. This occurred when a local folder was shared and then
    # mapped on the same computer. Opening the file using open() and then
    # passing in the file handle to h5py.File is more universal
    mode = kwargs["mode"]
    if mode in ("x", "w-"):
        if os.path.isfile(file) or is_file_readable(file):  # noqa: PTH113
            msg = f"File exists {file!r}\nSpecify mode='w' if you want to overwrite it."
            raise FileExistsError(msg)
    elif mode == "r+":
        exists = os.path.isfile(file) or is_file_readable(file)  # noqa: PTH113
        if not exists:
            msg = f"File does not exist {file!r}"
            raise FileNotFoundError(msg)
    elif mode not in ("w", "a"):
        msg = f"Invalid mode {mode!r}"
        raise ValueError(msg)

    # NOTE(review): the file is always opened with "w+b", which truncates it,
    # even when mode is "a" or "r+" -- confirm that this is intended.
    with open(file, mode="w+b") as fp:  # noqa: PTH123
        h5_open(fp)

JSONWriter ¤

JSONWriter(
    file: PathLike | WriteLike | None = None,
    **metadata: Any,
)

Bases: Writer

Writer for the JSON file format.

You can use this Writer as a context manager, for example,

with JSONWriter("my_file.json") as root:
    root.update_context_kwargs(indent=4)
    dset = root.create_dataset("dset", data=[1, 2, 3])

This will automatically write root to the specified file using four spaces as the indentation level (instead of the default value of two spaces) when the with block exits.

Source code in src/msl/io/base.py
75
76
77
78
79
80
81
82
83
84
def __init__(self, file: PathLike | WriteLike | None = None, **metadata: Any) -> None:
    """Abstract base class for a [Writer][msl-io-writers].

    Args:
        file: Where the data gets written. It may be omitted here and supplied
            later to the [write][msl.io.base.Writer.write] method instead.
        metadata: Every keyword argument is used as [Metadata][msl.io.metadata.Metadata].
    """
    super().__init__(file, **metadata)

    # Starts out empty. Presumably filled by update_context_kwargs() and consumed
    # when the Writer is used as a context manager -- confirm in the Writer class.
    self._context_kwargs: dict[str, Any] = {}

    # Keep the target that was given at construction time (may be None).
    self._file: PathLike | WriteLike | None = file

write ¤

write(
    file: PathLike | WriteLike | None = None,
    root: Group | None = None,
    **kwargs: Any,
) -> None

Write to a JSON file.

The first line in the output file contains a description that the file was created by the JSONWriter. It begins with a # and contains a version number.

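Version 1.0 specifications:

* Use the dtype and data keys to uniquely identify a JSON object as a Dataset.

* If a Metadata key has a value that is a Metadata object then the key becomes the name of a Group and the value becomes Metadata of that Group.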

Parameters:

Name Type Description Default
file PathLike | WriteLike | None

The file to write a root to. If None then uses the value of file that was specified when JSONWriter was instantiated.

None
root Group | None

Write root in JSON format. If None then write the Groups and Datasets in the JSONWriter instance. This argument is useful when converting between different file formats.

None
kwargs Any

Accepts mode, encoding and errors keyword arguments which are passed to open. The default encoding value is utf-8 and the default errors value is strict. All additional keyword arguments are passed to json.dump. The default indentation level is 2.

{}
Source code in src/msl/io/writers/json_.py
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def write(  # noqa: C901, PLR0912, PLR0915
    self, file: PathLike | WriteLike | None = None, root: Group | None = None, **kwargs: Any
) -> None:
    """Write to a [JSON](https://www.json.org/){:target="_blank"} file.

    The first line in the output file contains a description that the file was created by the
    [JSONWriter][msl.io.writers.json_.JSONWriter]. It begins with a `#` and contains a version number.

    Version 1.0 specifications:

    * Use the *dtype* and *data* keys to uniquely identify a
      [JSON](https://www.json.org/){:target="_blank"} object as a [Dataset][msl.io.node.Dataset].

    * If a [Metadata][msl.io.metadata.Metadata] *key* has a *value* that is a
      [Metadata][msl.io.metadata.Metadata] object then the *key* becomes the name
      of a [Group][msl.io.node.Group] and the *value* becomes
      [Metadata][msl.io.metadata.Metadata] of that [Group][msl.io.node.Group].

    Args:
        file: The file to write a *root* to. If `None` then uses the value of
            `file` that was specified when [JSONWriter][msl.io.writers.json_.JSONWriter] was instantiated.
        root: Write `root` in [JSON](https://www.json.org/){:target="_blank"} format.
            If `None` then write the [Group][msl.io.node.Group]s and [Dataset][msl.io.node.Dataset]s
            in the [JSONWriter][msl.io.writers.json_.JSONWriter] instance. This argument is useful when
            converting between different file formats.
        kwargs: Accepts `mode`, `encoding` and `errors` keyword arguments which are passed
            to [open][]{:target="_blank"}. The default `encoding` value is `utf-8` and the default
            `errors` value is `strict`. If `file` is a binary stream, `encoding` and `errors` are
            used to encode the text before it is written to the stream. All additional keyword
            arguments are passed to [json.dump][]{:target="_blank"}. The default indentation level is `2`.

    Raises:
        ValueError: If no file was specified, or if `file` is a path and `mode` is `"r"`.
        TypeError: If `root` is not a [Group][msl.io.node.Group].
        FileExistsError: If `file` is a path that already exists and `mode` was not specified.
    """
    if file is None:
        file = self.file

    if not file:
        msg = "You must specify a file to write the root to"
        raise ValueError(msg)

    if root is None:
        root = self
    elif not isinstance(root, Group):  # pyright: ignore[reportUnnecessaryIsInstance]
        msg = "The root parameter must be a Group object"  # type: ignore[unreachable]  # pyright: ignore[reportUnreachable]
        raise TypeError(msg)

    def add_dataset(d: dict[str, Any], dataset: Dataset) -> None:
        # The "dtype" and "data" keys are what mark a JSON object as a Dataset.
        if dataset.dtype.fields:
            # Structured dtype: store [field name, field dtype] pairs.
            d["dtype"] = np.array([[name, str(dtype)] for name, (dtype, _) in dataset.dtype.fields.items()])
        else:
            d["dtype"] = dataset.dtype.str
        d["data"] = dataset.data

    def meta_to_dict(metadata: Metadata) -> dict[str, dict[str, Any] | Any]:
        # Recursively turn (nested) Metadata into plain dicts.
        return {k: meta_to_dict(v) if isinstance(v, Metadata) else v for k, v in metadata.items()}

    dict_ = dict(**meta_to_dict(root.metadata))

    for name, value in root.items():
        # nodes[0] is skipped; names presumably start with "/" -- confirm against Group.items().
        nodes = name.split("/")
        root_key = nodes[1]

        if root_key not in dict_:
            dict_[root_key] = dict(**meta_to_dict(value.metadata))
            if root.is_dataset(value):
                add_dataset(dict_[root_key], value)  # type: ignore[arg-type]  # pyright: ignore[reportArgumentType]

        if len(nodes) > 2:  # noqa: PLR2004
            # Walk down to the parent of the leaf. This assumes that every ancestor
            # was already inserted, i.e. root.items() yields parents before children.
            vertex = dict_[root_key]
            for key in nodes[2:-1]:
                vertex = vertex[key]

            leaf_key = nodes[-1]
            if leaf_key not in vertex:
                vertex[leaf_key] = dict(**meta_to_dict(value.metadata))
                if root.is_dataset(value):
                    add_dataset(vertex[leaf_key], value)  # type: ignore[arg-type]  # pyright: ignore[reportArgumentType]

    # Split off the keyword arguments meant for open(); the rest go to json.dump(s).
    open_kwargs = {
        "mode": kwargs.pop("mode", None),
        "encoding": kwargs.pop("encoding", "utf-8"),
        "errors": kwargs.pop("errors", "strict"),
    }

    is_path = isinstance(file, (bytes, str, os.PathLike))

    if is_path:
        if not open_kwargs["mode"]:
            # No explicit mode: write, but refuse to overwrite an existing file.
            open_kwargs["mode"] = "w"
            if os.path.isfile(file) or is_file_readable(file):  # noqa: PTH113
                msg = f"File exists {file!r}\nSpecify mode='w' if you want to overwrite it."
                raise FileExistsError(msg)
        elif open_kwargs["mode"] == "r":
            msg = f"Invalid mode {open_kwargs['mode']!r}"
            raise ValueError(msg)
        elif open_kwargs["mode"] == "a":
            # Append mode is replaced by write mode, so the file is overwritten.
            open_kwargs["mode"] = "w"

    kwargs.setdefault("indent", 2)
    kwargs.setdefault("cls", _NumpyEncoder)

    # header => f"#File created with: MSL {self.__class__.__name__} version 1.0\n"
    #
    # Don't use the above definition of 'header' since JSONWriter could be sub-classed
    # and therefore the value of self.__class__.__name__ would change. The
    # JSONReader.can_read() method expects the text 'MSL JSONWriter' to be in a
    # specific location on the first line in the file.
    header = "#File created with: MSL JSONWriter version 1.0\n"

    if is_path:
        with open(file, **open_kwargs) as fp:  # pyright: ignore[reportUnknownVariableType]  # noqa: PTH123
            _ = fp.write(header)  # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]
            json.dump(dict_, fp, **kwargs)  # pyright: ignore[reportUnknownArgumentType]
    elif isinstance(file, BufferedIOBase):
        # Binary stream: encode the text ourselves. The `errors` handler is passed on
        # as well (previously it was silently ignored here); str.encode() does not
        # accept None for it, so fall back to the "strict" default in that case.
        encoding = open_kwargs["encoding"]
        errors = open_kwargs["errors"] or "strict"
        _ = file.write(header.encode(encoding, errors))  # pyright: ignore[reportArgumentType]
        _ = file.write(json.dumps(dict_, **kwargs).encode(encoding, errors))  # pyright: ignore[reportArgumentType]
    else:
        # Any other object is treated as a text stream.
        _ = file.write(header)  # type: ignore[arg-type]  # pyright: ignore[reportArgumentType]
        json.dump(dict_, file, **kwargs)  # type: ignore[arg-type]  # pyright: ignore[reportArgumentType]