Skip to content

spreadsheets¤

Classes for reading cells in spreadsheets.

Spreadsheet ¤

Spreadsheet(file)

Bases: ABC

Abstract base class for spreadsheets.

Parameters:

Name Type Description Default
file str

The location of the spreadsheet on a local hard drive or on a network drive.

required
Source code in src/msl/io/readers/spreadsheet.py
19
20
21
22
23
24
25
def __init__(self, file: str) -> None:
    """Abstract base class for spreadsheets.

    Args:
        file: The location of the spreadsheet on a local hard drive or on a network drive.
    """
    self._file: str = file

file property ¤

file

str — The location of the spreadsheet on a local hard drive or on a network drive.

read abstractmethod ¤

read(
    cells=None,
    sheet=None,
    *,
    as_datetime=True,
    merged=False,
)

Read values from the spreadsheet.

You must override this method.

Parameters:

Name Type Description Default
cells str | None

The cell(s) to read. For example, C9 will return a single value and C9:G20 will return all values in the specified range. If not specified then returns all values in the specified sheet.

None
sheet str | None

The name of the sheet to read the value(s) from. If there is only one sheet in the spreadsheet then you do not need to specify the name of the sheet.

None
as_datetime bool

Whether dates should be returned as datetime or date objects. If False, dates are returned as a string in the format of the spreadsheet cell.

True
merged bool

Applies to cells that are merged with other cells. The details depend on the type of spreadsheet document that is being read. Some documents allow the hidden cells that are part of a merger to retain its unmerged value. Other documents associate the merged value only with the top-left cell and all other cells in the merger are empty.

False

Returns:

Type Description
Any | list[tuple[Any, ...]]

The value(s) of the requested cell(s).

Source code in src/msl/io/readers/spreadsheet.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@abstractmethod
def read(
    self, cells: str | None = None, sheet: str | None = None, *, as_datetime: bool = True, merged: bool = False
) -> Any | list[tuple[Any, ...]]:
    """Read values from the spreadsheet.

    !!! warning "You must override this method."

    Args:
        cells: The cell(s) to read. For example, `C9` will return a single value
            and `C9:G20` will return all values in the specified range. If not
            specified then returns all values in the specified `sheet`.
        sheet: The name of the sheet to read the value(s) from. If there is only one
            sheet in the spreadsheet then you do not need to specify the name of the sheet.
        as_datetime: Whether dates should be returned as [datetime][datetime.datetime] or
            [date][datetime.date] objects. If `False`, dates are returned as a string in
            the format of the spreadsheet cell.
        merged: Applies to cells that are merged with other cells. The details depend
            on the type of spreadsheet document that is being read. Some documents allow
            the hidden cells that are part of a merger to retain its unmerged value. Other
            documents associate the merged value only with the top-left cell and all
            other cells in the merger are empty.

    Returns:
        The value(s) of the requested cell(s).
    """

sheet_names abstractmethod ¤

sheet_names()

Get the names of all sheets in the spreadsheet.

You must override this method.

Returns:

Type Description
tuple[str, ...]

The names of all sheets.

Source code in src/msl/io/readers/spreadsheet.py
59
60
61
62
63
64
65
66
67
@abstractmethod
def sheet_names(self) -> tuple[str, ...]:
    """Get the names of all sheets in the spreadsheet.

    !!! warning "You must override this method."

    Returns:
        The names of all sheets.
    """

to_indices staticmethod ¤

to_indices(cell)

Convert a string representation of a cell to row and column indices.

Parameters:

Name Type Description Default
cell str

The cell. Can be letters only (a column) or letters and a number (a column and a row).

required

Returns:

Type Description
tuple[int | None, int]

The (row_index, column_index). If cell does not contain a row number then the row index is None. The row and column index are zero based.

Examples:

>>> to_indices("A")
(None, 0)
>>> to_indices("A1")
(0, 0)
>>> to_indices("AA10")
(9, 26)
>>> to_indices("AAA111")
(110, 702)
>>> to_indices("MSL123456")
(123455, 9293)
>>> to_indices("BIPM")
(None, 41664)
Source code in src/msl/io/readers/spreadsheet.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
@staticmethod
def to_indices(cell: str) -> tuple[int | None, int]:
    """Convert a string representation of a cell to row and column indices.

    Args:
        cell: The cell. Can be letters only (a column) or letters and a number
            (a column and a row).

    Returns:
        The *(row_index, column_index)*. If `cell` does not contain a row number
            then the row index is `None`. The row and column index are zero based.

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> from msl.io.readers.spreadsheet import Spreadsheet
    >>> to_indices = Spreadsheet.to_indices

    -->

    ```pycon
    >>> to_indices("A")
    (None, 0)
    >>> to_indices("A1")
    (0, 0)
    >>> to_indices("AA10")
    (9, 26)
    >>> to_indices("AAA111")
    (110, 702)
    >>> to_indices("MSL123456")
    (123455, 9293)
    >>> to_indices("BIPM")
    (None, 41664)

    ```
    """
    match = _cell_regex.match(cell)
    if not match:
        msg = f"Invalid cell {cell!r}"
        raise ValueError(msg)

    letters, numbers = match.groups()
    row = max(0, int(numbers) - 1) if numbers else None
    uppercase = string.ascii_uppercase
    col = sum((26**i) * (1 + uppercase.index(c)) for i, c in enumerate(letters[::-1]))
    return row, col - 1

to_letters staticmethod ¤

to_letters(index)

Convert a column index to column letters.

Parameters:

Name Type Description Default
index int

The column index (zero based).

required

Returns:

Type Description
str

The corresponding spreadsheet column letter(s).

Examples:

>>> to_letters(0)
'A'
>>> to_letters(1)
'B'
>>> to_letters(26)
'AA'
>>> to_letters(702)
'AAA'
>>> to_letters(494264)
'ABCDE'
Source code in src/msl/io/readers/spreadsheet.py
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
@staticmethod
def to_letters(index: int) -> str:
    """Convert a column index to column letters.

    Args:
        index: The column index (zero based).

    Returns:
        The corresponding spreadsheet column letter(s).

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> from msl.io.readers.spreadsheet import Spreadsheet
    >>> to_letters = Spreadsheet.to_letters

    -->

    ```pycon
    >>> to_letters(0)
    'A'
    >>> to_letters(1)
    'B'
    >>> to_letters(26)
    'AA'
    >>> to_letters(702)
    'AAA'
    >>> to_letters(494264)
    'ABCDE'

    ```
    """
    letters: list[str] = []
    uppercase = string.ascii_uppercase
    while index >= 0:
        div, mod = divmod(index, 26)
        letters.append(uppercase[mod])
        index = div - 1
    return "".join(letters[::-1])

to_slices staticmethod ¤

to_slices(cells, row_step=None, column_step=None)

Convert a range of cells to slices of row and column indices.

Parameters:

Name Type Description Default
cells str

The cells. Can be letters only (a column) or letters and a number (a column and a row).

required
row_step int | None

The step-by value for the row slice.

None
column_step int | None

The step-by value for the column slice.

None

Returns:

Type Description
tuple[slice[int, int | None, int | None], slice[int, int, int | None]]

The row slice, the column slice.

Examples:

>>> to_slices("A:A")
(slice(0, None, None), slice(0, 1, None))
>>> to_slices("A:H")
(slice(0, None, None), slice(0, 8, None))
>>> to_slices("B2:M10")
(slice(1, 10, None), slice(1, 13, None))
>>> to_slices("A5:M100", row_step=2, column_step=4)
(slice(4, 100, 2), slice(0, 13, 4))
Source code in src/msl/io/readers/spreadsheet.py
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
@staticmethod
def to_slices(
    cells: str, row_step: int | None = None, column_step: int | None = None
) -> tuple[slice[int, int | None, int | None], slice[int, int, int | None]]:
    """Convert a range of cells to slices of row and column indices.

    Args:
        cells: The cells. Can be letters only (a column) or letters and a number
            (a column and a row).
        row_step: The step-by value for the row slice.
        column_step: The step-by value for the column slice.

    Returns:
        The row [slice][], the column [slice][].

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> from msl.io.readers.spreadsheet import Spreadsheet
    >>> to_slices = Spreadsheet.to_slices

    -->

    ```pycon
    >>> to_slices("A:A")
    (slice(0, None, None), slice(0, 1, None))
    >>> to_slices("A:H")
    (slice(0, None, None), slice(0, 8, None))
    >>> to_slices("B2:M10")
    (slice(1, 10, None), slice(1, 13, None))
    >>> to_slices("A5:M100", row_step=2, column_step=4)
    (slice(4, 100, 2), slice(0, 13, 4))

    ```
    """
    split = cells.split(":")
    if len(split) != 2:  # noqa: PLR2004
        msg = f"Invalid cell range {cells!r}"
        raise ValueError(msg)

    r1, c1 = Spreadsheet.to_indices(split[0])
    r2, c2 = Spreadsheet.to_indices(split[1])
    if r1 is None:
        r1 = 0
    if r2 is not None:
        r2 += 1
    c2 += 1
    return slice(r1, r2, row_step), slice(c1, c2, column_step)

Read an Excel spreadsheet (.xls and .xlsx files).

ExcelReader ¤

ExcelReader(file, **kwargs)

Bases: Spreadsheet

Read an Excel spreadsheet (.xls and .xlsx files).

This class simply provides a convenience for reading cell values (not drawings or charts) from Excel spreadsheets. It is not registered as a Reader because the information in a spreadsheet is unstructured and therefore one cannot generalize how to parse a spreadsheet to create a Root.

Parameters:

Name Type Description Default
file PathLike

The path to an Excel spreadsheet file.

required
kwargs Any

All keyword arguments are passed to xlrd.open_workbook. You can use an encoding keyword argument as an alias for encoding_override. The default on_demand value is True.

{}

Examples:

from msl.io import ExcelReader
excel = ExcelReader("lab_environment.xlsx")

Source code in src/msl/io/readers/excel.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def __init__(self, file: PathLike, **kwargs: Any) -> None:
    """Read an Excel spreadsheet (*.xls* and *.xlsx* files).

    This class simply provides a convenience for reading cell values (not
    drawings or charts) from Excel spreadsheets. It is not registered as
    a [Reader][msl.io.base.Reader] because the information in a spreadsheet
    is unstructured and therefore one cannot generalize how to parse a
    spreadsheet to create a [Root][msl.io.base.Root].

    Args:
        file: The path to an Excel spreadsheet file.
        kwargs: All keyword arguments are passed to [xlrd.open_workbook][]{:target="_blank"}.
            You can use an `encoding` keyword argument as an alias for `encoding_override`.
            The default `on_demand` value is `True`.

    **Examples:**
    ```python
    from msl.io import ExcelReader
    excel = ExcelReader("lab_environment.xlsx")
    ```
    """
    f = os.fsdecode(file)
    super().__init__(f)

    # change the default on_demand value
    if "on_demand" not in kwargs:
        kwargs["on_demand"] = True

    # 'encoding' is an alias for 'encoding_override'
    encoding = kwargs.pop("encoding", None)
    if encoding is not None:
        kwargs["encoding_override"] = encoding

    self._workbook: Book = open_workbook(f, **kwargs)

close ¤

close()

Close the workbook.

Source code in src/msl/io/readers/excel.py
86
87
88
def close(self) -> None:
    """Close the workbook."""
    self._workbook.release_resources()

dimensions ¤

dimensions(sheet)

Get the number of rows and columns in a sheet.

Parameters:

Name Type Description Default
sheet str

The name of a sheet to get the dimensions of.

required

Returns:

Type Description
tuple[int, int]

The (number of rows, number of columns) in sheet.

Source code in src/msl/io/readers/excel.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def dimensions(self, sheet: str) -> tuple[int, int]:
    """Get the number of rows and columns in a sheet.

    Args:
        sheet: The name of a sheet to get the dimensions of.

    Returns:
        The *(number of rows, number of columns)* in `sheet`.
    """
    try:
        s = self._workbook.sheet_by_name(sheet)
    except XLRDError:
        msg = f"A sheet named {sheet!r} is not in {self._file!r}"
        raise ValueError(msg) from None
    else:
        return (s.nrows, s.ncols)

read ¤

read(
    cells=None,
    sheet=None,
    *,
    as_datetime=True,
    merged=False,
)

Read cell values from the Excel spreadsheet.

Parameters:

Name Type Description Default
cells str | None

The cell(s) to read. For example, C9 will return a single value and C9:G20 will return all values in the specified range. If not specified then returns all values in the specified sheet.

None
sheet str | None

The name of the sheet to read the value(s) from. If there is only one sheet in the spreadsheet then you do not need to specify the name of the sheet.

None
as_datetime bool

Whether dates should be returned as datetime or date objects. If False, dates are returned as an ISO 8601 string.

True
merged bool

Applies to cells that are merged with other cells. If cells are merged, then only the top-left cell has the value and all other cells in the merger are empty. Enabling this argument is currently not supported and the value must be False.

False

Returns:

Type Description
Any | list[tuple[Any, ...]]

The value(s) of the requested cell(s).

Examples:

>>> excel.read()
[('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
>>> excel.read("B2")
49.82
>>> excel.read("A:A")
[('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
>>> excel.read("A1:B1")
[('temperature', 'humidity')]
>>> excel.read("A2:B4")
[(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]
Source code in src/msl/io/readers/excel.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def read(  # noqa: C901
    self, cells: str | None = None, sheet: str | None = None, *, as_datetime: bool = True, merged: bool = False
) -> Any | list[tuple[Any, ...]]:
    """Read cell values from the Excel spreadsheet.

    Args:
        cells: The cell(s) to read. For example, `C9` will return a single value
            and `C9:G20` will return all values in the specified range. If not
            specified then returns all values in the specified `sheet`.
        sheet: The name of the sheet to read the value(s) from. If there is only
            one sheet in the spreadsheet then you do not need to specify the name
            of the sheet.
        as_datetime: Whether dates should be returned as [datetime][datetime.datetime] or
            [date][datetime.date] objects. If `False`, dates are returned as an
            ISO 8601 string.
        merged: Applies to cells that are merged with other cells. If cells are merged, then
            only the top-left cell has the value and all other cells in the merger are empty.
            Enabling this argument is currently not supported and the value must be `False`.

    Returns:
        The value(s) of the requested cell(s).

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> from msl.io import ExcelReader
    >>> excel = ExcelReader('./tests/samples/lab_environment.xlsx')

    -->

    ```pycon
    >>> excel.read()
    [('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
    >>> excel.read("B2")
    49.82
    >>> excel.read("A:A")
    [('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
    >>> excel.read("A1:B1")
    [('temperature', 'humidity')]
    >>> excel.read("A2:B4")
    [(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]

    ```
    """
    if merged:
        msg = "The `merged` argument must be False to read an Excel spreadsheet"
        raise ValueError(msg)

    if not sheet:
        names = self.sheet_names()
        if len(names) == 1:
            sheet_name = names[0]
        elif not names:
            msg = "Cannot determine the names of the sheets in the Excel file"
            raise ValueError(msg)
        else:
            sheets = ", ".join(repr(n) for n in names)
            msg = (
                f"{self.file!r} contains the following sheets:\n  {sheets}\n"
                f"You must specify the name of the sheet to read"
            )
            raise ValueError(msg)
    else:
        sheet_name = sheet

    try:
        _sheet = self._workbook.sheet_by_name(sheet_name)
    except XLRDError:
        msg = f"A sheet named {sheet_name!r} is not in {self._file!r}"
        raise ValueError(msg) from None

    if not cells:
        return [
            tuple(self._value(_sheet, r, c, as_datetime) for c in range(_sheet.ncols)) for r in range(_sheet.nrows)
        ]

    split = cells.split(":")
    r1, c1 = self.to_indices(split[0])
    if r1 is None:
        r1 = 0

    if len(split) == 1:
        try:
            return self._value(_sheet, r1, c1, as_datetime=as_datetime)
        except IndexError:
            return None

    if r1 >= _sheet.nrows or c1 >= _sheet.ncols:
        return []

    r2, c2 = self.to_indices(split[1])
    r2 = _sheet.nrows if r2 is None else min(r2 + 1, _sheet.nrows)
    c2 = min(c2 + 1, _sheet.ncols)
    return [tuple(self._value(_sheet, r, c, as_datetime) for c in range(c1, c2)) for r in range(r1, r2)]

sheet_names ¤

sheet_names()

Get the names of all sheets in the Excel spreadsheet.

Returns:

Type Description
tuple[str, ...]

The names of all sheets.

Source code in src/msl/io/readers/excel.py
201
202
203
204
205
206
207
def sheet_names(self) -> tuple[str, ...]:
    """Get the names of all sheets in the Excel spreadsheet.

    Returns:
        The names of all sheets.
    """
    return tuple(self._workbook.sheet_names())

Read a Google Sheets spreadsheet.

GSheetsReader ¤

GSheetsReader(file, *, account=None, credentials=None)

Bases: Spreadsheet

Read a Google Sheets spreadsheet.

This class simply provides a convenience for reading cell values (not drawings or charts) from Google spreadsheets. It is not registered as a Reader because the information in a spreadsheet is unstructured and therefore one cannot generalize how to parse a spreadsheet to create a Root.

Parameters:

Name Type Description Default
file PathLike

The ID or path of a Google Sheets spreadsheet.

required
account str | None

Since a person may have multiple Google accounts, and multiple people may run the same code, this parameter decides which token to load to authenticate with the Google API. The value can be any text (or None) that you want to associate with a particular Google account, provided that it contains valid characters for a filename. The value that you chose when you authenticated with your credentials should be used for all future instances of this class to access that particular Google account. You can associate a different value with a Google account at any time (by passing in a different account value), but you may be asked to authenticate with your credentials again, or, alternatively, you can rename the token files located in MSL_IO_DIR to match the new account value.

None
credentials PathLike | None

The path to the client secrets OAuth credential file. This parameter only needs to be specified the first time that you authenticate with a particular Google account or if you delete the token file that was created when you previously authenticated.

None

Examples:

from msl.io import GSheetsReader

# Specify the path
sheets = GSheetsReader("Google Drive/registers/equipment.gsheet")

# Specify the ID
sheets = GSheetsReader("1TI3pM-534SZ5DQTEZ-7HCI04648f8ZpLGbfHWJu9FSo")

Source code in src/msl/io/readers/gsheets.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def __init__(
    self,
    file: PathLike,
    *,
    account: str | None = None,
    credentials: PathLike | None = None,
) -> None:
    """Read a Google Sheets spreadsheet.

    This class simply provides a convenience for reading cell values (not
    drawings or charts) from Google spreadsheets. It is not registered as
    a [Reader][msl.io.base.Reader] because the information in a spreadsheet
    is unstructured and therefore one cannot generalize how to parse a
    spreadsheet to create a [Root][msl.io.base.Root].

    Args:
        file: The ID or path of a Google Sheets spreadsheet.
        account: Since a person may have multiple Google accounts, and multiple people
            may run the same code, this parameter decides which token to load
            to authenticate with the Google API. The value can be any text (or
            `None`) that you want to associate with a particular Google
            account, provided that it contains valid characters for a filename.
            The value that you chose when you authenticated with your `credentials`
            should be used for all future instances of this class to access that
            particular Google account. You can associate a different value with
            a Google account at any time (by passing in a different `account`
            value), but you may be asked to authenticate with your `credentials`
            again, or, alternatively, you can rename the token files located in
            [MSL_IO_DIR][msl.io.constants.MSL_IO_DIR] to match the new `account` value.
        credentials: The path to the *client secrets* OAuth credential file. This
            parameter only needs to be specified the first time that you
            authenticate with a particular Google account or if you delete
            the token file that was created when you previously authenticated.

    **Examples:**
    ```python
    from msl.io import GSheetsReader

    # Specify the path
    sheets = GSheetsReader("Google Drive/registers/equipment.gsheet")

    # Specify the ID
    sheets = GSheetsReader("1TI3pM-534SZ5DQTEZ-7HCI04648f8ZpLGbfHWJu9FSo")
    ```
    """
    file = os.fsdecode(file)
    super().__init__(file)

    path, ext = os.path.splitext(file)  # noqa: PTH122
    folders, _ = os.path.split(path)

    self._spreadsheet_id: str
    if ext or folders or not _google_file_id_regex.match(path):
        self._spreadsheet_id = GDrive(account=account, credentials=credentials).file_id(
            path, mime_type=GSheets.MIME_TYPE
        )
    else:
        self._spreadsheet_id = path

    self._gsheets: GSheets = GSheets(account=account, credentials=credentials, read_only=True)
    self._cached_sheet_name: str | None = None

close ¤

close()

Close the connection to the GSheet API service.

Source code in src/msl/io/readers/gsheets.py
104
105
106
def close(self) -> None:
    """Close the connection to the GSheet API service."""
    self._gsheets.close()

read ¤

read(
    cells=None,
    sheet=None,
    *,
    as_datetime=True,
    merged=False,
)

Read cell values from the Google Sheets spreadsheet.

Parameters:

Name Type Description Default
cells str | None

The cell(s) to read. For example, C9 will return a single value and C9:G20 will return all values in the specified range. If not specified then returns all values in the specified sheet.

None
sheet str | None

The name of the sheet to read the value(s) from. If there is only one sheet in the spreadsheet then you do not need to specify the name of the sheet.

None
as_datetime bool

Whether dates should be returned as datetime or date objects. If False, dates are returned as a string in the display format of the spreadsheet cell.

True
merged bool

Applies to cells that are merged with other cells. If cells are merged, then only the top-left cell has the value and all other cells in the merger are empty. Enabling this argument is currently not supported and the value must be False.

False

Returns:

Type Description
Any | list[tuple[Any, ...]]

The value(s) of the requested cell(s).

Examples:

>>> sheets.read()
[('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
>>> sheets.read("B2")
49.82
>>> sheets.read("A:A")
[('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
>>> sheets.read("A1:B1")
[('temperature', 'humidity')]
>>> sheets.read("A2:B4")
[(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]
Source code in src/msl/io/readers/gsheets.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def read(  # noqa: C901, PLR0912
    self, cells: str | None = None, sheet: str | None = None, *, as_datetime: bool = True, merged: bool = False
) -> Any | list[tuple[Any, ...]]:
    """Read cell values from the Google Sheets spreadsheet.

    Args:
        cells: The cell(s) to read. For example, `C9` will return a single value
            and `C9:G20` will return all values in the specified range. If not
            specified then returns all values in the specified `sheet`.
        sheet: The name of the sheet to read the value(s) from. If there is only one
            sheet in the spreadsheet then you do not need to specify the name of the sheet.
        as_datetime: Whether dates should be returned as [datetime][datetime.datetime] or
            [date][datetime.date] objects. If `False`, dates are returned as a string in
            the display format of the spreadsheet cell.
        merged: Applies to cells that are merged with other cells. If cells are merged, then
            only the top-left cell has the value and all other cells in the merger are empty.
            Enabling this argument is currently not supported and the value must be `False`.

    Returns:
        The value(s) of the requested cell(s).

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> SKIP_IF_NO_GOOGLE_SHEETS_READ_TOKEN()
    >>> from msl.io import GSheetsReader
    >>> sheets = GSheetsReader('1TI3pM-534SZ5DQTEZ-7vCI04l48f8ZpLGbfEWJuCFSo', account='testing')

    -->

    ```pycon
    >>> sheets.read()
    [('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
    >>> sheets.read("B2")
    49.82
    >>> sheets.read("A:A")
    [('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
    >>> sheets.read("A1:B1")
    [('temperature', 'humidity')]
    >>> sheets.read("A2:B4")
    [(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]

    ```
    """
    if merged:
        msg = "The `merged` argument must be False to read a Google spreadsheet"
        raise ValueError(msg)

    if not sheet:
        if self._cached_sheet_name:
            sheet = self._cached_sheet_name
        else:
            names = self.sheet_names()
            if len(names) != 1:
                sheet_names = ", ".join(repr(n) for n in names)
                msg = (
                    f"{self._file} contains the following sheets:\n  {sheet_names}\n"
                    f"You must specify the name of the sheet to read"
                )
                raise ValueError(msg)
            sheet = names[0]
            self._cached_sheet_name = sheet

    ranges = f"{sheet}!{cells}" if cells else sheet

    cells_dict = self._gsheets.cells(self._spreadsheet_id, ranges=ranges)

    if sheet not in cells_dict:
        msg = f"A sheet named {sheet!r} is not in {self._file!r}"
        raise ValueError(msg)

    values: list[tuple[Any, ...]] = []
    for row in cells_dict[sheet]:
        row_values: list[Any] = []
        for item in row:
            if item.type == GCellType.DATE:
                value = GSheets.to_datetime(item.value).date() if as_datetime else item.formatted
            elif item.type == GCellType.DATE_TIME:
                value = GSheets.to_datetime(item.value) if as_datetime else item.formatted
            else:
                value = item.value
            row_values.append(value)
        values.append(tuple(row_values))

    if not cells:
        return values

    if ":" not in cells:
        if values:
            return values[0][0]
        return None

    return values

sheet_names ¤

sheet_names()

Get the names of all sheets in the Google Sheets spreadsheet.

Returns:

Type Description
tuple[str, ...]

The names of all sheets.

Source code in src/msl/io/readers/gsheets.py
201
202
203
204
205
206
207
def sheet_names(self) -> tuple[str, ...]:
    """Get the names of all sheets in the Google Sheets spreadsheet.

    Returns:
        The names of all sheets.
    """
    return self._gsheets.sheet_names(self._spreadsheet_id)

Read an OpenDocument Spreadsheet (.ods and .fods files, Version 1.2).

ODSReader ¤

ODSReader(file, **kwargs)

Bases: Spreadsheet

Read an OpenDocument Spreadsheet (.ods and .fods files, Version 1.2).

This class simply provides a convenience for reading cell values (not drawings or charts) from OpenDocument Spreadsheets. It is not registered as a Reader because the information in a spreadsheet is unstructured and therefore one cannot generalize how to parse a spreadsheet to create a Root.

Tip

If defusedxml is installed, that package is used to parse the contents of the file instead of the xml module.

Parameters:

Name Type Description Default
file PathLike

The path to an OpenDocument Spreadsheet file.

required
kwargs Any

All keyword arguments are ignored.

{}

Examples:

from msl.io import ODSReader
ods = ODSReader("lab_environment.ods")

Source code in src/msl/io/readers/ods.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def __init__(self, file: PathLike, **kwargs: Any) -> None:  # noqa: ARG002
    """Read an OpenDocument Spreadsheet (*.ods* and *.fods* files, Version 1.2).

    This class simply provides a convenience for reading cell values (not
    drawings or charts) from OpenDocument Spreadsheets. It is not registered
    as a [Reader][msl.io.base.Reader] because the information in a spreadsheet
    is unstructured and therefore one cannot generalize how to parse a
    spreadsheet to create a [Root][msl.io.base.Root].

    !!! tip
        If [defusedxml](https://pypi.org/project/defusedxml/){:target="_blank"} is installed,
        that package is used to parse the contents of the file instead of the
        [xml][]{:target="_blank"} module.

    Args:
        file: The path to an OpenDocument Spreadsheet file.
        kwargs: All keyword arguments are ignored.

    **Examples:**
    ```python
    from msl.io import ODSReader
    ods = ODSReader("lab_environment.ods")
    ```
    """
    f = os.fsdecode(file)
    super().__init__(f)

    self._spans: dict[int, tuple[int, Any]] = {}  # column-index: (spans-remaining, cell-value)

    ext = get_extension(f).lower()
    content: Element[str]
    if ext == ".ods":
        with ZipFile(f) as z:
            try:
                content = ET.XML(z.read("content.xml"))
            except SyntaxError as e:
                e.msg += "\nThe ODS file might be password protected (which is not supported)"
                raise
    elif ext == ".fods":
        with open(f, mode="rb") as fp:  # noqa: PTH123
            content = ET.XML(fp.read())
    else:
        msg = f"Unsupported OpenDocument Spreadsheet file extension {ext!r}"
        raise ValueError(msg)

    self._tables: dict[str, Element[str]] = {
        self._attribute(t, "table", "name"): t for t in content.findall(".//table:table", namespaces=self._ns)
    }

close ¤

close()

Free memory resources that are used to read the OpenDocument Spreadsheet.

Source code in src/msl/io/readers/ods.py
102
103
104
105
def close(self) -> None:
    """Free memory resources that are used to read the OpenDocument Spreadsheet."""
    self._tables.clear()
    self._spans.clear()

dimensions ¤

dimensions(sheet)

Get the number of rows and columns in a sheet.

Parameters:

Name Type Description Default
sheet str

The name of a sheet to get the dimensions of.

required

Returns:

Type Description
tuple[int, int]

The (number of rows, number of columns) in sheet.

Source code in src/msl/io/readers/ods.py
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
def dimensions(self, sheet: str) -> tuple[int, int]:
    """Get the number of rows and columns in a sheet.

    Args:
        sheet: The name of a sheet to get the dimensions of.

    Returns:
        The *(number of rows, number of columns)* in `sheet`.
    """
    table = self._tables.get(sheet)
    if table is None:
        msg = f"A sheet named {sheet!r} is not in {self._file!r}"
        raise ValueError(msg) from None

    num_cols = 0
    for col in table.findall(".//table:table-column", namespaces=self._ns):
        num_cols += int(self._attribute(col, "table", "number-columns-repeated", "1"))

    num_rows = sum(1 for _ in self._rows(table, 0, sys.maxsize - 1))
    return (num_rows, num_cols)

read ¤

read(
    cells=None,
    sheet=None,
    *,
    as_datetime=True,
    merged=False,
)

Read cell values from the OpenDocument Spreadsheet.

Parameters:

Name Type Description Default
cells str | None

The cell(s) to read. For example, C9 will return a single value and C9:G20 will return all values in the specified range. If not specified then returns all values in the specified sheet.

None
sheet str | None

The name of the sheet to read the value(s) from. If there is only one sheet in the spreadsheet then you do not need to specify the name of the sheet.

None
as_datetime bool

Whether dates should be returned as datetime or date objects. If False, dates are returned as a string in the display format of the spreadsheet cell.

True
merged bool

Applies to cells that are merged with other cells. If False, the value of each unmerged cell is returned, otherwise the same value is returned for all merged cells. In an OpenDocument Spreadsheet, the value of a hidden cell that is merged with a visible cell can still be retained (depends on how the merger was performed).

False

Returns:

Type Description
Any | list[tuple[Any, ...]]

The value(s) of the requested cell(s).

Examples:

>>> ods.read()
[('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
>>> ods.read("B2")
49.82
>>> ods.read("A:A")
[('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
>>> ods.read("A1:B1")
[('temperature', 'humidity')]
>>> ods.read("A2:B4")
[(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]
Source code in src/msl/io/readers/ods.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
def read(  # noqa: C901, PLR0912
    self, cells: str | None = None, sheet: str | None = None, *, as_datetime: bool = True, merged: bool = False
) -> Any | list[tuple[Any, ...]]:
    """Read cell values from the OpenDocument Spreadsheet.

    Args:
        cells: The cell(s) to read. For example, `C9` will return a single value
            and `C9:G20` will return all values in the specified range. If not
            specified then returns all values in the specified `sheet`.
        sheet: The name of the sheet to read the value(s) from. If there is only
            one sheet in the spreadsheet then you do not need to specify the name
            of the sheet.
        as_datetime: Whether dates should be returned as [datetime][datetime.datetime]
            or [date][datetime.date] objects. If `False`, dates are returned as a
            string in the display format of the spreadsheet cell.
        merged: Applies to cells that are merged with other cells. If `False`, the
            value of each unmerged cell is returned, otherwise the same value is
            returned for all merged cells. In an OpenDocument Spreadsheet, the value
            of a hidden cell that is merged with a visible cell can still be retained
            (depends on how the merger was performed).

    Returns:
        The value(s) of the requested cell(s).

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> from msl.io import ODSReader
    >>> ods = ODSReader('./tests/samples/lab_environment.ods')

    -->

    ```pycon
    >>> ods.read()
    [('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
    >>> ods.read("B2")
    49.82
    >>> ods.read("A:A")
    [('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
    >>> ods.read("A1:B1")
    [('temperature', 'humidity')]
    >>> ods.read("A2:B4")
    [(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]

    ```
    """
    if not sheet:
        names = self.sheet_names()
        if len(names) == 1:
            name = names[0]
        elif not names:
            msg = "Cannot determine the names of the sheets in the OpenDocument file"
            raise ValueError(msg)
        else:
            sheets = ", ".join(repr(n) for n in names)
            msg = (
                f"{self.file!r} contains the following sheets:\n  {sheets}\n"
                f"You must specify the name of the sheet to read"
            )
            raise ValueError(msg)
    else:
        name = sheet

    table = self._tables.get(name)
    if table is None:
        msg = f"A sheet named {sheet!r} is not in {self._file!r}"
        raise ValueError(msg)

    maxsize = sys.maxsize - 1
    r1, c1, r2, c2, contains_colon = 0, 0, maxsize, maxsize, False
    if cells:
        split = cells.split(":")
        r, c1 = self.to_indices(split[0])
        r1 = 0 if r is None else r
        if len(split) > 1:
            contains_colon = True
            r, c2 = self.to_indices(split[1])
            if r is not None:
                r2 = r
        else:
            r2, c2 = r1, c1

    self._spans.clear()
    data: list[tuple[Any, ...]] = []
    for row in self._rows(table, r1, r2):
        values = tuple(self._cell(row, c1, c2, as_datetime, merged))
        if values:
            data.append(values)

    if not contains_colon and r1 == r2 and c1 == c2:
        try:
            return data[0][0]
        except IndexError:
            return None

    return data

sheet_names ¤

sheet_names()

Get the names of all sheets in the OpenDocument Spreadsheet.

Returns:

Type Description
tuple[str, ...]

The names of all sheets.

Source code in src/msl/io/readers/ods.py
224
225
226
227
228
229
230
def sheet_names(self) -> tuple[str, ...]:
    """Get the names of all sheets in the OpenDocument Spreadsheet.

    Returns:
        The names of all sheets.
    """
    return tuple(self._tables.keys())