spreadsheets¤

Classes for reading cells in spreadsheets.

Spreadsheet ¤

Spreadsheet(file)

Bases: ABC

Abstract base class for spreadsheets.

Parameters:

Name	Type	Description	Default
`file`	`str`	The location of the spreadsheet on a local hard drive or on a network drive.	required

Source code in src/msl/io/readers/spreadsheet.py

def __init__(self, file: str) -> None:
    """Abstract base class for spreadsheets.

    Args:
        file: The location of the spreadsheet on a local hard drive or on a network drive.
    """
    self._file: str = file

file `property` ¤

file

str — The location of the spreadsheet on a local hard drive or on a network drive.

read `abstractmethod` ¤

read(
    cells=None,
    sheet=None,
    *,
    as_datetime=True,
    merged=False,
)

Read values from the spreadsheet.

You must override this method.

Parameters:

Name	Type	Description	Default
`cells`	`str \| None`	The cell(s) to read. For example, `C9` will return a single value and `C9:G20` will return all values in the specified range. If not specified then returns all values in the specified `sheet`.	`None`
`sheet`	`str \| None`	The name of the sheet to read the value(s) from. If there is only one sheet in the spreadsheet then you do not need to specify the name of the sheet.	`None`
`as_datetime`	`bool`	Whether dates should be returned as datetime or date objects. If `False`, dates are returned as a string in the format of the spreadsheet cell.	`True`
`merged`	`bool`	Applies to cells that are merged with other cells. The details depend on the type of spreadsheet document that is being read. Some documents allow the hidden cells that are part of a merger to retain its unmerged value. Other documents associate the merged value only with the top-left cell and all other cells in the merger are empty.	`False`

Returns:

Type	Description
`Any \| list[tuple[Any, ...]]`	The value(s) of the requested cell(s).

Source code in src/msl/io/readers/spreadsheet.py

@abstractmethod
def read(
    self, cells: str | None = None, sheet: str | None = None, *, as_datetime: bool = True, merged: bool = False
) -> Any | list[tuple[Any, ...]]:
    """Read values from the spreadsheet.

    !!! warning "You must override this method."

    Args:
        cells: The cell(s) to read. For example, `C9` will return a single value
            and `C9:G20` will return all values in the specified range. If not
            specified then returns all values in the specified `sheet`.
        sheet: The name of the sheet to read the value(s) from. If there is only one
            sheet in the spreadsheet then you do not need to specify the name of the sheet.
        as_datetime: Whether dates should be returned as [datetime][datetime.datetime] or
            [date][datetime.date] objects. If `False`, dates are returned as a string in
            the format of the spreadsheet cell.
        merged: Applies to cells that are merged with other cells. The details depend
            on the type of spreadsheet document that is being read. Some documents allow
            the hidden cells that are part of a merger to retain its unmerged value. Other
            documents associate the merged value only with the top-left cell and all
            other cells in the merger are empty.

    Returns:
        The value(s) of the requested cell(s).
    """

sheet_names `abstractmethod` ¤

sheet_names()

Get the names of all sheets in the spreadsheet.

You must override this method.

Returns:

Type	Description
`tuple[str, ...]`	The names of all sheets.

Source code in src/msl/io/readers/spreadsheet.py

@abstractmethod
def sheet_names(self) -> tuple[str, ...]:
    """Get the names of all sheets in the spreadsheet.

    !!! warning "You must override this method."

    Returns:
        The names of all sheets.
    """

to_indices `staticmethod` ¤

to_indices(cell)

Convert a string representation of a cell to row and column indices.

Parameters:

Name	Type	Description	Default
`cell`	`str`	The cell. Can be letters only (a column) or letters and a number (a column and a row).	required

Returns:

Type	Description
`tuple[int \| None, int]`	The (row_index, column_index). If `cell` does not contain a row number then the row index is `None`. The row and column index are zero based.

Examples:

>>> to_indices("A")
(None, 0)
>>> to_indices("A1")
(0, 0)
>>> to_indices("AA10")
(9, 26)
>>> to_indices("AAA111")
(110, 702)
>>> to_indices("MSL123456")
(123455, 9293)
>>> to_indices("BIPM")
(None, 41664)

Source code in src/msl/io/readers/spreadsheet.py

@staticmethod
def to_indices(cell: str) -> tuple[int | None, int]:
    """Convert a string representation of a cell to row and column indices.

    Args:
        cell: The cell. Can be letters only (a column) or letters and a number
            (a column and a row).

    Returns:
        The *(row_index, column_index)*. If `cell` does not contain a row number
            then the row index is `None`. The row and column index are zero based.

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> from msl.io.readers.spreadsheet import Spreadsheet
    >>> to_indices = Spreadsheet.to_indices

    -->

    ```pycon
    >>> to_indices("A")
    (None, 0)
    >>> to_indices("A1")
    (0, 0)
    >>> to_indices("AA10")
    (9, 26)
    >>> to_indices("AAA111")
    (110, 702)
    >>> to_indices("MSL123456")
    (123455, 9293)
    >>> to_indices("BIPM")
    (None, 41664)

    ```
    """
    match = _cell_regex.match(cell)
    if not match:
        msg = f"Invalid cell {cell!r}"
        raise ValueError(msg)

    letters, numbers = match.groups()
    row = max(0, int(numbers) - 1) if numbers else None
    uppercase = string.ascii_uppercase
    col = sum((26**i) * (1 + uppercase.index(c)) for i, c in enumerate(letters[::-1]))
    return row, col - 1

to_letters `staticmethod` ¤

to_letters(index)

Convert a column index to column letters.

Parameters:

Name	Type	Description	Default
`index`	`int`	The column index (zero based).	required

Returns:

Type	Description
`str`	The corresponding spreadsheet column letter(s).

Examples:

>>> to_letters(0)
'A'
>>> to_letters(1)
'B'
>>> to_letters(26)
'AA'
>>> to_letters(702)
'AAA'
>>> to_letters(494264)
'ABCDE'

Source code in src/msl/io/readers/spreadsheet.py

@staticmethod
def to_letters(index: int) -> str:
    """Convert a column index to column letters.

    Args:
        index: The column index (zero based).

    Returns:
        The corresponding spreadsheet column letter(s).

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> from msl.io.readers.spreadsheet import Spreadsheet
    >>> to_letters = Spreadsheet.to_letters

    -->

    ```pycon
    >>> to_letters(0)
    'A'
    >>> to_letters(1)
    'B'
    >>> to_letters(26)
    'AA'
    >>> to_letters(702)
    'AAA'
    >>> to_letters(494264)
    'ABCDE'

    ```
    """
    letters: list[str] = []
    uppercase = string.ascii_uppercase
    while index >= 0:
        div, mod = divmod(index, 26)
        letters.append(uppercase[mod])
        index = div - 1
    return "".join(letters[::-1])

to_slices `staticmethod` ¤

to_slices(cells, row_step=None, column_step=None)

Convert a range of cells to slices of row and column indices.

Parameters:

Name	Type	Description	Default
`cells`	`str`	The cells. Can be letters only (a column) or letters and a number (a column and a row).	required
`row_step`	`int \| None`	The step-by value for the row slice.	`None`
`column_step`	`int \| None`	The step-by value for the column slice.	`None`

Returns:

Type	Description
`tuple[slice[int, int \| None, int \| None], slice[int, int, int \| None]]`	The row slice, the column slice.

Examples:

>>> to_slices("A:A")
(slice(0, None, None), slice(0, 1, None))
>>> to_slices("A:H")
(slice(0, None, None), slice(0, 8, None))
>>> to_slices("B2:M10")
(slice(1, 10, None), slice(1, 13, None))
>>> to_slices("A5:M100", row_step=2, column_step=4)
(slice(4, 100, 2), slice(0, 13, 4))

Source code in src/msl/io/readers/spreadsheet.py

@staticmethod
def to_slices(
    cells: str, row_step: int | None = None, column_step: int | None = None
) -> tuple[slice[int, int | None, int | None], slice[int, int, int | None]]:
    """Convert a range of cells to slices of row and column indices.

    Args:
        cells: The cells. Can be letters only (a column) or letters and a number
            (a column and a row).
        row_step: The step-by value for the row slice.
        column_step: The step-by value for the column slice.

    Returns:
        The row [slice][], the column [slice][].

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> from msl.io.readers.spreadsheet import Spreadsheet
    >>> to_slices = Spreadsheet.to_slices

    -->

    ```pycon
    >>> to_slices("A:A")
    (slice(0, None, None), slice(0, 1, None))
    >>> to_slices("A:H")
    (slice(0, None, None), slice(0, 8, None))
    >>> to_slices("B2:M10")
    (slice(1, 10, None), slice(1, 13, None))
    >>> to_slices("A5:M100", row_step=2, column_step=4)
    (slice(4, 100, 2), slice(0, 13, 4))

    ```
    """
    split = cells.split(":")
    if len(split) != 2:  # noqa: PLR2004
        msg = f"Invalid cell range {cells!r}"
        raise ValueError(msg)

    r1, c1 = Spreadsheet.to_indices(split[0])
    r2, c2 = Spreadsheet.to_indices(split[1])
    if r1 is None:
        r1 = 0
    if r2 is not None:
        r2 += 1
    c2 += 1
    return slice(r1, r2, row_step), slice(c1, c2, column_step)

Read an Excel spreadsheet (.xls and .xlsx files).

ExcelReader ¤

ExcelReader(file, **kwargs)

Bases: Spreadsheet

Read an Excel spreadsheet (.xls and .xlsx files).

This class simply provides a convenience for reading cell values (not drawings or charts) from Excel spreadsheets. It is not registered as a Reader because the information in a spreadsheet is unstructured and therefore one cannot generalize how to parse a spreadsheet to create a Root.

Parameters:

Name	Type	Description	Default
`file`	`PathLike`	The path to an Excel spreadsheet file.	required
`kwargs`	`Any`	All keyword arguments are passed to xlrd.open_workbook. You can use an `encoding` keyword argument as an alias for `encoding_override`. The default `on_demand` value is `True`.	`{}`

Examples:

from msl.io import ExcelReader
excel = ExcelReader("lab_environment.xlsx")

Source code in src/msl/io/readers/excel.py

def __init__(self, file: PathLike, **kwargs: Any) -> None:
    """Read an Excel spreadsheet (*.xls* and *.xlsx* files).

    This class simply provides a convenience for reading cell values (not
    drawings or charts) from Excel spreadsheets. It is not registered as
    a [Reader][msl.io.base.Reader] because the information in a spreadsheet
    is unstructured and therefore one cannot generalize how to parse a
    spreadsheet to create a [Root][msl.io.base.Root].

    Args:
        file: The path to an Excel spreadsheet file.
        kwargs: All keyword arguments are passed to [xlrd.open_workbook][]{:target="_blank"}.
            You can use an `encoding` keyword argument as an alias for `encoding_override`.
            The default `on_demand` value is `True`.

    **Examples:**
    ```python
    from msl.io import ExcelReader
    excel = ExcelReader("lab_environment.xlsx")
    ```
    """
    f = os.fsdecode(file)
    super().__init__(f)

    # change the default on_demand value
    if "on_demand" not in kwargs:
        kwargs["on_demand"] = True

    # 'encoding' is an alias for 'encoding_override'
    encoding = kwargs.pop("encoding", None)
    if encoding is not None:
        kwargs["encoding_override"] = encoding

    self._workbook: Book = open_workbook(f, **kwargs)

close ¤

close()

Close the workbook.

Source code in src/msl/io/readers/excel.py

def close(self) -> None:
    """Close the workbook."""
    self._workbook.release_resources()

dimensions ¤

dimensions(sheet)

Get the number of rows and columns in a sheet.

Parameters:

Name	Type	Description	Default
`sheet`	`str`	The name of a sheet to get the dimensions of.	required

Returns:

Type	Description
`tuple[int, int]`	The (number of rows, number of columns) in `sheet`.

Source code in src/msl/io/readers/excel.py

def dimensions(self, sheet: str) -> tuple[int, int]:
    """Get the number of rows and columns in a sheet.

    Args:
        sheet: The name of a sheet to get the dimensions of.

    Returns:
        The *(number of rows, number of columns)* in `sheet`.
    """
    try:
        s = self._workbook.sheet_by_name(sheet)
    except XLRDError:
        msg = f"A sheet named {sheet!r} is not in {self._file!r}"
        raise ValueError(msg) from None
    else:
        return (s.nrows, s.ncols)

read ¤

read(
    cells=None,
    sheet=None,
    *,
    as_datetime=True,
    merged=False,
)

Read cell values from the Excel spreadsheet.

Parameters:

Name	Type	Description	Default
`cells`	`str \| None`	The cell(s) to read. For example, `C9` will return a single value and `C9:G20` will return all values in the specified range. If not specified then returns all values in the specified `sheet`.	`None`
`sheet`	`str \| None`	The name of the sheet to read the value(s) from. If there is only one sheet in the spreadsheet then you do not need to specify the name of the sheet.	`None`
`as_datetime`	`bool`	Whether dates should be returned as datetime or date objects. If `False`, dates are returned as an ISO 8601 string.	`True`
`merged`	`bool`	Applies to cells that are merged with other cells. If cells are merged, then only the top-left cell has the value and all other cells in the merger are empty. Enabling this argument is currently not supported and the value must be `False`.	`False`

Returns:

Type	Description
`Any \| list[tuple[Any, ...]]`	The value(s) of the requested cell(s).

Examples:

>>> excel.read()
[('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
>>> excel.read("B2")
49.82
>>> excel.read("A:A")
[('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
>>> excel.read("A1:B1")
[('temperature', 'humidity')]
>>> excel.read("A2:B4")
[(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]

Source code in src/msl/io/readers/excel.py

def read(  # noqa: C901
    self, cells: str | None = None, sheet: str | None = None, *, as_datetime: bool = True, merged: bool = False
) -> Any | list[tuple[Any, ...]]:
    """Read cell values from the Excel spreadsheet.

    Args:
        cells: The cell(s) to read. For example, `C9` will return a single value
            and `C9:G20` will return all values in the specified range. If not
            specified then returns all values in the specified `sheet`.
        sheet: The name of the sheet to read the value(s) from. If there is only
            one sheet in the spreadsheet then you do not need to specify the name
            of the sheet.
        as_datetime: Whether dates should be returned as [datetime][datetime.datetime] or
            [date][datetime.date] objects. If `False`, dates are returned as an
            ISO 8601 string.
        merged: Applies to cells that are merged with other cells. If cells are merged, then
            only the top-left cell has the value and all other cells in the merger are empty.
            Enabling this argument is currently not supported and the value must be `False`.

    Returns:
        The value(s) of the requested cell(s).

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> from msl.io import ExcelReader
    >>> excel = ExcelReader('./tests/samples/lab_environment.xlsx')

    -->

    ```pycon
    >>> excel.read()
    [('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
    >>> excel.read("B2")
    49.82
    >>> excel.read("A:A")
    [('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
    >>> excel.read("A1:B1")
    [('temperature', 'humidity')]
    >>> excel.read("A2:B4")
    [(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]

    ```
    """
    if merged:
        msg = "The `merged` argument must be False to read an Excel spreadsheet"
        raise ValueError(msg)

    if not sheet:
        names = self.sheet_names()
        if len(names) == 1:
            sheet_name = names[0]
        elif not names:
            msg = "Cannot determine the names of the sheets in the Excel file"
            raise ValueError(msg)
        else:
            sheets = ", ".join(repr(n) for n in names)
            msg = (
                f"{self.file!r} contains the following sheets:\n  {sheets}\n"
                f"You must specify the name of the sheet to read"
            )
            raise ValueError(msg)
    else:
        sheet_name = sheet

    try:
        _sheet = self._workbook.sheet_by_name(sheet_name)
    except XLRDError:
        msg = f"A sheet named {sheet_name!r} is not in {self._file!r}"
        raise ValueError(msg) from None

    if not cells:
        return [
            tuple(self._value(_sheet, r, c, as_datetime) for c in range(_sheet.ncols)) for r in range(_sheet.nrows)
        ]

    split = cells.split(":")
    r1, c1 = self.to_indices(split[0])
    if r1 is None:
        r1 = 0

    if len(split) == 1:
        try:
            return self._value(_sheet, r1, c1, as_datetime=as_datetime)
        except IndexError:
            return None

    if r1 >= _sheet.nrows or c1 >= _sheet.ncols:
        return []

    r2, c2 = self.to_indices(split[1])
    r2 = _sheet.nrows if r2 is None else min(r2 + 1, _sheet.nrows)
    c2 = min(c2 + 1, _sheet.ncols)
    return [tuple(self._value(_sheet, r, c, as_datetime) for c in range(c1, c2)) for r in range(r1, r2)]

sheet_names ¤

sheet_names()

Get the names of all sheets in the Excel spreadsheet.

Returns:

Type	Description
`tuple[str, ...]`	The names of all sheets.

Source code in src/msl/io/readers/excel.py

def sheet_names(self) -> tuple[str, ...]:
    """Get the names of all sheets in the Excel spreadsheet.

    Returns:
        The names of all sheets.
    """
    return tuple(self._workbook.sheet_names())

Read a Google Sheets spreadsheet.

GSheetsReader ¤

GSheetsReader(file, *, account=None, credentials=None)

Bases: Spreadsheet

Read a Google Sheets spreadsheet.

This class simply provides a convenience for reading cell values (not drawings or charts) from Google spreadsheets. It is not registered as a Reader because the information in a spreadsheet is unstructured and therefore one cannot generalize how to parse a spreadsheet to create a Root.

Parameters:

Name	Type	Description	Default
`file`	`PathLike`	The ID or path of a Google Sheets spreadsheet.	required
`account`	`str \| None`	Since a person may have multiple Google accounts, and multiple people may run the same code, this parameter decides which token to load to authenticate with the Google API. The value can be any text (or `None`) that you want to associate with a particular Google account, provided that it contains valid characters for a filename. The value that you chose when you authenticated with your `credentials` should be used for all future instances of this class to access that particular Google account. You can associate a different value with a Google account at any time (by passing in a different `account` value), but you may be asked to authenticate with your `credentials` again, or, alternatively, you can rename the token files located in MSL_IO_DIR to match the new `account` value.	`None`
`credentials`	`PathLike \| None`	The path to the client secrets OAuth credential file. This parameter only needs to be specified the first time that you authenticate with a particular Google account or if you delete the token file that was created when you previously authenticated.	`None`

Examples:

from msl.io import GSheetsReader

# Specify the path
sheets = GSheetsReader("Google Drive/registers/equipment.gsheet")

# Specify the ID
sheets = GSheetsReader("1TI3pM-534SZ5DQTEZ-7HCI04648f8ZpLGbfHWJu9FSo")

Source code in src/msl/io/readers/gsheets.py

def __init__(
    self,
    file: PathLike,
    *,
    account: str | None = None,
    credentials: PathLike | None = None,
) -> None:
    """Read a Google Sheets spreadsheet.

    This class simply provides a convenience for reading cell values (not
    drawings or charts) from Google spreadsheets. It is not registered as
    a [Reader][msl.io.base.Reader] because the information in a spreadsheet
    is unstructured and therefore one cannot generalize how to parse a
    spreadsheet to create a [Root][msl.io.base.Root].

    Args:
        file: The ID or path of a Google Sheets spreadsheet.
        account: Since a person may have multiple Google accounts, and multiple people
            may run the same code, this parameter decides which token to load
            to authenticate with the Google API. The value can be any text (or
            `None`) that you want to associate with a particular Google
            account, provided that it contains valid characters for a filename.
            The value that you chose when you authenticated with your `credentials`
            should be used for all future instances of this class to access that
            particular Google account. You can associate a different value with
            a Google account at any time (by passing in a different `account`
            value), but you may be asked to authenticate with your `credentials`
            again, or, alternatively, you can rename the token files located in
            [MSL_IO_DIR][msl.io.constants.MSL_IO_DIR] to match the new `account` value.
        credentials: The path to the *client secrets* OAuth credential file. This
            parameter only needs to be specified the first time that you
            authenticate with a particular Google account or if you delete
            the token file that was created when you previously authenticated.

    **Examples:**
    ```python
    from msl.io import GSheetsReader

    # Specify the path
    sheets = GSheetsReader("Google Drive/registers/equipment.gsheet")

    # Specify the ID
    sheets = GSheetsReader("1TI3pM-534SZ5DQTEZ-7HCI04648f8ZpLGbfHWJu9FSo")
    ```
    """
    file = os.fsdecode(file)
    super().__init__(file)

    path, ext = os.path.splitext(file)  # noqa: PTH122
    folders, _ = os.path.split(path)

    self._spreadsheet_id: str
    if ext or folders or not _google_file_id_regex.match(path):
        self._spreadsheet_id = GDrive(account=account, credentials=credentials).file_id(
            path, mime_type=GSheets.MIME_TYPE
        )
    else:
        self._spreadsheet_id = path

    self._gsheets: GSheets = GSheets(account=account, credentials=credentials, read_only=True)
    self._cached_sheet_name: str | None = None

close ¤

close()

Close the connection to the GSheet API service.

Source code in src/msl/io/readers/gsheets.py

def close(self) -> None:
    """Close the connection to the GSheet API service."""
    self._gsheets.close()

read ¤

read(
    cells=None,
    sheet=None,
    *,
    as_datetime=True,
    merged=False,
)

Read cell values from the Google Sheets spreadsheet.

Parameters:

Name	Type	Description	Default
`cells`	`str \| None`	The cell(s) to read. For example, `C9` will return a single value and `C9:G20` will return all values in the specified range. If not specified then returns all values in the specified `sheet`.	`None`
`sheet`	`str \| None`	The name of the sheet to read the value(s) from. If there is only one sheet in the spreadsheet then you do not need to specify the name of the sheet.	`None`
`as_datetime`	`bool`	Whether dates should be returned as datetime or date objects. If `False`, dates are returned as a string in the display format of the spreadsheet cell.	`True`
`merged`	`bool`	Applies to cells that are merged with other cells. If cells are merged, then only the top-left cell has the value and all other cells in the merger are empty. Enabling this argument is currently not supported and the value must be `False`.	`False`

Returns:

Type	Description
`Any \| list[tuple[Any, ...]]`	The value(s) of the requested cell(s).

Examples:

>>> sheets.read()
[('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
>>> sheets.read("B2")
49.82
>>> sheets.read("A:A")
[('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
>>> sheets.read("A1:B1")
[('temperature', 'humidity')]
>>> sheets.read("A2:B4")
[(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]

Source code in src/msl/io/readers/gsheets.py

def read(  # noqa: C901, PLR0912
    self, cells: str | None = None, sheet: str | None = None, *, as_datetime: bool = True, merged: bool = False
) -> Any | list[tuple[Any, ...]]:
    """Read cell values from the Google Sheets spreadsheet.

    Args:
        cells: The cell(s) to read. For example, `C9` will return a single value
            and `C9:G20` will return all values in the specified range. If not
            specified then returns all values in the specified `sheet`.
        sheet: The name of the sheet to read the value(s) from. If there is only one
            sheet in the spreadsheet then you do not need to specify the name of the sheet.
        as_datetime: Whether dates should be returned as [datetime][datetime.datetime] or
            [date][datetime.date] objects. If `False`, dates are returned as a string in
            the display format of the spreadsheet cell.
        merged: Applies to cells that are merged with other cells. If cells are merged, then
            only the top-left cell has the value and all other cells in the merger are empty.
            Enabling this argument is currently not supported and the value must be `False`.

    Returns:
        The value(s) of the requested cell(s).

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> SKIP_IF_NO_GOOGLE_SHEETS_READ_TOKEN()
    >>> from msl.io import GSheetsReader
    >>> sheets = GSheetsReader('1TI3pM-534SZ5DQTEZ-7vCI04l48f8ZpLGbfEWJuCFSo', account='testing')

    -->

    ```pycon
    >>> sheets.read()
    [('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
    >>> sheets.read("B2")
    49.82
    >>> sheets.read("A:A")
    [('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
    >>> sheets.read("A1:B1")
    [('temperature', 'humidity')]
    >>> sheets.read("A2:B4")
    [(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]

    ```
    """
    if merged:
        msg = "The `merged` argument must be False to read a Google spreadsheet"
        raise ValueError(msg)

    if not sheet:
        if self._cached_sheet_name:
            sheet = self._cached_sheet_name
        else:
            names = self.sheet_names()
            if len(names) != 1:
                sheet_names = ", ".join(repr(n) for n in names)
                msg = (
                    f"{self._file} contains the following sheets:\n  {sheet_names}\n"
                    f"You must specify the name of the sheet to read"
                )
                raise ValueError(msg)
            sheet = names[0]
            self._cached_sheet_name = sheet

    ranges = f"{sheet}!{cells}" if cells else sheet

    cells_dict = self._gsheets.cells(self._spreadsheet_id, ranges=ranges)

    if sheet not in cells_dict:
        msg = f"A sheet named {sheet!r} is not in {self._file!r}"
        raise ValueError(msg)

    values: list[tuple[Any, ...]] = []
    for row in cells_dict[sheet]:
        row_values: list[Any] = []
        for item in row:
            if item.type == GCellType.DATE:
                value = GSheets.to_datetime(item.value).date() if as_datetime else item.formatted
            elif item.type == GCellType.DATE_TIME:
                value = GSheets.to_datetime(item.value) if as_datetime else item.formatted
            else:
                value = item.value
            row_values.append(value)
        values.append(tuple(row_values))

    if not cells:
        return values

    if ":" not in cells:
        if values:
            return values[0][0]
        return None

    return values

sheet_names ¤

sheet_names()

Get the names of all sheets in the Google Sheets spreadsheet.

Returns:

Type	Description
`tuple[str, ...]`	The names of all sheets.

Source code in src/msl/io/readers/gsheets.py

def sheet_names(self) -> tuple[str, ...]:
    """Get the names of all sheets in the Google Sheets spreadsheet.

    Returns:
        The names of all sheets.
    """
    return self._gsheets.sheet_names(self._spreadsheet_id)

Read an OpenDocument Spreadsheet (.ods and .fods files, Version 1.2).

ODSReader ¤

ODSReader(file, **kwargs)

Bases: Spreadsheet

Read an OpenDocument Spreadsheet (.ods and .fods files, Version 1.2).

This class simply provides a convenience for reading cell values (not drawings or charts) from OpenDocument Spreadsheets. It is not registered as a Reader because the information in a spreadsheet is unstructured and therefore one cannot generalize how to parse a spreadsheet to create a Root.

Tip

If defusedxml is installed, that package is used to parse the contents of the file instead of the xml module.

Parameters:

Name	Type	Description	Default
`file`	`PathLike`	The path to an OpenDocument Spreadsheet file.	required
`kwargs`	`Any`	All keyword arguments are ignored.	`{}`

Examples:

from msl.io import ODSReader
ods = ODSReader("lab_environment.ods")

Source code in src/msl/io/readers/ods.py

def __init__(self, file: PathLike, **kwargs: Any) -> None:  # noqa: ARG002
    """Read an OpenDocument Spreadsheet (*.ods* and *.fods* files, Version 1.2).

    This class simply provides a convenience for reading cell values (not
    drawings or charts) from OpenDocument Spreadsheets. It is not registered
    as a [Reader][msl.io.base.Reader] because the information in a spreadsheet
    is unstructured and therefore one cannot generalize how to parse a
    spreadsheet to create a [Root][msl.io.base.Root].

    !!! tip
        If [defusedxml](https://pypi.org/project/defusedxml/){:target="_blank"} is installed,
        that package is used to parse the contents of the file instead of the
        [xml][]{:target="_blank"} module.

    Args:
        file: The path to an OpenDocument Spreadsheet file.
        kwargs: All keyword arguments are ignored.

    **Examples:**
    ```python
    from msl.io import ODSReader
    ods = ODSReader("lab_environment.ods")
    ```
    """
    f = os.fsdecode(file)
    super().__init__(f)

    self._spans: dict[int, tuple[int, Any]] = {}  # column-index: (spans-remaining, cell-value)

    ext = get_extension(f).lower()
    content: Element[str]
    if ext == ".ods":
        with ZipFile(f) as z:
            try:
                content = ET.XML(z.read("content.xml"))
            except SyntaxError as e:
                e.msg += "\nThe ODS file might be password protected (which is not supported)"
                raise
    elif ext == ".fods":
        with open(f, mode="rb") as fp:  # noqa: PTH123
            content = ET.XML(fp.read())
    else:
        msg = f"Unsupported OpenDocument Spreadsheet file extension {ext!r}"
        raise ValueError(msg)

    self._tables: dict[str, Element[str]] = {
        self._attribute(t, "table", "name"): t for t in content.findall(".//table:table", namespaces=self._ns)
    }

close ¤

close()

Free memory resources that are used to read the OpenDocument Spreadsheet.

Source code in src/msl/io/readers/ods.py

def close(self) -> None:
    """Free memory resources that are used to read the OpenDocument Spreadsheet."""
    self._tables.clear()
    self._spans.clear()

dimensions ¤

dimensions(sheet)

Get the number of rows and columns in a sheet.

Parameters:

Name	Type	Description	Default
`sheet`	`str`	The name of a sheet to get the dimensions of.	required

Returns:

Type	Description
`tuple[int, int]`	The (number of rows, number of columns) in `sheet`.

Source code in src/msl/io/readers/ods.py

def dimensions(self, sheet: str) -> tuple[int, int]:
    """Get the number of rows and columns in a sheet.

    Args:
        sheet: The name of a sheet to get the dimensions of.

    Returns:
        The *(number of rows, number of columns)* in `sheet`.
    """
    table = self._tables.get(sheet)
    if table is None:
        msg = f"A sheet named {sheet!r} is not in {self._file!r}"
        raise ValueError(msg) from None

    num_cols = 0
    for col in table.findall(".//table:table-column", namespaces=self._ns):
        num_cols += int(self._attribute(col, "table", "number-columns-repeated", "1"))

    num_rows = sum(1 for _ in self._rows(table, 0, sys.maxsize - 1))
    return (num_rows, num_cols)

read ¤

read(
    cells=None,
    sheet=None,
    *,
    as_datetime=True,
    merged=False,
)

Read cell values from the OpenDocument Spreadsheet.

Parameters:

Name	Type	Description	Default
`cells`	`str \| None`	The cell(s) to read. For example, `C9` will return a single value and `C9:G20` will return all values in the specified range. If not specified then returns all values in the specified `sheet`.	`None`
`sheet`	`str \| None`	The name of the sheet to read the value(s) from. If there is only one sheet in the spreadsheet then you do not need to specify the name of the sheet.	`None`
`as_datetime`	`bool`	Whether dates should be returned as datetime or date objects. If `False`, dates are returned as a string in the display format of the spreadsheet cell.	`True`
`merged`	`bool`	Applies to cells that are merged with other cells. If `False`, the value of each unmerged cell is returned, otherwise the same value is returned for all merged cells. In an OpenDocument Spreadsheet, the value of a hidden cell that is merged with a visible cell can still be retained (depends on how the merger was performed).	`False`

Returns:

Type	Description
`Any \| list[tuple[Any, ...]]`	The value(s) of the requested cell(s).

Examples:

>>> ods.read()
[('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
>>> ods.read("B2")
49.82
>>> ods.read("A:A")
[('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
>>> ods.read("A1:B1")
[('temperature', 'humidity')]
>>> ods.read("A2:B4")
[(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]

Source code in src/msl/io/readers/ods.py

def read(  # noqa: C901, PLR0912
    self, cells: str | None = None, sheet: str | None = None, *, as_datetime: bool = True, merged: bool = False
) -> Any | list[tuple[Any, ...]]:
    """Read cell values from the OpenDocument Spreadsheet.

    Args:
        cells: The cell(s) to read. For example, `C9` will return a single value
            and `C9:G20` will return all values in the specified range. If not
            specified then returns all values in the specified `sheet`.
        sheet: The name of the sheet to read the value(s) from. If there is only
            one sheet in the spreadsheet then you do not need to specify the name
            of the sheet.
        as_datetime: Whether dates should be returned as [datetime][datetime.datetime]
            or [date][datetime.date] objects. If `False`, dates are returned as a
            string in the display format of the spreadsheet cell.
        merged: Applies to cells that are merged with other cells. If `False`, the
            value of each unmerged cell is returned, otherwise the same value is
            returned for all merged cells. In an OpenDocument Spreadsheet, the value
            of a hidden cell that is merged with a visible cell can still be retained
            (depends on how the merger was performed).

    Returns:
        The value(s) of the requested cell(s).

    **Examples:**
    <!-- invisible-code-block: pycon
    >>> from msl.io import ODSReader
    >>> ods = ODSReader('./tests/samples/lab_environment.ods')

    -->

    ```pycon
    >>> ods.read()
    [('temperature', 'humidity'), (20.33, 49.82), (20.23, 46.06), (20.41, 47.06), (20.29, 48.32)]
    >>> ods.read("B2")
    49.82
    >>> ods.read("A:A")
    [('temperature',), (20.33,), (20.23,), (20.41,), (20.29,)]
    >>> ods.read("A1:B1")
    [('temperature', 'humidity')]
    >>> ods.read("A2:B4")
    [(20.33, 49.82), (20.23, 46.06), (20.41, 47.06)]

    ```
    """
    if not sheet:
        names = self.sheet_names()
        if len(names) == 1:
            name = names[0]
        elif not names:
            msg = "Cannot determine the names of the sheets in the OpenDocument file"
            raise ValueError(msg)
        else:
            sheets = ", ".join(repr(n) for n in names)
            msg = (
                f"{self.file!r} contains the following sheets:\n  {sheets}\n"
                f"You must specify the name of the sheet to read"
            )
            raise ValueError(msg)
    else:
        name = sheet

    table = self._tables.get(name)
    if table is None:
        msg = f"A sheet named {sheet!r} is not in {self._file!r}"
        raise ValueError(msg)

    maxsize = sys.maxsize - 1
    r1, c1, r2, c2, contains_colon = 0, 0, maxsize, maxsize, False
    if cells:
        split = cells.split(":")
        r, c1 = self.to_indices(split[0])
        r1 = 0 if r is None else r
        if len(split) > 1:
            contains_colon = True
            r, c2 = self.to_indices(split[1])
            if r is not None:
                r2 = r
        else:
            r2, c2 = r1, c1

    self._spans.clear()
    data: list[tuple[Any, ...]] = []
    for row in self._rows(table, r1, r2):
        values = tuple(self._cell(row, c1, c2, as_datetime, merged))
        if values:
            data.append(values)

    if not contains_colon and r1 == r2 and c1 == c2:
        try:
            return data[0][0]
        except IndexError:
            return None

    return data

sheet_names ¤

sheet_names()

Get the names of all sheets in the OpenDocument Spreadsheet.

Returns:

Type	Description
`tuple[str, ...]`	The names of all sheets.

Source code in src/msl/io/readers/ods.py

def sheet_names(self) -> tuple[str, ...]:
    """Get the names of all sheets in the OpenDocument Spreadsheet.

    Returns:
        The names of all sheets.
    """
    return tuple(self._tables.keys())

spreadsheets¤

Spreadsheet ¤

file property ¤

read abstractmethod ¤

sheet_names abstractmethod ¤

to_indices staticmethod ¤

to_letters staticmethod ¤

to_slices staticmethod ¤

ExcelReader ¤

close ¤

dimensions ¤

read ¤

sheet_names ¤

GSheetsReader ¤

close ¤

read ¤

sheet_names ¤

ODSReader ¤

close ¤

dimensions ¤

read ¤

sheet_names ¤

file `property` ¤

read `abstractmethod` ¤

sheet_names `abstractmethod` ¤

to_indices `staticmethod` ¤

to_letters `staticmethod` ¤

to_slices `staticmethod` ¤