diff --git a/google/cloud/bigtable/__init__.py b/google/cloud/bigtable/__init__.py index 251e41e42..c5581f813 100644 --- a/google/cloud/bigtable/__init__.py +++ b/google/cloud/bigtable/__init__.py @@ -22,9 +22,9 @@ from google.cloud.bigtable.client import Table from google.cloud.bigtable.read_rows_query import ReadRowsQuery +from google.cloud.bigtable.row import Row +from google.cloud.bigtable.row import Cell from google.cloud.bigtable.read_rows_query import RowRange -from google.cloud.bigtable.row_response import RowResponse -from google.cloud.bigtable.row_response import CellResponse from google.cloud.bigtable.mutations_batcher import MutationsBatcher from google.cloud.bigtable.mutations import Mutation @@ -52,6 +52,6 @@ "DeleteRangeFromColumn", "DeleteAllFromFamily", "DeleteAllFromRow", - "RowResponse", - "CellResponse", + "Row", + "Cell", ) diff --git a/google/cloud/bigtable/client.py b/google/cloud/bigtable/client.py index df4bf308f..23f0cb6fe 100644 --- a/google/cloud/bigtable/client.py +++ b/google/cloud/bigtable/client.py @@ -25,7 +25,7 @@ if TYPE_CHECKING: from google.cloud.bigtable.mutations import Mutation, BulkMutationsEntry from google.cloud.bigtable.mutations_batcher import MutationsBatcher - from google.cloud.bigtable.row_response import RowResponse + from google.cloud.bigtable.row import Row from google.cloud.bigtable.read_rows_query import ReadRowsQuery from google.cloud.bigtable import RowKeySamples from google.cloud.bigtable.row_filters import RowFilter @@ -109,7 +109,7 @@ async def read_rows_stream( idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, metadata: list[tuple[str, str]] | None = None, - ) -> AsyncIterable[RowResponse]: + ) -> AsyncIterable[Row]: """ Returns a generator to asynchronously stream back row data. @@ -166,7 +166,7 @@ async def read_rows( per_row_timeout: int | float | None = 10, per_request_timeout: int | float | None = None, metadata: list[tuple[str, str]] | None = None, - ) -> list[RowResponse]: + ) -> list[Row]: """ Helper function that returns a full list instead of a generator @@ -184,7 +184,7 @@ async def read_row( operation_timeout: int | float | None = 60, per_request_timeout: int | float | None = None, metadata: list[tuple[str, str]] | None = None, - ) -> RowResponse: + ) -> Row: """ Helper function to return a single row @@ -206,7 +206,7 @@ async def read_rows_sharded( idle_timeout: int | float | None = 300, per_request_timeout: int | float | None = None, metadata: list[tuple[str, str]] | None = None, - ) -> AsyncIterable[RowResponse]: + ) -> AsyncIterable[Row]: """ Runs a sharded query in parallel @@ -410,7 +410,7 @@ async def read_modify_write_row( *, operation_timeout: int | float | None = 60, metadata: list[tuple[str, str]] | None = None, - ) -> RowResponse: + ) -> Row: """ Reads and modifies a row atomically according to input ReadModifyWriteRules, and returns the contents of all modified cells @@ -429,7 +429,7 @@ async def read_modify_write_row( Failed requests will not be retried. - metadata: Strings which should be sent along with the request as metadata headers. Returns: - - RowResponse: containing cell data that was modified as part of the + - Row: containing cell data that was modified as part of the operation Raises: - GoogleAPIError exceptions from grpc call diff --git a/google/cloud/bigtable/mutations.py b/google/cloud/bigtable/mutations.py index ed3c2f065..4ff59bff9 100644 --- a/google/cloud/bigtable/mutations.py +++ b/google/cloud/bigtable/mutations.py @@ -15,7 +15,7 @@ from __future__ import annotations from dataclasses import dataclass -from google.cloud.bigtable.row_response import family_id, qualifier, row_key +from google.cloud.bigtable.row import family_id, qualifier, row_key class Mutation: diff --git a/google/cloud/bigtable/mutations_batcher.py b/google/cloud/bigtable/mutations_batcher.py index 2e393cc7e..582786ee4 100644 --- a/google/cloud/bigtable/mutations_batcher.py +++ b/google/cloud/bigtable/mutations_batcher.py @@ -18,7 +18,7 @@ from typing import TYPE_CHECKING from google.cloud.bigtable.mutations import Mutation -from google.cloud.bigtable.row_response import row_key +from google.cloud.bigtable.row import row_key from google.cloud.bigtable.row_filters import RowFilter if TYPE_CHECKING: diff --git a/google/cloud/bigtable/read_modify_write_rules.py b/google/cloud/bigtable/read_modify_write_rules.py index a9b0885f2..839262ea2 100644 --- a/google/cloud/bigtable/read_modify_write_rules.py +++ b/google/cloud/bigtable/read_modify_write_rules.py @@ -16,7 +16,7 @@ from dataclasses import dataclass -from google.cloud.bigtable.row_response import family_id, qualifier +from google.cloud.bigtable.row import family_id, qualifier class ReadModifyWriteRule: diff --git a/google/cloud/bigtable/read_rows_query.py b/google/cloud/bigtable/read_rows_query.py index 9fd349d5f..559b47f04 100644 --- a/google/cloud/bigtable/read_rows_query.py +++ b/google/cloud/bigtable/read_rows_query.py @@ -14,7 +14,7 @@ # from __future__ import annotations from typing import TYPE_CHECKING, Any -from .row_response import row_key +from .row import row_key from dataclasses import dataclass from google.cloud.bigtable.row_filters import RowFilter diff --git a/google/cloud/bigtable/row.py b/google/cloud/bigtable/row.py new file mode 100644 index 000000000..8231e324e --- /dev/null +++ b/google/cloud/bigtable/row.py @@ -0,0 +1,413 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from collections import OrderedDict +from typing import Sequence, Generator, overload, Any +from functools import total_ordering + +# Type aliases used internally for readability. +row_key = bytes +family_id = str +qualifier = bytes +row_value = bytes + + +class Row(Sequence["Cell"]): + """ + Model class for row data returned from server + + Does not represent all data contained in the row, only data returned by a + query. + Expected to be read-only to users, and written by backend + + Can be indexed: + cells = row["family", "qualifier"] + """ + + def __init__( + self, + key: row_key, + cells: list[Cell], + ): + """ + Initializes a Row object + + Row objects are not intended to be created by users. + They are returned by the Bigtable backend. + """ + self.row_key = key + self._cells_map: dict[family_id, dict[qualifier, list[Cell]]] = OrderedDict() + self._cells_list: list[Cell] = [] + # add cells to internal stores using Bigtable native ordering + for cell in cells: + if cell.family not in self._cells_map: + self._cells_map[cell.family] = OrderedDict() + if cell.column_qualifier not in self._cells_map[cell.family]: + self._cells_map[cell.family][cell.column_qualifier] = [] + self._cells_map[cell.family][cell.column_qualifier].append(cell) + self._cells_list.append(cell) + + def get_cells( + self, family: str | None = None, qualifier: str | bytes | None = None + ) -> list[Cell]: + """ + Returns cells sorted in Bigtable native order: + - Family lexicographically ascending + - Qualifier ascending + - Timestamp in reverse chronological order + + If family or qualifier not passed, will include all + + Can also be accessed through indexing: + cells = row["family", "qualifier"] + cells = row["family"] + """ + if family is None: + if qualifier is not None: + # get_cells(None, "qualifier") is not allowed + raise ValueError("Qualifier passed without family") + else: + # return all cells on get_cells() + return self._cells_list + if qualifier is None: + # return all cells in family on get_cells(family) + return list(self._get_all_from_family(family)) + if isinstance(qualifier, str): + qualifier = qualifier.encode("utf-8") + # return cells in family and qualifier on get_cells(family, qualifier) + if family not in self._cells_map: + raise ValueError(f"Family '{family}' not found in row '{self.row_key!r}'") + if qualifier not in self._cells_map[family]: + raise ValueError( + f"Qualifier '{qualifier!r}' not found in family '{family}' in row '{self.row_key!r}'" + ) + return self._cells_map[family][qualifier] + + def _get_all_from_family(self, family: family_id) -> Generator[Cell, None, None]: + """ + Returns all cells in the row for the family_id + """ + if family not in self._cells_map: + raise ValueError(f"Family '{family}' not found in row '{self.row_key!r}'") + qualifier_dict = self._cells_map.get(family, {}) + for cell_batch in qualifier_dict.values(): + for cell in cell_batch: + yield cell + + def __str__(self) -> str: + """ + Human-readable string representation + + { + (family='fam', qualifier=b'col'): [b'value', (+1 more),], + (family='fam', qualifier=b'col2'): [b'other'], + } + """ + output = ["{"] + for family, qualifier in self.get_column_components(): + cell_list = self[family, qualifier] + line = [f" (family={family!r}, qualifier={qualifier!r}): "] + if len(cell_list) == 0: + line.append("[],") + elif len(cell_list) == 1: + line.append(f"[{cell_list[0]}],") + else: + line.append(f"[{cell_list[0]}, (+{len(cell_list)-1} more)],") + output.append("".join(line)) + output.append("}") + return "\n".join(output) + + def __repr__(self): + cell_str_buffer = ["{"] + for family, qualifier in self.get_column_components(): + cell_list = self[family, qualifier] + repr_list = [cell.to_dict() for cell in cell_list] + cell_str_buffer.append(f" ('{family}', {qualifier}): {repr_list},") + cell_str_buffer.append("}") + cell_str = "\n".join(cell_str_buffer) + output = f"Row(key={self.row_key!r}, cells={cell_str})" + return output + + def to_dict(self) -> dict[str, Any]: + """ + Returns a dictionary representation of the cell in the Bigtable Row + proto format + + https://cloud.google.com/bigtable/docs/reference/data/rpc/google.bigtable.v2#row + """ + families_list: list[dict[str, Any]] = [] + for family in self._cells_map: + column_list: list[dict[str, Any]] = [] + for qualifier in self._cells_map[family]: + cells_list: list[dict[str, Any]] = [] + for cell in self._cells_map[family][qualifier]: + cells_list.append(cell.to_dict()) + column_list.append({"qualifier": qualifier, "cells": cells_list}) + families_list.append({"name": family, "columns": column_list}) + return {"key": self.row_key, "families": families_list} + + # Sequence and Mapping methods + def __iter__(self): + """ + Allow iterating over all cells in the row + """ + # iterate as a sequence; yield all cells + for cell in self._cells_list: + yield cell + + def __contains__(self, item): + """ + Implements `in` operator + + Works for both cells in the internal list, and `family` or + `(family, qualifier)` pairs associated with the cells + """ + if isinstance(item, family_id): + # check if family key is in Row + return item in self._cells_map + elif ( + isinstance(item, tuple) + and isinstance(item[0], family_id) + and isinstance(item[1], (qualifier, str)) + ): + # check if (family, qualifier) pair is in Row + qualifer = item[1] if isinstance(item[1], bytes) else item[1].encode() + return item[0] in self._cells_map and qualifer in self._cells_map[item[0]] + # check if Cell is in Row + return item in self._cells_list + + @overload + def __getitem__( + self, + index: family_id | tuple[family_id, qualifier | str], + ) -> list[Cell]: + # overload signature for type checking + pass + + @overload + def __getitem__(self, index: int) -> Cell: + # overload signature for type checking + pass + + @overload + def __getitem__(self, index: slice) -> list[Cell]: + # overload signature for type checking + pass + + def __getitem__(self, index): + """ + Implements [] indexing + + Supports indexing by family, (family, qualifier) pair, + numerical index, and index slicing + """ + if isinstance(index, family_id): + return self.get_cells(family=index) + elif ( + isinstance(index, tuple) + and isinstance(index[0], family_id) + and isinstance(index[1], (qualifier, str)) + ): + return self.get_cells(family=index[0], qualifier=index[1]) + elif isinstance(index, int) or isinstance(index, slice): + # index is int or slice + return self._cells_list[index] + else: + raise TypeError( + "Index must be family_id, (family_id, qualifier), int, or slice" + ) + + def __len__(self): + """ + Implements `len()` operator + """ + return len(self._cells_list) + + def get_column_components(self): + """ + Returns a list of (family, qualifier) pairs associated with the cells + + Pairs can be used for indexing + """ + key_list = [] + for family in self._cells_map: + for qualifier in self._cells_map[family]: + key_list.append((family, qualifier)) + return key_list + + def __eq__(self, other): + """ + Implements `==` operator + """ + # for performance reasons, check row metadata + # before checking individual cells + if not isinstance(other, Row): + return False + if self.row_key != other.row_key: + return False + if len(self._cells_list) != len(other._cells_list): + return False + components = self.get_column_components() + other_components = other.get_column_components() + if len(components) != len(other_components): + return False + if components != other_components: + return False + for family, qualifier in components: + if len(self[family, qualifier]) != len(other[family, qualifier]): + return False + # compare individual cell lists + if self._cells_list != other._cells_list: + return False + return True + + def __ne__(self, other) -> bool: + """ + Implements `!=` operator + """ + return not self == other + + +@total_ordering +class Cell: + """ + Model class for cell data + + Does not represent all data contained in the cell, only data returned by a + query. + Expected to be read-only to users, and written by backend + """ + + def __init__( + self, + value: row_value, + row: row_key, + family: family_id, + column_qualifier: qualifier | str, + timestamp_micros: int, + labels: list[str] | None = None, + ): + """ + Cell constructor + + Cell objects are not intended to be constructed by users. + They are returned by the Bigtable backend. + """ + self.value = value + self.row_key = row + self.family = family + if isinstance(column_qualifier, str): + column_qualifier = column_qualifier.encode() + self.column_qualifier = column_qualifier + self.timestamp_micros = timestamp_micros + self.labels = labels if labels is not None else [] + + def __int__(self) -> int: + """ + Allows casting cell to int + Interprets value as a 64-bit big-endian signed integer, as expected by + ReadModifyWrite increment rule + """ + return int.from_bytes(self.value, byteorder="big", signed=True) + + def to_dict(self) -> dict[str, Any]: + """ + Returns a dictionary representation of the cell in the Bigtable Cell + proto format + + https://cloud.google.com/bigtable/docs/reference/data/rpc/google.bigtable.v2#cell + """ + cell_dict: dict[str, Any] = { + "value": self.value, + } + cell_dict["timestamp_micros"] = self.timestamp_micros + if self.labels: + cell_dict["labels"] = self.labels + return cell_dict + + def __str__(self) -> str: + """ + Allows casting cell to str + Prints encoded byte string, same as printing value directly. + """ + return str(self.value) + + def __repr__(self): + """ + Returns a string representation of the cell + """ + return f"Cell(value={self.value!r}, row={self.row_key!r}, family='{self.family}', column_qualifier={self.column_qualifier!r}, timestamp_micros={self.timestamp_micros}, labels={self.labels})" + + """For Bigtable native ordering""" + + def __lt__(self, other) -> bool: + """ + Implements `<` operator + """ + if not isinstance(other, Cell): + return NotImplemented + this_ordering = ( + self.family, + self.column_qualifier, + -self.timestamp_micros, + self.value, + self.labels, + ) + other_ordering = ( + other.family, + other.column_qualifier, + -other.timestamp_micros, + other.value, + other.labels, + ) + return this_ordering < other_ordering + + def __eq__(self, other) -> bool: + """ + Implements `==` operator + """ + if not isinstance(other, Cell): + return NotImplemented + return ( + self.row_key == other.row_key + and self.family == other.family + and self.column_qualifier == other.column_qualifier + and self.value == other.value + and self.timestamp_micros == other.timestamp_micros + and len(self.labels) == len(other.labels) + and all([label in other.labels for label in self.labels]) + ) + + def __ne__(self, other) -> bool: + """ + Implements `!=` operator + """ + return not self == other + + def __hash__(self): + """ + Implements `hash()` function to fingerprint cell + """ + return hash( + ( + self.row_key, + self.family, + self.column_qualifier, + self.value, + self.timestamp_micros, + tuple(self.labels), + ) + ) diff --git a/google/cloud/bigtable/row_response.py b/google/cloud/bigtable/row_response.py deleted file mode 100644 index be6d8c505..000000000 --- a/google/cloud/bigtable/row_response.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from collections import OrderedDict -from typing import Sequence - -# Type aliases used internally for readability. -row_key = bytes -family_id = str -qualifier = bytes -row_value = bytes - - -class RowResponse(Sequence["CellResponse"]): - """ - Model class for row data returned from server - - Does not represent all data contained in the row, only data returned by a - query. - Expected to be read-only to users, and written by backend - - Can be indexed: - cells = row["family", "qualifier"] - """ - - def __init__(self, key: row_key, cells: list[CellResponse]): - self.row_key = key - self.cells: OrderedDict[ - family_id, OrderedDict[qualifier, list[CellResponse]] - ] = OrderedDict() - """Expected to be used internally only""" - pass - - def get_cells( - self, family: str | None, qualifer: str | bytes | None - ) -> list[CellResponse]: - """ - Returns cells sorted in Bigtable native order: - - Family lexicographically ascending - - Qualifier lexicographically ascending - - Timestamp in reverse chronological order - - If family or qualifier not passed, will include all - - Syntactic sugar: cells = row["family", "qualifier"] - """ - raise NotImplementedError - - def get_index(self) -> dict[family_id, list[qualifier]]: - """ - Returns a list of family and qualifiers for the object - """ - raise NotImplementedError - - def __str__(self): - """ - Human-readable string representation - - (family, qualifier) cells - (ABC, XYZ) [b"123", b"456" ...(+5)] - (DEF, XYZ) [b"123"] - (GHI, XYZ) [b"123", b"456" ...(+2)] - """ - raise NotImplementedError - - -class CellResponse: - """ - Model class for cell data - - Does not represent all data contained in the cell, only data returned by a - query. - Expected to be read-only to users, and written by backend - """ - - def __init__( - self, - value: row_value, - row: row_key, - family: family_id, - column_qualifier: qualifier, - labels: list[str] | None = None, - timestamp: int | None = None, - ): - self.value = value - self.row_key = row - self.family = family - self.column_qualifier = column_qualifier - self.labels = labels - self.timestamp = timestamp - - def decode_value(self, encoding="UTF-8", errors=None) -> str: - """decode bytes to string""" - return self.value.decode(encoding, errors) - - def __int__(self) -> int: - """ - Allows casting cell to int - Interprets value as a 64-bit big-endian signed integer, as expected by - ReadModifyWrite increment rule - """ - return int.from_bytes(self.value, byteorder="big", signed=True) - - def __str__(self) -> str: - """ - Allows casting cell to str - Prints encoded byte string, same as printing value directly. - """ - return str(self.value) - - """For Bigtable native ordering""" - - def __lt__(self, other) -> bool: - raise NotImplementedError - - def __eq__(self, other) -> bool: - raise NotImplementedError diff --git a/tests/unit/test_row.py b/tests/unit/test_row.py new file mode 100644 index 000000000..32afe92f5 --- /dev/null +++ b/tests/unit/test_row.py @@ -0,0 +1,692 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import time + +TEST_VALUE = b"1234" +TEST_ROW_KEY = b"row" +TEST_FAMILY_ID = "cf1" +TEST_QUALIFIER = b"col" +TEST_TIMESTAMP = time.time_ns() // 1000 +TEST_LABELS = ["label1", "label2"] + + +class TestRow(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigtable.row import Row + + return Row + + def _make_one(self, *args, **kwargs): + if len(args) == 0: + args = (TEST_ROW_KEY, [self._make_cell()]) + return self._get_target_class()(*args, **kwargs) + + def _make_cell( + self, + value=TEST_VALUE, + row_key=TEST_ROW_KEY, + family_id=TEST_FAMILY_ID, + qualifier=TEST_QUALIFIER, + timestamp=TEST_TIMESTAMP, + labels=TEST_LABELS, + ): + from google.cloud.bigtable.row import Cell + + return Cell(value, row_key, family_id, qualifier, timestamp, labels) + + def test_ctor(self): + cells = [self._make_cell(), self._make_cell()] + row_response = self._make_one(TEST_ROW_KEY, cells) + self.assertEqual(list(row_response), cells) + self.assertEqual(row_response.row_key, TEST_ROW_KEY) + + def test_get_cells(self): + cell_list = [] + for family_id in ["1", "2"]: + for qualifier in [b"a", b"b"]: + cell = self._make_cell(family_id=family_id, qualifier=qualifier) + cell_list.append(cell) + # test getting all cells + row_response = self._make_one(TEST_ROW_KEY, cell_list) + self.assertEqual(row_response.get_cells(), cell_list) + # test getting cells in a family + output = row_response.get_cells(family="1") + self.assertEqual(len(output), 2) + self.assertEqual(output[0].family, "1") + self.assertEqual(output[1].family, "1") + self.assertEqual(output[0], cell_list[0]) + # test getting cells in a family/qualifier + # should accept bytes or str for qualifier + for q in [b"a", "a"]: + output = row_response.get_cells(family="1", qualifier=q) + self.assertEqual(len(output), 1) + self.assertEqual(output[0].family, "1") + self.assertEqual(output[0].column_qualifier, b"a") + self.assertEqual(output[0], cell_list[0]) + # calling with just qualifier should raise an error + with self.assertRaises(ValueError): + row_response.get_cells(qualifier=b"a") + # test calling with bad family or qualifier + with self.assertRaises(ValueError): + row_response.get_cells(family="3", qualifier=b"a") + with self.assertRaises(ValueError): + row_response.get_cells(family="3") + with self.assertRaises(ValueError): + row_response.get_cells(family="1", qualifier=b"c") + + def test___repr__(self): + + cell_str = ( + "{'value': b'1234', 'timestamp_micros': %d, 'labels': ['label1', 'label2']}" + % (TEST_TIMESTAMP) + ) + expected_prefix = "Row(key=b'row', cells=" + row = self._make_one(TEST_ROW_KEY, [self._make_cell()]) + self.assertIn(expected_prefix, repr(row)) + self.assertIn(cell_str, repr(row)) + expected_full = ( + "Row(key=b'row', cells={\n ('cf1', b'col'): [{'value': b'1234', 'timestamp_micros': %d, 'labels': ['label1', 'label2']}],\n})" + % (TEST_TIMESTAMP) + ) + self.assertEqual(expected_full, repr(row)) + # try with multiple cells + row = self._make_one(TEST_ROW_KEY, [self._make_cell(), self._make_cell()]) + self.assertIn(expected_prefix, repr(row)) + self.assertIn(cell_str, repr(row)) + + def test___str__(self): + cells = [ + self._make_cell(value=b"1234", family_id="1", qualifier=b"col"), + self._make_cell(value=b"5678", family_id="3", qualifier=b"col"), + self._make_cell(value=b"1", family_id="3", qualifier=b"col"), + self._make_cell(value=b"2", family_id="3", qualifier=b"col"), + ] + + row_response = self._make_one(TEST_ROW_KEY, cells) + expected = ( + "{\n" + + " (family='1', qualifier=b'col'): [b'1234'],\n" + + " (family='3', qualifier=b'col'): [b'5678', (+2 more)],\n" + + "}" + ) + self.assertEqual(expected, str(row_response)) + + def test_to_dict(self): + from google.cloud.bigtable_v2.types import Row + + cell1 = self._make_cell() + cell2 = self._make_cell() + cell2.value = b"other" + row = self._make_one(TEST_ROW_KEY, [cell1, cell2]) + row_dict = row.to_dict() + expected_dict = { + "key": TEST_ROW_KEY, + "families": [ + { + "name": TEST_FAMILY_ID, + "columns": [ + { + "qualifier": TEST_QUALIFIER, + "cells": [ + { + "value": TEST_VALUE, + "timestamp_micros": TEST_TIMESTAMP, + "labels": TEST_LABELS, + }, + { + "value": b"other", + "timestamp_micros": TEST_TIMESTAMP, + "labels": TEST_LABELS, + }, + ], + } + ], + }, + ], + } + self.assertEqual(len(row_dict), len(expected_dict)) + for key, value in expected_dict.items(): + self.assertEqual(row_dict[key], value) + # should be able to construct a Cell proto from the dict + row_proto = Row(**row_dict) + self.assertEqual(row_proto.key, TEST_ROW_KEY) + self.assertEqual(len(row_proto.families), 1) + family = row_proto.families[0] + self.assertEqual(family.name, TEST_FAMILY_ID) + self.assertEqual(len(family.columns), 1) + column = family.columns[0] + self.assertEqual(column.qualifier, TEST_QUALIFIER) + self.assertEqual(len(column.cells), 2) + self.assertEqual(column.cells[0].value, TEST_VALUE) + self.assertEqual(column.cells[0].timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(column.cells[0].labels, TEST_LABELS) + self.assertEqual(column.cells[1].value, cell2.value) + self.assertEqual(column.cells[1].timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(column.cells[1].labels, TEST_LABELS) + + def test_iteration(self): + from types import GeneratorType + from google.cloud.bigtable.row import Cell + + # should be able to iterate over the Row as a list + cell1 = self._make_cell(value=b"1") + cell2 = self._make_cell(value=b"2") + cell3 = self._make_cell(value=b"3") + row_response = self._make_one(TEST_ROW_KEY, [cell1, cell2, cell3]) + self.assertEqual(len(row_response), 3) + # should create generator object + self.assertIsInstance(iter(row_response), GeneratorType) + result_list = list(row_response) + self.assertEqual(len(result_list), 3) + # should be able to iterate over all cells + idx = 0 + for cell in row_response: + self.assertIsInstance(cell, Cell) + self.assertEqual(cell.value, result_list[idx].value) + self.assertEqual(cell.value, str(idx + 1).encode()) + idx += 1 + + def test_contains_cell(self): + cell3 = self._make_cell(value=b"3") + cell1 = self._make_cell(value=b"1") + cell2 = self._make_cell(value=b"2") + cell4 = self._make_cell(value=b"4") + row_response = self._make_one(TEST_ROW_KEY, [cell3, cell1, cell2]) + self.assertIn(cell1, row_response) + self.assertIn(cell2, row_response) + self.assertNotIn(cell4, row_response) + cell3_copy = self._make_cell(value=b"3") + self.assertIn(cell3_copy, row_response) + + def test_contains_family_id(self): + new_family_id = "new_family_id" + cell = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell2 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + new_family_id, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + row_response = self._make_one(TEST_ROW_KEY, [cell, cell2]) + self.assertIn(TEST_FAMILY_ID, row_response) + self.assertIn("new_family_id", row_response) + self.assertIn(new_family_id, row_response) + self.assertNotIn("not_a_family_id", row_response) + self.assertNotIn(None, row_response) + + def test_contains_family_qualifier_tuple(self): + new_family_id = "new_family_id" + new_qualifier = b"new_qualifier" + cell = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell2 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + new_family_id, + new_qualifier, + TEST_TIMESTAMP, + TEST_LABELS, + ) + row_response = self._make_one(TEST_ROW_KEY, [cell, cell2]) + self.assertIn((TEST_FAMILY_ID, TEST_QUALIFIER), row_response) + self.assertIn(("new_family_id", "new_qualifier"), row_response) + self.assertIn(("new_family_id", b"new_qualifier"), row_response) + self.assertIn((new_family_id, new_qualifier), row_response) + + self.assertNotIn(("not_a_family_id", TEST_QUALIFIER), row_response) + self.assertNotIn((TEST_FAMILY_ID, "not_a_qualifier"), row_response) + self.assertNotIn((TEST_FAMILY_ID, new_qualifier), row_response) + self.assertNotIn(("not_a_family_id", "not_a_qualifier"), row_response) + self.assertNotIn((None, None), row_response) + self.assertNotIn(None, row_response) + + def test_int_indexing(self): + # should be able to index into underlying list with an index number directly + cell_list = [self._make_cell(value=str(i).encode()) for i in range(10)] + sorted(cell_list) + row_response = self._make_one(TEST_ROW_KEY, cell_list) + self.assertEqual(len(row_response), 10) + for i in range(10): + self.assertEqual(row_response[i].value, str(i).encode()) + # backwards indexing should work + self.assertEqual(row_response[-i - 1].value, str(9 - i).encode()) + with self.assertRaises(IndexError): + row_response[10] + with self.assertRaises(IndexError): + row_response[-11] + + def test_slice_indexing(self): + # should be able to index with a range of indices + cell_list = [self._make_cell(value=str(i).encode()) for i in range(10)] + sorted(cell_list) + row_response = self._make_one(TEST_ROW_KEY, cell_list) + self.assertEqual(len(row_response), 10) + self.assertEqual(len(row_response[0:10]), 10) + self.assertEqual(row_response[0:10], cell_list) + self.assertEqual(len(row_response[0:]), 10) + self.assertEqual(row_response[0:], cell_list) + self.assertEqual(len(row_response[:10]), 10) + self.assertEqual(row_response[:10], cell_list) + self.assertEqual(len(row_response[0:10:1]), 10) + self.assertEqual(row_response[0:10:1], cell_list) + self.assertEqual(len(row_response[0:10:2]), 5) + self.assertEqual(row_response[0:10:2], [cell_list[i] for i in range(0, 10, 2)]) + self.assertEqual(len(row_response[0:10:3]), 4) + self.assertEqual(row_response[0:10:3], [cell_list[i] for i in range(0, 10, 3)]) + self.assertEqual(len(row_response[10:0:-1]), 9) + self.assertEqual(len(row_response[10:0:-2]), 5) + self.assertEqual(row_response[10:0:-3], cell_list[10:0:-3]) + self.assertEqual(len(row_response[0:100]), 10) + + def test_family_indexing(self): + # should be able to retrieve cells in a family + new_family_id = "new_family_id" + cell = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell2 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell3 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + new_family_id, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + row_response = self._make_one(TEST_ROW_KEY, [cell, cell2, cell3]) + + self.assertEqual(len(row_response[TEST_FAMILY_ID]), 2) + self.assertEqual(row_response[TEST_FAMILY_ID][0], cell) + self.assertEqual(row_response[TEST_FAMILY_ID][1], cell2) + self.assertEqual(len(row_response[new_family_id]), 1) + self.assertEqual(row_response[new_family_id][0], cell3) + with self.assertRaises(ValueError): + row_response["not_a_family_id"] + with self.assertRaises(TypeError): + row_response[None] + with self.assertRaises(TypeError): + row_response[b"new_family_id"] + + def test_family_qualifier_indexing(self): + # should be able to retrieve cells in a family/qualifier tuplw + new_family_id = "new_family_id" + new_qualifier = b"new_qualifier" + cell = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell2 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell3 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + new_family_id, + new_qualifier, + TEST_TIMESTAMP, + TEST_LABELS, + ) + row_response = self._make_one(TEST_ROW_KEY, [cell, cell2, cell3]) + + self.assertEqual(len(row_response[TEST_FAMILY_ID, TEST_QUALIFIER]), 2) + self.assertEqual(row_response[TEST_FAMILY_ID, TEST_QUALIFIER][0], cell) + self.assertEqual(row_response[TEST_FAMILY_ID, TEST_QUALIFIER][1], cell2) + self.assertEqual(len(row_response[new_family_id, new_qualifier]), 1) + self.assertEqual(row_response[new_family_id, new_qualifier][0], cell3) + self.assertEqual(len(row_response["new_family_id", "new_qualifier"]), 1) + self.assertEqual(len(row_response["new_family_id", b"new_qualifier"]), 1) + with self.assertRaises(ValueError): + row_response[new_family_id, "not_a_qualifier"] + with self.assertRaises(ValueError): + row_response["not_a_family_id", new_qualifier] + with self.assertRaises(TypeError): + row_response[None, None] + with self.assertRaises(TypeError): + row_response[b"new_family_id", b"new_qualifier"] + + def test_get_column_components(self): + # should be able to retrieve (family,qualifier) tuples as keys + new_family_id = "new_family_id" + new_qualifier = b"new_qualifier" + cell = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell2 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + cell3 = self._make_cell( + TEST_VALUE, + TEST_ROW_KEY, + new_family_id, + new_qualifier, + TEST_TIMESTAMP, + TEST_LABELS, + ) + row_response = self._make_one(TEST_ROW_KEY, [cell, cell2, cell3]) + + self.assertEqual(len(row_response.get_column_components()), 2) + self.assertEqual( + row_response.get_column_components(), + [(TEST_FAMILY_ID, TEST_QUALIFIER), (new_family_id, new_qualifier)], + ) + + row_response = self._make_one(TEST_ROW_KEY, []) + self.assertEqual(len(row_response.get_column_components()), 0) + self.assertEqual(row_response.get_column_components(), []) + + row_response = self._make_one(TEST_ROW_KEY, [cell]) + self.assertEqual(len(row_response.get_column_components()), 1) + self.assertEqual( + row_response.get_column_components(), [(TEST_FAMILY_ID, TEST_QUALIFIER)] + ) + + def test_index_of(self): + # given a cell, should find index in underlying list + cell_list = [self._make_cell(value=str(i).encode()) for i in range(10)] + sorted(cell_list) + row_response = self._make_one(TEST_ROW_KEY, cell_list) + + self.assertEqual(row_response.index(cell_list[0]), 0) + self.assertEqual(row_response.index(cell_list[5]), 5) + self.assertEqual(row_response.index(cell_list[9]), 9) + with self.assertRaises(ValueError): + row_response.index(self._make_cell()) + with self.assertRaises(ValueError): + row_response.index(None) + + +class TestCell(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigtable.row import Cell + + return Cell + + def _make_one(self, *args, **kwargs): + if len(args) == 0: + args = ( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + return self._get_target_class()(*args, **kwargs) + + def test_ctor(self): + cell = self._make_one( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + self.assertEqual(cell.value, TEST_VALUE) + self.assertEqual(cell.row_key, TEST_ROW_KEY) + self.assertEqual(cell.family, TEST_FAMILY_ID) + self.assertEqual(cell.column_qualifier, TEST_QUALIFIER) + self.assertEqual(cell.timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(cell.labels, TEST_LABELS) + + def test_to_dict(self): + from google.cloud.bigtable_v2.types import Cell + + cell = self._make_one() + cell_dict = cell.to_dict() + expected_dict = { + "value": TEST_VALUE, + "timestamp_micros": TEST_TIMESTAMP, + "labels": TEST_LABELS, + } + self.assertEqual(len(cell_dict), len(expected_dict)) + for key, value in expected_dict.items(): + self.assertEqual(cell_dict[key], value) + # should be able to construct a Cell proto from the dict + cell_proto = Cell(**cell_dict) + self.assertEqual(cell_proto.value, TEST_VALUE) + self.assertEqual(cell_proto.timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(cell_proto.labels, TEST_LABELS) + + def test_to_dict_no_labels(self): + from google.cloud.bigtable_v2.types import Cell + + cell_no_labels = self._make_one( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + None, + ) + cell_dict = cell_no_labels.to_dict() + expected_dict = { + "value": TEST_VALUE, + "timestamp_micros": TEST_TIMESTAMP, + } + self.assertEqual(len(cell_dict), len(expected_dict)) + for key, value in expected_dict.items(): + self.assertEqual(cell_dict[key], value) + # should be able to construct a Cell proto from the dict + cell_proto = Cell(**cell_dict) + self.assertEqual(cell_proto.value, TEST_VALUE) + self.assertEqual(cell_proto.timestamp_micros, TEST_TIMESTAMP) + self.assertEqual(cell_proto.labels, []) + + def test_int_value(self): + test_int = 1234 + bytes_value = test_int.to_bytes(4, "big", signed=True) + cell = self._make_one( + bytes_value, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + self.assertEqual(int(cell), test_int) + # ensure string formatting works + formatted = "%d" % cell + self.assertEqual(formatted, str(test_int)) + self.assertEqual(int(formatted), test_int) + + def test_int_value_negative(self): + test_int = -99999 + bytes_value = test_int.to_bytes(4, "big", signed=True) + cell = self._make_one( + bytes_value, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + self.assertEqual(int(cell), test_int) + # ensure string formatting works + formatted = "%d" % cell + self.assertEqual(formatted, str(test_int)) + self.assertEqual(int(formatted), test_int) + + def test___str__(self): + test_value = b"helloworld" + cell = self._make_one( + test_value, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + self.assertEqual(str(cell), "b'helloworld'") + self.assertEqual(str(cell), str(test_value)) + + def test___repr__(self): + from google.cloud.bigtable.row import Cell # type: ignore # noqa: F401 + + cell = self._make_one() + expected = ( + "Cell(value=b'1234', row=b'row', " + + "family='cf1', column_qualifier=b'col', " + + f"timestamp_micros={TEST_TIMESTAMP}, labels=['label1', 'label2'])" + ) + self.assertEqual(repr(cell), expected) + # should be able to construct instance from __repr__ + result = eval(repr(cell)) + self.assertEqual(result, cell) + + def test___repr___no_labels(self): + from google.cloud.bigtable.row import Cell # type: ignore # noqa: F401 + + cell_no_labels = self._make_one( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + None, + ) + expected = ( + "Cell(value=b'1234', row=b'row', " + + "family='cf1', column_qualifier=b'col', " + + f"timestamp_micros={TEST_TIMESTAMP}, labels=[])" + ) + self.assertEqual(repr(cell_no_labels), expected) + # should be able to construct instance from __repr__ + result = eval(repr(cell_no_labels)) + self.assertEqual(result, cell_no_labels) + + def test_equality(self): + cell1 = self._make_one() + cell2 = self._make_one() + self.assertEqual(cell1, cell2) + self.assertTrue(cell1 == cell2) + args = ( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + for i in range(0, len(args)): + # try changing each argument + modified_cell = self._make_one(*args[:i], args[i] + args[i], *args[i + 1 :]) + self.assertNotEqual(cell1, modified_cell) + self.assertFalse(cell1 == modified_cell) + self.assertTrue(cell1 != modified_cell) + + def test_hash(self): + # class should be hashable + cell1 = self._make_one() + d = {cell1: 1} + cell2 = self._make_one() + self.assertEqual(d[cell2], 1) + + args = ( + TEST_VALUE, + TEST_ROW_KEY, + TEST_FAMILY_ID, + TEST_QUALIFIER, + TEST_TIMESTAMP, + TEST_LABELS, + ) + for i in range(0, len(args)): + # try changing each argument + modified_cell = self._make_one(*args[:i], args[i] + args[i], *args[i + 1 :]) + with self.assertRaises(KeyError): + d[modified_cell] + + def test_ordering(self): + # create cell list in order from lowest to highest + higher_cells = [] + i = 0 + # families; alphebetical order + for family in ["z", "y", "x"]: + # qualifiers; lowest byte value first + for qualifier in [b"z", b"y", b"x"]: + # timestamps; newest first + for timestamp in [ + TEST_TIMESTAMP, + TEST_TIMESTAMP + 1, + TEST_TIMESTAMP + 2, + ]: + cell = self._make_one( + TEST_VALUE, + TEST_ROW_KEY, + family, + qualifier, + timestamp, + TEST_LABELS, + ) + # cell should be the highest priority encountered so far + self.assertEqual(i, len(higher_cells)) + i += 1 + for other in higher_cells: + self.assertLess(cell, other) + higher_cells.append(cell) + # final order should be reverse of sorted order + expected_order = higher_cells + expected_order.reverse() + self.assertEqual(expected_order, sorted(higher_cells))