Skip to content

Commit cc2dbae

Browse files
authored
fix: do not warn with DefaultIndexWarning in partial ordering mode (#2230)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:

- [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea.
- [ ] Ensure the tests and linter pass.
- [ ] Code coverage does not decrease (if any source code was changed).
- [ ] Appropriate docs were updated (if necessary).

Fixes internal issue b/356872356 🦕
1 parent c62e553 commit cc2dbae

File tree

3 files changed

+50
-1
lines changed

3 files changed

+50
-1
lines changed

bigframes/session/_io/bigquery/read_gbq_table.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -402,6 +402,7 @@ def get_index_cols(
402402
| bigframes.enums.DefaultIndexKind,
403403
*,
404404
rename_to_schema: Optional[Dict[str, str]] = None,
405+
default_index_type: bigframes.enums.DefaultIndexKind = bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64,
405406
) -> List[str]:
406407
"""
407408
If we can get a total ordering from the table, such as via primary key
@@ -471,7 +472,11 @@ def get_index_cols(
471472
# find index_cols to use. This is to avoid unexpected performance and
472473
# resource utilization because of the default sequential index. See
473474
# internal issue 335727141.
474-
if _is_table_clustered_or_partitioned(table) and not primary_keys:
475+
if (
476+
_is_table_clustered_or_partitioned(table)
477+
and not primary_keys
478+
and default_index_type == bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
479+
):
475480
msg = bfe.format_message(
476481
f"Table '{str(table.reference)}' is clustered and/or "
477482
"partitioned, but BigQuery DataFrames was not able to find a "

bigframes/session/loader.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -696,6 +696,7 @@ def read_gbq_table(
696696
table=table,
697697
index_col=index_col,
698698
rename_to_schema=rename_to_schema,
699+
default_index_type=self._default_index_type,
699700
)
700701
_check_index_col_param(
701702
index_cols,

tests/unit/session/test_read_gbq_table.py

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,13 @@
1515
"""Unit tests for read_gbq_table helper functions."""
1616

1717
import unittest.mock as mock
18+
import warnings
1819

1920
import google.cloud.bigquery
2021
import pytest
2122

23+
import bigframes.enums
24+
import bigframes.exceptions
2225
import bigframes.session._io.bigquery.read_gbq_table as bf_read_gbq_table
2326
from bigframes.testing import mocks
2427

@@ -143,3 +146,43 @@ def test_check_if_index_columns_are_unique(index_cols, values_distinct, expected
143146
)
144147

145148
assert result == expected
149+
150+
151+
def test_get_index_cols_warns_if_clustered_but_sequential_index():
    """Check when get_index_cols emits DefaultIndexWarning for a clustered table.

    The table below is clustered on two columns and is read with an empty
    ``index_col``. With a sequential default index the call must emit
    ``DefaultIndexWarning``; with a NULL default index it must not.
    """
    column_names = ("col1", "col2", "col3", "col4")
    resource = {
        "tableReference": {
            "projectId": "my-project",
            "datasetId": "my_dataset",
            "tableId": "my_table",
        },
        "clustering": {
            "fields": ["col1", "col2"],
        },
    }
    clustered_table = google.cloud.bigquery.Table.from_api_repr(resource)
    clustered_table.schema = tuple(
        google.cloud.bigquery.SchemaField(name, "INT64") for name in column_names
    )

    warning_type = bigframes.exceptions.DefaultIndexWarning
    sequential_kind = bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
    null_kind = bigframes.enums.DefaultIndexKind.NULL

    # Sequential default index: the warning is expected.
    with pytest.warns(warning_type, match="is clustered"):
        bf_read_gbq_table.get_index_cols(
            clustered_table,
            index_col=(),
            default_index_type=sequential_kind,
        )

    # NULL default index (such as in partial ordering mode): promoting the
    # warning to an error makes any stray DefaultIndexWarning fail the test.
    # See: internal issue b/356872356.
    with warnings.catch_warnings():
        warnings.simplefilter("error", category=warning_type)
        bf_read_gbq_table.get_index_cols(
            clustered_table,
            index_col=(),
            default_index_type=null_kind,
        )

0 commit comments

Comments
 (0)