Skip to content

Commit 9f497a6

Browse files
authored
refactor: read table with system time for sqlglot compiler (#2252)
Fixes internal issue 459579873🦕
1 parent 98129ba commit 9f497a6

File tree

6 files changed

+83
-8
lines changed

6 files changed

+83
-8
lines changed

bigframes/core/compile/sqlglot/compiler.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ def compile_readtable(node: nodes.ReadTableNode, child: ir.SQLGlotIR):
172172
col_names=[col.source_id for col in node.scan_list.items],
173173
alias_names=[col.id.sql for col in node.scan_list.items],
174174
uid_gen=child.uid_gen,
175+
system_time=node.source.at_time,
175176
)
176177

177178

bigframes/core/compile/sqlglot/sqlglot_ir.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
import dataclasses
18+
import datetime
1819
import functools
1920
import typing
2021

@@ -118,6 +119,7 @@ def from_table(
118119
col_names: typing.Sequence[str],
119120
alias_names: typing.Sequence[str],
120121
uid_gen: guid.SequentialUIDGenerator,
122+
system_time: typing.Optional[datetime.datetime] = None,
121123
) -> SQLGlotIR:
122124
"""Builds a SQLGlotIR expression from a BigQuery table.
123125
@@ -128,6 +130,7 @@ def from_table(
128130
col_names (typing.Sequence[str]): The names of the columns to select.
129131
alias_names (typing.Sequence[str]): The aliases for the selected columns.
130132
uid_gen (guid.SequentialUIDGenerator): A generator for unique identifiers.
133+
system_time (typing.Optional[str]): An optional system time for time-travel queries.
131134
"""
132135
selections = [
133136
sge.Alias(
@@ -138,10 +141,20 @@ def from_table(
138141
else sge.to_identifier(col_name, quoted=cls.quoted)
139142
for col_name, alias_name in zip(col_names, alias_names)
140143
]
144+
version = (
145+
sge.Version(
146+
this="TIMESTAMP",
147+
expression=sge.Literal(this=system_time.isoformat(), is_string=True),
148+
kind="AS OF",
149+
)
150+
if system_time
151+
else None
152+
)
141153
table_expr = sge.Table(
142154
this=sg.to_identifier(table_id, quoted=cls.quoted),
143155
db=sg.to_identifier(dataset_id, quoted=cls.quoted),
144156
catalog=sg.to_identifier(project_id, quoted=cls.quoted),
157+
version=version,
145158
)
146159
select_expr = sge.Select().select(*selections).from_(table_expr)
147160
return cls(expr=select_expr, uid_gen=uid_gen)

tests/unit/core/compile/sqlglot/conftest.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,10 @@ def scalar_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
9797
def scalar_types_df(compiler_session) -> bpd.DataFrame:
9898
"""Returns a BigFrames DataFrame containing all scalar types and using the `rowindex`
9999
column as the index."""
100-
bf_df = compiler_session.read_gbq_table("bigframes-dev.sqlglot_test.scalar_types")
100+
bf_df = compiler_session._loader.read_gbq_table(
101+
"bigframes-dev.sqlglot_test.scalar_types",
102+
enable_snapshot=False,
103+
)
101104
bf_df = bf_df.set_index("rowindex", drop=False)
102105
return bf_df
103106

@@ -154,8 +157,9 @@ def nested_structs_types_table_schema() -> typing.Sequence[bigquery.SchemaField]
154157
def nested_structs_types_df(compiler_session_w_nested_structs_types) -> bpd.DataFrame:
155158
"""Returns a BigFrames DataFrame containing all scalar types and using the `rowindex`
156159
column as the index."""
157-
bf_df = compiler_session_w_nested_structs_types.read_gbq_table(
158-
"bigframes-dev.sqlglot_test.nested_structs_types"
160+
bf_df = compiler_session_w_nested_structs_types._loader.read_gbq_table(
161+
"bigframes-dev.sqlglot_test.nested_structs_types",
162+
enable_snapshot=False,
159163
)
160164
bf_df = bf_df.set_index("id", drop=False)
161165
return bf_df
@@ -204,8 +208,9 @@ def repeated_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
204208
def repeated_types_df(compiler_session_w_repeated_types) -> bpd.DataFrame:
205209
"""Returns a BigFrames DataFrame containing all scalar types and using the `rowindex`
206210
column as the index."""
207-
bf_df = compiler_session_w_repeated_types.read_gbq_table(
208-
"bigframes-dev.sqlglot_test.repeated_types"
211+
bf_df = compiler_session_w_repeated_types._loader.read_gbq_table(
212+
"bigframes-dev.sqlglot_test.repeated_types",
213+
enable_snapshot=False,
209214
)
210215
bf_df = bf_df.set_index("rowindex", drop=False)
211216
return bf_df
@@ -237,8 +242,9 @@ def json_types_table_schema() -> typing.Sequence[bigquery.SchemaField]:
237242
def json_types_df(compiler_session_w_json_types) -> bpd.DataFrame:
238243
"""Returns a BigFrames DataFrame containing JSON types and using the `rowindex`
239244
column as the index."""
240-
bf_df = compiler_session_w_json_types.read_gbq_table(
241-
"bigframes-dev.sqlglot_test.json_types"
245+
bf_df = compiler_session_w_json_types._loader.read_gbq_table(
246+
"bigframes-dev.sqlglot_test.json_types",
247+
enable_snapshot=False,
242248
)
243249
# TODO(b/427305807): Why `drop=False` will produce two "rowindex" columns?
244250
bf_df = bf_df.set_index("rowindex", drop=True)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
WITH `bfcte_0` AS (
2+
SELECT
3+
`bool_col`,
4+
`bytes_col`,
5+
`date_col`,
6+
`datetime_col`,
7+
`duration_col`,
8+
`float64_col`,
9+
`geography_col`,
10+
`int64_col`,
11+
`int64_too`,
12+
`numeric_col`,
13+
`rowindex`,
14+
`rowindex_2`,
15+
`string_col`,
16+
`time_col`,
17+
`timestamp_col`
18+
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` FOR SYSTEM_TIME AS OF '2025-11-09T03:04:05.678901+00:00'
19+
)
20+
SELECT
21+
`bool_col`,
22+
`bytes_col`,
23+
`date_col`,
24+
`datetime_col`,
25+
`geography_col`,
26+
`int64_col`,
27+
`int64_too`,
28+
`numeric_col`,
29+
`float64_col`,
30+
`rowindex`,
31+
`rowindex_2`,
32+
`string_col`,
33+
`time_col`,
34+
`timestamp_col`,
35+
`duration_col`
36+
FROM `bfcte_0`

tests/unit/core/compile/sqlglot/test_compile_readtable.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import datetime
16+
17+
import google.cloud.bigquery as bigquery
1518
import pytest
1619

1720
import bigframes.pandas as bpd
@@ -47,3 +50,20 @@ def test_compile_readtable_w_limit(scalar_types_df: bpd.DataFrame, snapshot):
4750
bf_df = scalar_types_df[["int64_col"]]
4851
bf_df = bf_df.sort_index().head(10)
4952
snapshot.assert_match(bf_df.sql, "out.sql")
53+
54+
55+
def test_compile_readtable_w_system_time(
56+
compiler_session, scalar_types_table_schema, snapshot
57+
):
58+
table_ref = bigquery.TableReference(
59+
bigquery.DatasetReference("bigframes-dev", "sqlglot_test"),
60+
"scalar_types",
61+
)
62+
table = bigquery.Table(table_ref, tuple(scalar_types_table_schema))
63+
table._properties["location"] = compiler_session._location
64+
compiler_session._loader._df_snapshot[str(table_ref)] = (
65+
datetime.datetime(2025, 11, 9, 3, 4, 5, 678901, tzinfo=datetime.timezone.utc),
66+
table,
67+
)
68+
bf_df = compiler_session.read_gbq_table(str(table_ref))
69+
snapshot.assert_match(bf_df.sql, "out.sql")

tests/unit/session/test_session.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,6 @@ def test_read_gbq_cached_table():
240240
)
241241
table._properties["location"] = session._location
242242
table._properties["numRows"] = "1000000000"
243-
table._properties["location"] = session._location
244243
table._properties["type"] = "TABLE"
245244
session._loader._df_snapshot[str(table_ref)] = (
246245
datetime.datetime(1999, 1, 2, 3, 4, 5, 678901, tzinfo=datetime.timezone.utc),

0 commit comments

Comments
 (0)