Skip to content

Commit 642a480

Browse files
feat: Preserve source names better for more readable sql
1 parent 9b86dcf commit 642a480

File tree

240 files changed

+1161
-1100
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

240 files changed

+1161
-1100
lines changed

bigframes/core/rewrite/select_pullup.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@
1313
# limitations under the License.
1414

1515
import dataclasses
16+
import functools
1617
from typing import cast
1718

18-
from bigframes.core import expression, nodes
19+
from bigframes.core import expression, identifiers, nodes
1920

2021

2122
def defer_selection(
@@ -26,12 +27,19 @@ def defer_selection(
2627
2728
In many cases, these nodes will be merged or eliminated entirely, simplifying the overall tree.
2829
"""
29-
return nodes.bottom_up(root, pull_up_select)
30+
return nodes.bottom_up(
31+
root, functools.partial(pull_up_select, prefer_source_names=True)
32+
)
3033

3134

32-
def pull_up_select(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
35+
def pull_up_select(
36+
node: nodes.BigFrameNode, prefer_source_names: bool
37+
) -> nodes.BigFrameNode:
3338
if isinstance(node, nodes.LeafNode):
34-
return node
39+
if prefer_source_names and isinstance(node, nodes.ReadTableNode):
40+
return pull_up_source_ids(node)
41+
else:
42+
return node
3543
if isinstance(node, nodes.JoinNode):
3644
return pull_up_selects_under_join(node)
3745
if isinstance(node, nodes.ConcatNode):
@@ -42,6 +50,32 @@ def pull_up_select(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
4250
return node
4351

4452

53+
def pull_up_source_ids(node: nodes.ReadTableNode) -> nodes.BigFrameNode:
54+
if all(id.sql == source_id for id, source_id in node.scan_list.items):
55+
return node
56+
else:
57+
source_ids = sorted(
58+
set(scan_item.source_id for scan_item in node.scan_list.items)
59+
)
60+
new_scan_list = nodes.ScanList.from_items(
61+
[
62+
nodes.ScanItem(identifiers.ColumnId(source_id), source_id)
63+
for source_id in source_ids
64+
]
65+
)
66+
new_source = dataclasses.replace(node, scan_list=new_scan_list)
67+
new_selection = nodes.SelectionNode(
68+
new_source,
69+
tuple(
70+
nodes.AliasedRef(
71+
expression.DerefOp(identifiers.ColumnId(source_id)), id
72+
)
73+
for id, source_id in node.scan_list.items
74+
),
75+
)
76+
return new_selection
77+
78+
4579
def pull_up_select_unary(node: nodes.UnaryNode) -> nodes.BigFrameNode:
4680
child = node.child
4781
if not isinstance(child, nodes.SelectionNode):

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_corr/out.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`int64_col` AS `bfcol_0`,
4-
`float64_col` AS `bfcol_1`
3+
`float64_col` AS `float64_col`,
4+
`int64_col` AS `int64_col`
55
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
66
), `bfcte_1` AS (
77
SELECT
8-
CORR(`bfcol_0`, `bfcol_1`) AS `bfcol_2`
8+
CORR(`int64_col`, `float64_col`) AS `bfcol_2`
99
FROM `bfcte_0`
1010
)
1111
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_binary_compiler/test_cov/out.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`int64_col` AS `bfcol_0`,
4-
`float64_col` AS `bfcol_1`
3+
`float64_col` AS `float64_col`,
4+
`int64_col` AS `int64_col`
55
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
66
), `bfcte_1` AS (
77
SELECT
8-
COVAR_SAMP(`bfcol_0`, `bfcol_1`) AS `bfcol_2`
8+
COVAR_SAMP(`int64_col`, `float64_col`) AS `bfcol_2`
99
FROM `bfcte_0`
1010
)
1111
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_row_number/out.sql

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`bool_col` AS `bfcol_0`,
4-
`bytes_col` AS `bfcol_1`,
5-
`date_col` AS `bfcol_2`,
6-
`datetime_col` AS `bfcol_3`,
7-
`geography_col` AS `bfcol_4`,
8-
`int64_col` AS `bfcol_5`,
9-
`int64_too` AS `bfcol_6`,
10-
`numeric_col` AS `bfcol_7`,
11-
`float64_col` AS `bfcol_8`,
12-
`rowindex` AS `bfcol_9`,
13-
`rowindex_2` AS `bfcol_10`,
14-
`string_col` AS `bfcol_11`,
15-
`time_col` AS `bfcol_12`,
16-
`timestamp_col` AS `bfcol_13`,
17-
`duration_col` AS `bfcol_14`
3+
`bool_col` AS `bool_col`,
4+
`bytes_col` AS `bytes_col`,
5+
`date_col` AS `date_col`,
6+
`datetime_col` AS `datetime_col`,
7+
`duration_col` AS `duration_col`,
8+
`float64_col` AS `float64_col`,
9+
`geography_col` AS `geography_col`,
10+
`int64_col` AS `int64_col`,
11+
`int64_too` AS `int64_too`,
12+
`numeric_col` AS `numeric_col`,
13+
`rowindex` AS `rowindex`,
14+
`rowindex_2` AS `rowindex_2`,
15+
`string_col` AS `string_col`,
16+
`time_col` AS `time_col`,
17+
`timestamp_col` AS `timestamp_col`
1818
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
1919
), `bfcte_1` AS (
2020
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_row_number_with_window/out.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`int64_col` AS `bfcol_0`
3+
`int64_col` AS `int64_col`
44
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
55
), `bfcte_1` AS (
66
SELECT
77
*,
8-
ROW_NUMBER() OVER (ORDER BY `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
8+
ROW_NUMBER() OVER (ORDER BY `int64_col` ASC NULLS LAST) AS `bfcol_1`
99
FROM `bfcte_0`
1010
)
1111
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_nullary_compiler/test_size/out.sql

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`bool_col` AS `bfcol_0`,
4-
`bytes_col` AS `bfcol_1`,
5-
`date_col` AS `bfcol_2`,
6-
`datetime_col` AS `bfcol_3`,
7-
`geography_col` AS `bfcol_4`,
8-
`int64_col` AS `bfcol_5`,
9-
`int64_too` AS `bfcol_6`,
10-
`numeric_col` AS `bfcol_7`,
11-
`float64_col` AS `bfcol_8`,
12-
`rowindex` AS `bfcol_9`,
13-
`rowindex_2` AS `bfcol_10`,
14-
`string_col` AS `bfcol_11`,
15-
`time_col` AS `bfcol_12`,
16-
`timestamp_col` AS `bfcol_13`,
17-
`duration_col` AS `bfcol_14`
3+
`bool_col` AS `bool_col`,
4+
`bytes_col` AS `bytes_col`,
5+
`date_col` AS `date_col`,
6+
`datetime_col` AS `datetime_col`,
7+
`duration_col` AS `duration_col`,
8+
`float64_col` AS `float64_col`,
9+
`geography_col` AS `geography_col`,
10+
`int64_col` AS `int64_col`,
11+
`int64_too` AS `int64_too`,
12+
`numeric_col` AS `numeric_col`,
13+
`rowindex` AS `rowindex`,
14+
`rowindex_2` AS `rowindex_2`,
15+
`string_col` AS `string_col`,
16+
`time_col` AS `time_col`,
17+
`timestamp_col` AS `timestamp_col`
1818
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
1919
), `bfcte_1` AS (
2020
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_ordered_unary_compiler/test_array_agg/out.sql

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`int64_col` AS `bfcol_0`
3+
`int64_col` AS `int64_col`
44
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
55
), `bfcte_1` AS (
66
SELECT
7-
ARRAY_AGG(`bfcol_0` IGNORE NULLS ORDER BY `bfcol_0` IS NULL ASC, `bfcol_0` ASC) AS `bfcol_1`
7+
ARRAY_AGG(
8+
`int64_col` IGNORE NULLS ORDER BY `int64_col` IS NULL ASC NULLS LAST, `int64_col` ASC NULLS LAST
9+
) AS `bfcol_1`
810
FROM `bfcte_0`
911
)
1012
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_ordered_unary_compiler/test_string_agg/out.sql

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`string_col` AS `bfcol_0`
3+
`string_col` AS `string_col`
44
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
55
), `bfcte_1` AS (
66
SELECT
7-
COALESCE(STRING_AGG(`bfcol_0`, ','
8-
ORDER BY
9-
`bfcol_0` IS NULL ASC,
10-
`bfcol_0` ASC), '') AS `bfcol_1`
7+
COALESCE(
8+
STRING_AGG(
9+
`string_col` ORDER BY `string_col` IS NULL ASC NULLS LAST, `string_col` ASC NULLS LAST,
10+
','
11+
),
12+
''
13+
) AS `bfcol_1`
1114
FROM `bfcte_0`
1215
)
1316
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/out.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`bool_col` AS `bfcol_0`
3+
`bool_col` AS `bool_col`
44
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
55
), `bfcte_1` AS (
66
SELECT
7-
COALESCE(LOGICAL_AND(`bfcol_0`), TRUE) AS `bfcol_1`
7+
COALESCE(LOGICAL_AND(`bool_col`), TRUE) AS `bfcol_1`
88
FROM `bfcte_0`
99
)
1010
SELECT

tests/unit/core/compile/sqlglot/aggregations/snapshots/test_unary_compiler/test_all/window_out.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
WITH `bfcte_0` AS (
22
SELECT
3-
`bool_col` AS `bfcol_0`
3+
`bool_col` AS `bool_col`
44
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
55
), `bfcte_1` AS (
66
SELECT
77
*,
88
CASE
9-
WHEN `bfcol_0` IS NULL
9+
WHEN `bool_col` IS NULL
1010
THEN NULL
11-
ELSE COALESCE(LOGICAL_AND(`bfcol_0`) OVER (), TRUE)
11+
ELSE COALESCE(LOGICAL_AND(`bool_col`) OVER (), TRUE)
1212
END AS `bfcol_1`
1313
FROM `bfcte_0`
1414
)

0 commit comments

Comments
 (0)