Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions bigframes/core/compile/sqlglot/sqlglot_ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ def from_table(
this=sge.to_identifier(col_name, quoted=cls.quoted),
alias=sge.to_identifier(alias_name, quoted=cls.quoted),
)
if col_name != alias_name
else sge.to_identifier(col_name, quoted=cls.quoted)
for col_name, alias_name in zip(col_names, alias_names)
]
table_expr = sge.Table(
Expand Down Expand Up @@ -227,6 +229,8 @@ def select(
this=expr,
alias=sge.to_identifier(id, quoted=self.quoted),
)
if expr.alias_or_name != id
else expr
for id, expr in selected_cols
]

Expand Down
42 changes: 38 additions & 4 deletions bigframes/core/rewrite/select_pullup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@
# limitations under the License.

import dataclasses
import functools
from typing import cast

from bigframes.core import expression, nodes
from bigframes.core import expression, identifiers, nodes


def defer_selection(
Expand All @@ -26,12 +27,19 @@ def defer_selection(

In many cases, these nodes will be merged or eliminated entirely, simplifying the overall tree.
"""
return nodes.bottom_up(root, pull_up_select)
return nodes.bottom_up(
root, functools.partial(pull_up_select, prefer_source_names=True)
)


def pull_up_select(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
def pull_up_select(
node: nodes.BigFrameNode, prefer_source_names: bool
) -> nodes.BigFrameNode:
if isinstance(node, nodes.LeafNode):
return node
if prefer_source_names and isinstance(node, nodes.ReadTableNode):
return pull_up_source_ids(node)
else:
return node
if isinstance(node, nodes.JoinNode):
return pull_up_selects_under_join(node)
if isinstance(node, nodes.ConcatNode):
Expand All @@ -42,6 +50,32 @@ def pull_up_select(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
return node


def pull_up_source_ids(node: nodes.ReadTableNode) -> nodes.BigFrameNode:
    """Make a table read expose its columns under their source names.

    If every scanned column's id already matches its source column name, the
    node is returned unchanged. Otherwise the read is rebuilt so that each
    distinct source column is scanned once (in sorted order) with the source
    name as its id, and a ``SelectionNode`` is placed on top that maps those
    columns back to the ids of the original scan list.

    Args:
        node: The table read node to rewrite.

    Returns:
        ``node`` itself when no renaming is needed, otherwise a
        ``SelectionNode`` wrapping the rewritten read.
    """
    scan_items = node.scan_list.items

    # Guard: nothing to pull up when ids and source names already agree.
    if all(col_id.sql == src_name for col_id, src_name in scan_items):
        return node

    # One scan entry per distinct source column, in sorted order.
    unique_sources = sorted({item.source_id for item in scan_items})
    renamed_items = [
        nodes.ScanItem(identifiers.ColumnId(src_name), src_name)
        for src_name in unique_sources
    ]
    renamed_read = dataclasses.replace(
        node, scan_list=nodes.ScanList.from_items(renamed_items)
    )

    # The selection re-exposes every original id, each referencing the
    # source-named column it was read from.
    restored_refs = tuple(
        nodes.AliasedRef(expression.DerefOp(identifiers.ColumnId(src_name)), col_id)
        for col_id, src_name in scan_items
    )
    return nodes.SelectionNode(renamed_read, restored_refs)


def pull_up_select_unary(node: nodes.UnaryNode) -> nodes.BigFrameNode:
child = node.child
if not isinstance(child, nodes.SelectionNode):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`,
`float64_col` AS `bfcol_1`
`float64_col`,
`int64_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
CORR(`bfcol_0`, `bfcol_1`) AS `bfcol_2`
CORR(`int64_col`, `float64_col`) AS `bfcol_2`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`,
`float64_col` AS `bfcol_1`
`float64_col`,
`int64_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
COVAR_SAMP(`bfcol_0`, `bfcol_1`) AS `bfcol_2`
COVAR_SAMP(`int64_col`, `float64_col`) AS `bfcol_2`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
WITH `bfcte_0` AS (
SELECT
`bool_col` AS `bfcol_0`,
`bytes_col` AS `bfcol_1`,
`date_col` AS `bfcol_2`,
`datetime_col` AS `bfcol_3`,
`geography_col` AS `bfcol_4`,
`int64_col` AS `bfcol_5`,
`int64_too` AS `bfcol_6`,
`numeric_col` AS `bfcol_7`,
`float64_col` AS `bfcol_8`,
`rowindex` AS `bfcol_9`,
`rowindex_2` AS `bfcol_10`,
`string_col` AS `bfcol_11`,
`time_col` AS `bfcol_12`,
`timestamp_col` AS `bfcol_13`,
`duration_col` AS `bfcol_14`
`bool_col`,
`bytes_col`,
`date_col`,
`datetime_col`,
`duration_col`,
`float64_col`,
`geography_col`,
`int64_col`,
`int64_too`,
`numeric_col`,
`rowindex`,
`rowindex_2`,
`string_col`,
`time_col`,
`timestamp_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
`int64_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
ROW_NUMBER() OVER (ORDER BY `bfcol_0` ASC NULLS LAST) AS `bfcol_1`
ROW_NUMBER() OVER (ORDER BY `int64_col` ASC NULLS LAST) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
WITH `bfcte_0` AS (
SELECT
`bool_col` AS `bfcol_0`,
`bytes_col` AS `bfcol_1`,
`date_col` AS `bfcol_2`,
`datetime_col` AS `bfcol_3`,
`geography_col` AS `bfcol_4`,
`int64_col` AS `bfcol_5`,
`int64_too` AS `bfcol_6`,
`numeric_col` AS `bfcol_7`,
`float64_col` AS `bfcol_8`,
`rowindex` AS `bfcol_9`,
`rowindex_2` AS `bfcol_10`,
`string_col` AS `bfcol_11`,
`time_col` AS `bfcol_12`,
`timestamp_col` AS `bfcol_13`,
`duration_col` AS `bfcol_14`
`bool_col`,
`bytes_col`,
`date_col`,
`datetime_col`,
`duration_col`,
`float64_col`,
`geography_col`,
`int64_col`,
`int64_too`,
`numeric_col`,
`rowindex`,
`rowindex_2`,
`string_col`,
`time_col`,
`timestamp_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
`int64_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
ARRAY_AGG(`bfcol_0` IGNORE NULLS ORDER BY `bfcol_0` IS NULL ASC, `bfcol_0` ASC) AS `bfcol_1`
ARRAY_AGG(`int64_col` IGNORE NULLS ORDER BY `int64_col` IS NULL ASC, `int64_col` ASC) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
WITH `bfcte_0` AS (
SELECT
`string_col` AS `bfcol_0`
`string_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
COALESCE(STRING_AGG(`bfcol_0`, ','
ORDER BY
`bfcol_0` IS NULL ASC,
`bfcol_0` ASC), '') AS `bfcol_1`
COALESCE(
STRING_AGG(`string_col`, ','
ORDER BY
`string_col` IS NULL ASC,
`string_col` ASC),
''
) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
WITH `bfcte_0` AS (
SELECT
`bool_col` AS `bfcol_0`
`bool_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
COALESCE(LOGICAL_AND(`bfcol_0`), TRUE) AS `bfcol_1`
COALESCE(LOGICAL_AND(`bool_col`), TRUE) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
WITH `bfcte_0` AS (
SELECT
`bool_col` AS `bfcol_0`
`bool_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE
WHEN `bfcol_0` IS NULL
WHEN `bool_col` IS NULL
THEN NULL
ELSE COALESCE(LOGICAL_AND(`bfcol_0`) OVER (), TRUE)
ELSE COALESCE(LOGICAL_AND(`bool_col`) OVER (), TRUE)
END AS `bfcol_1`
FROM `bfcte_0`
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
WITH `bfcte_0` AS (
SELECT
`bool_col` AS `bfcol_0`,
`string_col` AS `bfcol_1`
`bool_col`,
`string_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE
WHEN `bfcol_0` IS NULL
WHEN `bool_col` IS NULL
THEN NULL
ELSE COALESCE(LOGICAL_AND(`bfcol_0`) OVER (PARTITION BY `bfcol_1`), TRUE)
ELSE COALESCE(LOGICAL_AND(`bool_col`) OVER (PARTITION BY `string_col`), TRUE)
END AS `bfcol_2`
FROM `bfcte_0`
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
`int64_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
ANY_VALUE(`bfcol_0`) AS `bfcol_1`
ANY_VALUE(`int64_col`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
`int64_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE WHEN `bfcol_0` IS NULL THEN NULL ELSE ANY_VALUE(`bfcol_0`) OVER () END AS `bfcol_1`
CASE WHEN `int64_col` IS NULL THEN NULL ELSE ANY_VALUE(`int64_col`) OVER () END AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`,
`string_col` AS `bfcol_1`
`int64_col`,
`string_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
CASE
WHEN `bfcol_0` IS NULL
WHEN `int64_col` IS NULL
THEN NULL
ELSE ANY_VALUE(`bfcol_0`) OVER (PARTITION BY `bfcol_1`)
ELSE ANY_VALUE(`int64_col`) OVER (PARTITION BY `string_col`)
END AS `bfcol_2`
FROM `bfcte_0`
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
`int64_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
APPROX_QUANTILES(`bfcol_0`, 4)[OFFSET(1)] AS `bfcol_1`,
APPROX_QUANTILES(`bfcol_0`, 4)[OFFSET(2)] AS `bfcol_2`,
APPROX_QUANTILES(`bfcol_0`, 4)[OFFSET(3)] AS `bfcol_3`
APPROX_QUANTILES(`int64_col`, 4)[OFFSET(1)] AS `bfcol_1`,
APPROX_QUANTILES(`int64_col`, 4)[OFFSET(2)] AS `bfcol_2`,
APPROX_QUANTILES(`int64_col`, 4)[OFFSET(3)] AS `bfcol_3`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
`int64_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
APPROX_TOP_COUNT(`bfcol_0`, 10) AS `bfcol_1`
APPROX_TOP_COUNT(`int64_col`, 10) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
`int64_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
COUNT(`bfcol_0`) AS `bfcol_1`
COUNT(`int64_col`) AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
WITH `bfcte_0` AS (
SELECT
`int64_col` AS `bfcol_0`
`int64_col`
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types`
), `bfcte_1` AS (
SELECT
*,
COUNT(`bfcol_0`) OVER () AS `bfcol_1`
COUNT(`int64_col`) OVER () AS `bfcol_1`
FROM `bfcte_0`
)
SELECT
Expand Down
Loading