Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions vortex-duckdb/cpp/include/duckdb_vx/table_filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ typedef struct {
void duckdb_vx_table_filter_get_in_filter(duckdb_vx_table_filter ffi_filter,
duckdb_vx_table_filter_in_filter *out);

duckdb_vx_table_filter duckdb_vx_table_filter_get_optional(duckdb_vx_table_filter ffi_filter);

duckdb_vx_expr duckdb_vx_table_filter_get_expression(duckdb_vx_table_filter ffi_filter);

#ifdef __cplusplus /* End C ABI */
}
#endif
17 changes: 15 additions & 2 deletions vortex-duckdb/cpp/include/duckdb_vx/table_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
*/
#pragma once

#include "duckdb.h"
#include "error.h"
#include "table_filter.h"
#include "duckdb_vx/data.h"
Expand All @@ -35,6 +36,17 @@ duckdb_value duckdb_vx_tfunc_bind_input_get_named_parameter(duckdb_vx_tfunc_bind
void duckdb_vx_tfunc_bind_result_add_column(duckdb_vx_tfunc_bind_result ffi_result, const char *name_str,
size_t name_len, duckdb_logical_type ffi_type);

// Opaque type for the result of get_virtual_columns
typedef struct duckdb_vx_tfunc_virtual_cols_result_ *duckdb_vx_tfunc_virtual_cols_result;
// Push a column into the get_virtual_columns result.
// The name is passed as a (pointer, length) pair and is copied by the callee, so it does not
// need to be NUL-terminated or to outlive the call.
// The call is a no-op when `ffi_result`, `name_str` or `ffi_type` is null.
void duckdb_vx_tfunc_virtual_cols_push(duckdb_vx_tfunc_virtual_cols_result ffi_result, idx_t column_idx,
const char *name_str, size_t name_len, duckdb_logical_type ffi_type);

// Opaque type for the result of get_row_id_columns
typedef struct duckdb_vx_tfunc_row_id_cols_result_ *duckdb_vx_tfunc_row_id_cols_result;
// Push a column into the get_row_id_columns result.
// The call is a no-op when `ffi_result` is null.
void duckdb_vx_tfunc_row_id_cols_push(duckdb_vx_tfunc_row_id_cols_result ffi_result, idx_t column_idx);

// Input data passed into the init_global and init_local callbacks.
typedef struct {
const void *bind_data;
Expand Down Expand Up @@ -96,6 +108,9 @@ typedef struct {

bool (*pushdown_complex_filter)(void *bind_data, duckdb_vx_expr expr, duckdb_vx_error *error_out);

void (*get_virtual_columns)(void *bind_data, duckdb_vx_tfunc_virtual_cols_result result_out);
void (*get_row_id_columns)(void *bind_data, duckdb_vx_tfunc_row_id_cols_result result_out);

void *pushdown_expression;
// void *to_string;
// void *dynamic_to_string;
Expand All @@ -108,8 +123,6 @@ typedef struct {
// void *supports_pushdown_type;
// void *get_partition_info;
// void *get_partition_stats;
// void *get_virtual_columns;
// void *get_row_id_columns;

bool projection_pushdown;
bool filter_pushdown;
Expand Down
4 changes: 2 additions & 2 deletions vortex-duckdb/cpp/table_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
#include "duckdb_vx.h"
#include "duckdb/planner/filter/conjunction_filter.hpp"
#include "duckdb/planner/filter/dynamic_filter.hpp"
#include "duckdb/planner/filter/optional_filter.hpp"
#include "duckdb/planner/filter/expression_filter.hpp"
#include "duckdb/planner/filter/struct_filter.hpp"
#include "duckdb/planner/filter/in_filter.hpp"
#include "duckdb/planner/filter/optional_filter.hpp"
#include "duckdb/planner/filter/struct_filter.hpp"

using namespace duckdb;

Expand Down
45 changes: 45 additions & 0 deletions vortex-duckdb/cpp/table_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,49 @@ unique_ptr<NodeStatistics> c_cardinality(ClientContext &context, const FunctionD
return stats;
}

/**
 * DuckDB `get_virtual_columns` hook: collects the virtual columns reported by the vtab callback.
 *
 * Hands the callback the user bind data plus an opaque pointer to a local map, which the
 * callback fills in through `duckdb_vx_tfunc_virtual_cols_push`, and then returns that map.
 * Returns an empty map when the vtab has no `get_virtual_columns` callback.
 */
virtual_column_map_t c_get_virtual_columns(ClientContext &context, optional_ptr<FunctionData> bind_data) {
    auto &bind = bind_data->Cast<CTableBindData>();

    virtual_column_map_t result;
    const auto callback = bind.info->vtab.get_virtual_columns;
    if (!callback) {
        // The vtab is a plain C struct, so the function pointer may be left unset.
        return result;
    }

    callback(bind.ffi_data->DataPtr(), reinterpret_cast<duckdb_vx_tfunc_virtual_cols_result>(&result));
    return result;
}

/**
 * Adds one virtual column to the map behind `ffi_result`.
 *
 * @param ffi_result  Opaque handle to a `virtual_column_map_t`.
 * @param column_idx  Key under which the column is stored.
 * @param name_str    Start of the column name; `name_len` bytes are copied, so the buffer
 *                    does not need to be NUL-terminated.
 * @param name_len    Length of the column name in bytes.
 * @param ffi_type    Handle to the column's `LogicalType`; the type is copied into the entry.
 *
 * Does nothing when `ffi_result`, `name_str` or `ffi_type` is null.
 */
extern "C" void duckdb_vx_tfunc_virtual_cols_push(duckdb_vx_tfunc_virtual_cols_result ffi_result,
                                                  idx_t column_idx, const char *name_str, size_t name_len,
                                                  duckdb_logical_type ffi_type) {
    if (!ffi_result || !name_str || !ffi_type) {
        return;
    }

    auto &columns = *reinterpret_cast<virtual_column_map_t *>(ffi_result);
    const auto &logical_type = *reinterpret_cast<LogicalType *>(ffi_type);

    // Build the name as a temporary so it is moved into the TableColumn; the previous
    // `const` local made `std::move` fall back to a copy.
    columns.emplace(column_idx, TableColumn(string(name_str, name_len), logical_type));
}

/**
 * DuckDB `get_row_id_columns` hook: collects the row-id columns reported by the vtab callback.
 *
 * Hands the callback the user bind data plus an opaque pointer to a local vector, which the
 * callback fills in through `duckdb_vx_tfunc_row_id_cols_push`, and then returns that vector.
 * Returns an empty vector when the vtab has no `get_row_id_columns` callback.
 */
vector<column_t> c_get_row_id_columns(ClientContext &context, optional_ptr<FunctionData> bind_data) {
    auto &bind = bind_data->Cast<CTableBindData>();

    vector<column_t> result;
    const auto callback = bind.info->vtab.get_row_id_columns;
    if (!callback) {
        // The vtab is a plain C struct, so the function pointer may be left unset.
        return result;
    }

    callback(bind.ffi_data->DataPtr(), reinterpret_cast<duckdb_vx_tfunc_row_id_cols_result>(&result));
    return result;
}

/**
 * Appends `column_idx` to the row-id column vector behind `ffi_result`.
 *
 * Does nothing when `ffi_result` is null.
 */
extern "C" void duckdb_vx_tfunc_row_id_cols_push(duckdb_vx_tfunc_row_id_cols_result ffi_result,
                                                 idx_t column_idx) {
    if (ffi_result == nullptr) {
        return;
    }

    // The handle is the address of the vector created in c_get_row_id_columns.
    auto &row_id_columns = *reinterpret_cast<vector<column_t> *>(ffi_result);
    row_id_columns.push_back(static_cast<column_t>(column_idx));
}

extern "C" size_t duckdb_vx_tfunc_bind_input_get_parameter_count(duckdb_vx_tfunc_bind_input ffi_input) {
if (!ffi_input) {
return 0;
Expand Down Expand Up @@ -275,6 +318,8 @@ extern "C" duckdb_state duckdb_vx_tfunc_register(duckdb_connection ffi_conn,
tf.late_materialization = vtab->late_materialization;
tf.cardinality = c_cardinality;
tf.get_partition_data = c_get_partition_data;
tf.get_virtual_columns = c_get_virtual_columns;
tf.get_row_id_columns = c_get_row_id_columns;

// Set up the parameters
for (size_t i = 0; i < vtab->parameter_count; i++) {
Expand Down
14 changes: 7 additions & 7 deletions vortex-duckdb/src/convert/table_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@ use crate::duckdb::{TableFilter, TableFilterClass};
pub fn try_from_table_filter(
value: &TableFilter,
col: &ExprRef,
scope_dtype: &DType,
dtype: &DType,
) -> VortexResult<Option<ExprRef>> {
Ok(Some(match value.as_class() {
TableFilterClass::ConstantComparison(const_) => {
let scalar: Scalar = (&const_.value).try_into()?;
let scalar: Scalar = const_.value.try_into()?;
BinaryExpr::new_expr(col.clone(), const_.operator.try_into()?, lit(scalar))
}
TableFilterClass::ConjunctionAnd(conj_and) => {
let Some(children) = conj_and
.children()
.map(|child| try_from_table_filter(&child, col, scope_dtype))
.map(|child| try_from_table_filter(&child, col, dtype))
.try_collect::<_, Option<Vec<_>>, _>()?
else {
return Ok(None);
Expand All @@ -41,7 +41,7 @@ pub fn try_from_table_filter(
TableFilterClass::ConjunctionOr(disjuction_or) => {
let Some(children) = disjuction_or
.children()
.map(|child| try_from_table_filter(&child, col, scope_dtype))
.map(|child| try_from_table_filter(&child, col, dtype))
.try_collect::<_, Option<Vec<_>>, _>()?
else {
return Ok(None);
Expand All @@ -52,11 +52,11 @@ pub fn try_from_table_filter(
TableFilterClass::IsNull => is_null(col.clone()),
TableFilterClass::IsNotNull => not(is_null(col.clone())),
TableFilterClass::StructExtract(name, child_filter) => {
return try_from_table_filter(&child_filter, &get_item(name, col.clone()), scope_dtype);
return try_from_table_filter(&child_filter, &get_item(name, col.clone()), dtype);
}
TableFilterClass::Optional(child) => {
// Optional filters are not yet fully supported; convert the child on a best-effort basis.
return try_from_table_filter(&child, col, scope_dtype).or_else(|_err| {
return try_from_table_filter(&child, col, dtype).or_else(|_err| {
// Failed to convert the optional expression, but it's optional, so who cares?
Ok(None)
});
Expand Down Expand Up @@ -99,7 +99,7 @@ pub fn try_from_table_filter(
.vortex_expect("failed to convert dynamic filter value to scalar");
Some(scalar.into_value())
},
col.return_dtype(scope_dtype)?,
col.return_dtype(dtype)?,
true, // If there is no value, we say that all rows pass the dynamic filter.
)
.into_expr()
Expand Down
8 changes: 8 additions & 0 deletions vortex-duckdb/src/duckdb/logical_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ impl LogicalType {
Self::new(DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR)
}

/// Creates the logical type for DuckDB's `UBIGINT` (`DUCKDB_TYPE_UBIGINT`).
pub fn uint64() -> Self {
Self::new(DUCKDB_TYPE::DUCKDB_TYPE_UBIGINT)
}

/// Creates the logical type for DuckDB's `BIGINT` (`DUCKDB_TYPE_BIGINT`).
pub fn int64() -> Self {
Self::new(DUCKDB_TYPE::DUCKDB_TYPE_BIGINT)
}

pub fn as_decimal(&self) -> (u8, u8) {
unsafe {
(
Expand Down
18 changes: 17 additions & 1 deletion vortex-duckdb/src/duckdb/table_function/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,13 @@ mod cardinality;
mod init;
mod partition;
mod pushdown_complex_filter;
mod row_id_cols;
mod virtual_cols;

pub use bind::*;
pub use init::*;
pub use row_id_cols::*;
pub use virtual_cols::*;

use crate::duckdb::LogicalType;
use crate::duckdb::connection::Connection;
Expand Down Expand Up @@ -46,6 +50,9 @@ pub trait TableFunction: Sized + Debug {
/// - j does not need to leave the table function at all.
const FILTER_PRUNE: bool = false;

/// Whether the table supports late materialization.
///
/// Defaults to `false`; implementors override the constant to opt in.
const LATE_MATERIALIZATION: bool = false;

/// Returns the parameters of the table function.
fn parameters() -> Vec<LogicalType> {
// By default, we don't have any parameters.
Expand Down Expand Up @@ -110,6 +117,13 @@ pub trait TableFunction: Sized + Debug {
_local_init_data: &mut Self::LocalState,
) -> VortexResult<u64>;

/// Reports the columns that uniquely identify a row ID in the table function.
/// Used for late-materialization and other optimizations.
///
/// Columns are reported by adding their indices to `_result` rather than by returning them.
/// The default implementation adds nothing.
fn row_id_columns(_bind_data: &Self::BindData, _result: &mut RowIdColsResult) {}

/// Reports the virtual columns of the table function.
///
/// Columns are reported by registering them on `_result` rather than by returning them.
/// The default implementation registers nothing.
fn virtual_columns(_bind_data: &Self::BindData, _result: &mut VirtualColsResult) {}

// TODO(ngates): there are many more callbacks that can be configured.
}

Expand Down Expand Up @@ -152,14 +166,16 @@ impl Connection {
statistics: ptr::null_mut::<c_void>(),
cardinality: Some(cardinality_callback::<T>),
pushdown_complex_filter: Some(pushdown_complex_filter_callback::<T>),
get_virtual_columns: Some(get_virtual_columns_callback::<T>),
get_row_id_columns: Some(get_row_id_columns_callback::<T>),
pushdown_expression: ptr::null_mut::<c_void>(),
table_scan_progress: ptr::null_mut::<c_void>(),
get_partition_data: Some(get_partition_data_callback::<T>),
projection_pushdown: T::PROJECTION_PUSHDOWN,
filter_pushdown: T::FILTER_PUSHDOWN,
filter_prune: T::FILTER_PRUNE,
sampling_pushdown: false,
late_materialization: false,
late_materialization: T::LATE_MATERIALIZATION,
};

duckdb_try!(
Expand Down
32 changes: 32 additions & 0 deletions vortex-duckdb/src/duckdb/table_function/row_id_cols.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::ffi::c_void;

use vortex::error::VortexExpect;

use crate::duckdb::TableFunction;
use crate::{cpp, wrapper};

/// Native callback for the get_row_id_columns function.
///
/// Reinterprets `bind_data` as `T::BindData`, wraps `result` in a borrowed
/// [`RowIdColsResult`] and forwards both to [`TableFunction::row_id_columns`].
///
/// # Safety
///
/// `bind_data` must point to a live `T::BindData`, and `result` must be a handle that stays
/// valid for the duration of the call.
///
/// # Panics
///
/// Fails through `vortex_expect` when `bind_data` is null.
pub(crate) unsafe extern "C-unwind" fn get_row_id_columns_callback<T: TableFunction>(
    bind_data: *mut c_void,
    result: cpp::duckdb_vx_tfunc_row_id_cols_result,
) {
    let typed_bind_data: *mut T::BindData = bind_data.cast();
    // SAFETY: the caller guarantees the pointer is either null or a valid `T::BindData`;
    // the null case is rejected right below.
    let bind_data = unsafe { typed_bind_data.as_ref() }.vortex_expect("bind_data null pointer");

    // SAFETY: the caller guarantees `result` is a valid handle for the length of this call.
    let mut columns = unsafe { RowIdColsResult::borrow(result) };
    T::row_id_columns(bind_data, &mut columns);
}

// Declares `RowIdColsResult` through the crate's `wrapper!` macro, on top of the
// `cpp::duckdb_vx_tfunc_row_id_cols_result` handle.
// NOTE(review): the trailing closure ignores its argument; presumably it is the destructor
// hook and is a no-op because the C++ caller owns the underlying vector. Confirm against the
// macro definition.
wrapper!(
RowIdColsResult,
cpp::duckdb_vx_tfunc_row_id_cols_result,
|_| {}
);

impl RowIdColsResult {
    /// Appends `column_idx` to the row-id column list behind this handle.
    pub fn push(&self, column_idx: u64) {
        let handle = self.as_ptr();
        // SAFETY: `handle` comes from this wrapper; the C++ side ignores a null handle.
        unsafe { cpp::duckdb_vx_tfunc_row_id_cols_push(handle, column_idx as _) }
    }
}
41 changes: 41 additions & 0 deletions vortex-duckdb/src/duckdb/table_function/virtual_cols.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::ffi::c_void;

use vortex::error::VortexExpect;

use crate::duckdb::{LogicalType, TableFunction};
use crate::{cpp, wrapper};

/// Native callback for the get_virtual_columns function.
///
/// Reinterprets `bind_data` as `T::BindData`, wraps `result` in a borrowed
/// [`VirtualColsResult`] and forwards both to [`TableFunction::virtual_columns`].
///
/// # Safety
///
/// `bind_data` must point to a live `T::BindData`, and `result` must be a handle that stays
/// valid for the duration of the call.
///
/// # Panics
///
/// Fails through `vortex_expect` when `bind_data` is null.
pub(crate) unsafe extern "C-unwind" fn get_virtual_columns_callback<T: TableFunction>(
    bind_data: *mut c_void,
    result: cpp::duckdb_vx_tfunc_virtual_cols_result,
) {
    let typed_bind_data: *mut T::BindData = bind_data.cast();
    // SAFETY: the caller guarantees the pointer is either null or a valid `T::BindData`;
    // the null case is rejected right below.
    let bind_data = unsafe { typed_bind_data.as_ref() }.vortex_expect("bind_data null pointer");

    // SAFETY: the caller guarantees `result` is a valid handle for the length of this call.
    let mut columns = unsafe { VirtualColsResult::borrow(result) };
    T::virtual_columns(bind_data, &mut columns);
}

// Declares `VirtualColsResult` through the crate's `wrapper!` macro, on top of the
// `cpp::duckdb_vx_tfunc_virtual_cols_result` handle.
// NOTE(review): the trailing closure ignores its argument; presumably it is the destructor
// hook and is a no-op because the C++ caller owns the underlying map. Confirm against the
// macro definition.
wrapper!(
VirtualColsResult,
cpp::duckdb_vx_tfunc_virtual_cols_result,
|_| {}
);

impl VirtualColsResult {
    /// Registers a virtual column named `name` with type `logical_type` under `column_idx`.
    ///
    /// The name is handed to the C++ side as a pointer/length pair, so it does not need a
    /// trailing NUL byte.
    pub fn register(&self, column_idx: u64, name: &str, logical_type: &LogicalType) {
        let handle = self.as_ptr();
        // SAFETY: `handle` and `logical_type` come from live wrappers, and `name` is a valid
        // slice for the duration of the call; the C++ side copies the bytes it is given.
        unsafe {
            cpp::duckdb_vx_tfunc_virtual_cols_push(
                handle,
                column_idx as _,
                name.as_ptr().cast(),
                name.len() as _,
                logical_type.as_ptr(),
            )
        }
    }
}
Loading
Loading