From 928c415e844ea82a07b4a63f25c7b1bfe76bb776 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 27 Oct 2023 09:24:32 -0600 Subject: [PATCH 01/14] Add some initial content about creating logical plans --- Cargo.toml | 1 + docs/Cargo.toml | 32 ++++++++ .../building-logical-plans.md | 82 ++++++++++++++++++- docs/src/lib.rs | 19 +++++ docs/src/library_logical_plan.rs | 50 +++++++++++ 5 files changed, 183 insertions(+), 1 deletion(-) create mode 100644 docs/Cargo.toml create mode 100644 docs/src/lib.rs create mode 100644 docs/src/library_logical_plan.rs diff --git a/Cargo.toml b/Cargo.toml index 71088e7fc7ad..77e3c6038ea7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,6 +32,7 @@ members = [ "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", + "docs", "test-utils", "benchmarks", ] diff --git a/docs/Cargo.toml b/docs/Cargo.toml new file mode 100644 index 000000000000..dc92994f773c --- /dev/null +++ b/docs/Cargo.toml @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[package] +name = "datafusion-docs" +description = "DataFusion Documentation" +publish = false +version = { workspace = true } +edition = { workspace = true } +readme = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +authors = { workspace = true } +rust-version = "1.70" + +[dependencies] +datafusion = { path = "../datafusion/core" } \ No newline at end of file diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index 406f4881129c..f7b45c98454d 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -19,4 +19,84 @@ # Building Logical Plans -Coming Soon +A logical plan is a structured representation of a database query that describes the high-level operations and +transformations needed to retrieve data from a database or data source. It abstracts away specific implementation +details and focuses on the logical flow of the query, including operations like filtering, sorting, and joining tables. + +This logical plan serves as an intermediate step before generating an optimized physical execution plan. + +DataFusion logical plans are typically created using the [LogicalPlanBuilder] struct. The following associated functions can be +used to create a new builder: + +- `empty` - create an empty plan with no fields +- `values` - create a plan from a set of literal values +- `scan` - create a plan representing a table scan +- `scan_with_filters` - create a plan representing a table scan with filters + +Once the builder is created, transformation methods can be called to declare that further operations should be +performed on the plan. Note that all we are doing at this stage is building up the logical plan structure. No query +execution will be performed. 
+ +Here are some examples of transformation methods, but for a full list, refer to the [LogicalPlanBuilder] API documentation. + +- `filter` +- `limit` +- `sort` +- `distinct` +- `join` + +The following example demonstrates building a simple query consisting of a table scan followed by a filter. + + +```rust +// create a logical table source +let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), +]); +let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + +// optional projection +let projection = None; + +// create a LogicalPlanBuilder for a table scan +let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; + +// perform a filter operation and build the plan +let plan = builder + .filter(col("id").gt(lit(500)))? // WHERE id > 500 + .build()?; + +// print the plan +println!("{}", plan.display_indent_schema()); +``` + +This example produces the following plan: + +``` +Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] + TableScan: person [id:Int32;N, name:Utf8;N] +``` + +## Table Sources + +The previous example used a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also +suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical plan. However, if you +want to use a TableSource that can be executed in DataFusion then you will need to [DefaultTableSource], which is a +wrapper for a [TableProvider]. + +Both [LogicalTableSource] and [DefaultTableSource] implement the [TableSource] trait. [DefaultTableSource] acts as a +bridge between DataFusion's logical and physical plans and is necessary because the logical plan is contained in +the `datafusion_expr` crate, which does not know about DataFusion's physical plans. 
+ +```rust +pub struct DefaultTableSource { + pub table_provider: Arc<dyn TableProvider>, +} +``` + +[LogicalPlanBuilder]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalPlanBuilder.html +[LogicalTableSource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html +[DefaultTableSource]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html +[TableProvider]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html +[TableSource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html \ No newline at end of file diff --git a/docs/src/lib.rs b/docs/src/lib.rs new file mode 100644 index 000000000000..4b32a27f3cde --- /dev/null +++ b/docs/src/lib.rs @@ -0,0 +1,19 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#[cfg(test)] +mod library_logical_plan; \ No newline at end of file diff --git a/docs/src/library_logical_plan.rs b/docs/src/library_logical_plan.rs new file mode 100644 index 000000000000..9783329a58f0 --- /dev/null +++ b/docs/src/library_logical_plan.rs @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion::prelude::*; +use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use datafusion::error::Result; +use datafusion::logical_expr::builder::LogicalTableSource; +use datafusion::logical_expr::LogicalPlanBuilder; +use std::sync::Arc; + +#[test] +fn plan_builder_1() -> Result<()> { + + // create a logical table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + // optional projection + let projection = None; + + // create a LogicalPlanBuilder for a table scan + let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; + + // perform a filter operation and build the plan + let plan = builder + .filter(col("id").gt(lit(500)))? 
+ .build()?; + + // print the plan + println!("{}", plan.display_indent_schema()); + + Ok(()) +} From 1b220a1f1220655a43f5f569898162af10ea794d Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 27 Oct 2023 09:31:51 -0600 Subject: [PATCH 02/14] prettier --- datafusion/core/src/prelude.rs | 2 +- .../building-logical-plans.md | 29 ++++++++++--------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/datafusion/core/src/prelude.rs b/datafusion/core/src/prelude.rs index 7689468e5d13..5cd8b3870f81 100644 --- a/datafusion/core/src/prelude.rs +++ b/datafusion/core/src/prelude.rs @@ -13,7 +13,7 @@ // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations -// under the License.pub}, +// under the License. //! DataFusion "prelude" to simplify importing common types. //! diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index f7b45c98454d..56aa7dc7563f 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -19,22 +19,22 @@ # Building Logical Plans -A logical plan is a structured representation of a database query that describes the high-level operations and -transformations needed to retrieve data from a database or data source. It abstracts away specific implementation -details and focuses on the logical flow of the query, including operations like filtering, sorting, and joining tables. +A logical plan is a structured representation of a database query that describes the high-level operations and +transformations needed to retrieve data from a database or data source. It abstracts away specific implementation +details and focuses on the logical flow of the query, including operations like filtering, sorting, and joining tables. 
This logical plan serves as an intermediate step before generating an optimized physical execution plan. -DataFusion logical plans are typically created using the [LogicalPlanBuilder] struct. The following associated functions can be -used to create a new builder: +DataFusion logical plans are typically created using the [LogicalPlanBuilder] struct. The following associated functions can be +used to create a new builder: - `empty` - create an empty plan with no fields - `values` - create a plan from a set of literal values - `scan` - create a plan representing a table scan - `scan_with_filters` - create a plan representing a table scan with filters -Once the builder is created, transformation methods can be called to declare that further operations should be -performed on the plan. Note that all we are doing at this stage is building up the logical plan structure. No query +Once the builder is created, transformation methods can be called to declare that further operations should be +performed on the plan. Note that all we are doing at this stage is building up the logical plan structure. No query execution will be performed. Here are some examples of transformation methods, but for a full list, refer to the [LogicalPlanBuilder] API documentation. @@ -45,9 +45,10 @@ Here are some examples of transformation methods, but for a full list, refer to - `distinct` - `join` -The following example demonstrates building a simple query consisting of a table scan followed by a filter. +The following example demonstrates building a simple query consisting of a table scan followed by a filter. + ```rust // create a logical table source let schema = Schema::new(vec![ @@ -80,13 +81,13 @@ Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] ## Table Sources -The previous example used a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also -suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical plan. 
However, if you -want to use a TableSource that can be executed in DataFusion then you will need to [DefaultTableSource], which is a +The previous example used a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also +suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical plan. However, if you +want to use a TableSource that can be executed in DataFusion then you will need to [DefaultTableSource], which is a wrapper for a [TableProvider]. -Both [LogicalTableSource] and [DefaultTableSource] implement the [TableSource] trait. [DefaultTableSource] acts as a -bridge between DataFusion's logical and physical plans and is necessary because the logical plan is contained in +Both [LogicalTableSource] and [DefaultTableSource] implement the [TableSource] trait. [DefaultTableSource] acts as a +bridge between DataFusion's logical and physical plans and is necessary because the logical plan is contained in the `datafusion_expr` crate, which does not know about DataFusion's physical plans. 
```rust @@ -99,4 +100,4 @@ pub struct DefaultTableSource { [LogicalTableSource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html [DefaultTableSource]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html [TableProvider]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html -[TableSource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html \ No newline at end of file +[TableSource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html From 653bace135a45a346afb7a727968bb0a6a0a4da9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 27 Oct 2023 09:32:58 -0600 Subject: [PATCH 03/14] formatting --- docs/src/lib.rs | 2 +- docs/src/library_logical_plan.rs | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/src/lib.rs b/docs/src/lib.rs index 4b32a27f3cde..f73132468ec9 100644 --- a/docs/src/lib.rs +++ b/docs/src/lib.rs @@ -16,4 +16,4 @@ // under the License. #[cfg(test)] -mod library_logical_plan; \ No newline at end of file +mod library_logical_plan; diff --git a/docs/src/library_logical_plan.rs b/docs/src/library_logical_plan.rs index 9783329a58f0..15c2d3b63e46 100644 --- a/docs/src/library_logical_plan.rs +++ b/docs/src/library_logical_plan.rs @@ -15,16 +15,15 @@ // specific language governing permissions and limitations // under the License. 
-use datafusion::prelude::*; use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::error::Result; use datafusion::logical_expr::builder::LogicalTableSource; use datafusion::logical_expr::LogicalPlanBuilder; +use datafusion::prelude::*; use std::sync::Arc; #[test] fn plan_builder_1() -> Result<()> { - // create a logical table source let schema = Schema::new(vec![ Field::new("id", DataType::Int32, true), @@ -39,9 +38,7 @@ fn plan_builder_1() -> Result<()> { let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; // perform a filter operation and build the plan - let plan = builder - .filter(col("id").gt(lit(500)))? - .build()?; + let plan = builder.filter(col("id").gt(lit(500)))?.build()?; // print the plan println!("{}", plan.display_indent_schema()); From 4db03cc9e3b7f528462eef392ccdfc69e6e7c591 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 27 Oct 2023 09:38:11 -0600 Subject: [PATCH 04/14] formatting --- .../library-user-guide/building-logical-plans.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index 56aa7dc7563f..0bab0c062efd 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -96,8 +96,8 @@ pub struct DefaultTableSource { } ``` -[LogicalPlanBuilder]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalPlanBuilder.html -[LogicalTableSource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html -[DefaultTableSource]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html -[TableProvider]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html -[TableSource]: 
https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html +[logicalplanbuilder]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalPlanBuilder.html +[logicaltablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html +[defaulttablesource]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html +[tableprovider]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html +[tablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/trait.TableSource.html From feedaf735dbffbd3e70ce43740acf501eabd058d Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 27 Oct 2023 09:38:42 -0600 Subject: [PATCH 05/14] formatting --- docs/source/library-user-guide/building-logical-plans.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index 0bab0c062efd..6501446746f5 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -83,7 +83,7 @@ Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] The previous example used a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical plan. However, if you -want to use a TableSource that can be executed in DataFusion then you will need to [DefaultTableSource], which is a +want to use a [TableSource] that can be executed in DataFusion then you will need to use [DefaultTableSource], which is a wrapper for a [TableProvider]. Both [LogicalTableSource] and [DefaultTableSource] implement the [TableSource] trait. 
[DefaultTableSource] acts as a From 3de5a041c30ba58eaff6bef31b46a6dc6f40ecc7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 27 Oct 2023 09:59:00 -0600 Subject: [PATCH 06/14] more content --- .../building-logical-plans.md | 48 +++++++++++++++++++ docs/src/library_logical_plan.rs | 33 ++++++++++++- 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index 6501446746f5..bf7d33cecb76 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -25,6 +25,53 @@ details and focuses on the logical flow of the query, including operations like This logical plan serves as an intermediate step before generating an optimized physical execution plan. +## Building Logical Plans Manually + +DataFusion's [LogicalPlan] is an enum containing variants representing all the supported operators, and also +contains an `Extension` variant that allows projects building on DataFusion to add custom logical operators. + +It is possible to create logical plans by directly creating instances of the [LogicalPlan] enum as follows, but it is +much easier to use the [LogicalPlanBuilder], which is described in the next section. 
+ +Here is an example of building a logical plan directly: + +```rust +// create a logical table source +let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), +]); +let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + +// create a TableScan plan +let projection = None; // optional projection +let filters = vec![]; // optional filters to push down +let fetch = None; // optional LIMIT +let table_scan = LogicalPlan::TableScan(TableScan::try_new( + "my_table", + Arc::new(table_source), + projection, + filters, + fetch, +)?); + +// create a Filter plan that wraps the TableScan +let filter_expr = col("id").gt(lit(500)); +let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); + +// print the plan +println!("{}", plan.display_indent_schema()); +``` + +This example produces the following plan: + +``` +Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] + TableScan: person [id:Int32;N, name:Utf8;N] +``` + +## Building Logical Plans with LogicalPlanBuilder + DataFusion logical plans are typically created using the [LogicalPlanBuilder] struct. 
The following associated functions can be used to create a new builder: @@ -96,6 +143,7 @@ pub struct DefaultTableSource { } ``` +[logicalplan]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/enum.LogicalPlan.html [logicalplanbuilder]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalPlanBuilder.html [logicaltablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html [defaulttablesource]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html diff --git a/docs/src/library_logical_plan.rs b/docs/src/library_logical_plan.rs index 15c2d3b63e46..39b00734b721 100644 --- a/docs/src/library_logical_plan.rs +++ b/docs/src/library_logical_plan.rs @@ -18,10 +18,41 @@ use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use datafusion::error::Result; use datafusion::logical_expr::builder::LogicalTableSource; -use datafusion::logical_expr::LogicalPlanBuilder; +use datafusion::logical_expr::{Filter, LogicalPlan, LogicalPlanBuilder, TableScan}; use datafusion::prelude::*; use std::sync::Arc; +#[test] +fn plan_1() -> Result<()> { + // create a logical table source + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + // create a TableScan plan + let projection = None; // optional projection + let filters = vec![]; // optional filters to push down + let fetch = None; // optional LIMIT + let table_scan = LogicalPlan::TableScan(TableScan::try_new( + "my_table", + Arc::new(table_source), + projection, + filters, + fetch, + )?); + + // create a Filter plan that wraps the TableScan + let filter_expr = col("id").gt(lit(500)); + let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); + + // print the plan + println!("{}", 
plan.display_indent_schema()); + + Ok(()) +} + #[test] fn plan_builder_1() -> Result<()> { // create a logical table source From fb35b4ae8ffb8a7b6e084a2ae0f553807c0a2b3e Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 27 Oct 2023 10:05:29 -0600 Subject: [PATCH 07/14] use correct table name --- docs/source/library-user-guide/building-logical-plans.md | 4 ++-- docs/src/library_logical_plan.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index bf7d33cecb76..e38e0aeeaa1e 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -35,6 +35,7 @@ much easier to use the [LogicalPlanBuilder], which is described in the next sect Here is an example of building a logical plan directly: + ```rust // create a logical table source let schema = Schema::new(vec![ @@ -48,7 +49,7 @@ let projection = None; // optional projection let filters = vec![]; // optional filters to push down let fetch = None; // optional LIMIT let table_scan = LogicalPlan::TableScan(TableScan::try_new( - "my_table", + "person", Arc::new(table_source), projection, filters, @@ -95,7 +96,6 @@ Here are some examples of transformation methods, but for a full list, refer to The following example demonstrates building a simple query consisting of a table scan followed by a filter. 
- ```rust // create a logical table source let schema = Schema::new(vec![ diff --git a/docs/src/library_logical_plan.rs b/docs/src/library_logical_plan.rs index 39b00734b721..91c18b385af5 100644 --- a/docs/src/library_logical_plan.rs +++ b/docs/src/library_logical_plan.rs @@ -36,7 +36,7 @@ fn plan_1() -> Result<()> { let filters = vec![]; // optional filters to push down let fetch = None; // optional LIMIT let table_scan = LogicalPlan::TableScan(TableScan::try_new( - "my_table", + "person", Arc::new(table_source), projection, filters, From 8d7719764cd0e5c199748e68958a79b87b8a7145 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 27 Oct 2023 10:28:33 -0600 Subject: [PATCH 08/14] formatting --- dev/update_datafusion_versions.py | 1 + docs/Cargo.toml | 2 +- docs/source/library-user-guide/building-logical-plans.md | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dev/update_datafusion_versions.py b/dev/update_datafusion_versions.py index 7cbe39fdfb66..19701b813671 100755 --- a/dev/update_datafusion_versions.py +++ b/dev/update_datafusion_versions.py @@ -43,6 +43,7 @@ 'datafusion-wasmtest': 'datafusion/wasmtest/Cargo.toml', 'datafusion-benchmarks': 'benchmarks/Cargo.toml', 'datafusion-examples': 'datafusion-examples/Cargo.toml', + 'datafusion-docs': 'docs/Cargo.toml', } def update_workspace_version(new_version: str): diff --git a/docs/Cargo.toml b/docs/Cargo.toml index dc92994f773c..21a69fb2bc06 100644 --- a/docs/Cargo.toml +++ b/docs/Cargo.toml @@ -29,4 +29,4 @@ authors = { workspace = true } rust-version = "1.70" [dependencies] -datafusion = { path = "../datafusion/core" } \ No newline at end of file +datafusion = { path = "../datafusion/core", version = "32.0.0", default-features = false } \ No newline at end of file diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index e38e0aeeaa1e..ff118c8fb040 100644 --- a/docs/source/library-user-guide/building-logical-plans.md 
+++ b/docs/source/library-user-guide/building-logical-plans.md @@ -36,6 +36,7 @@ much easier to use the [LogicalPlanBuilder], which is described in the next sect Here is an example of building a logical plan directly: + ```rust // create a logical table source let schema = Schema::new(vec![ @@ -96,6 +97,7 @@ Here are some examples of transformation methods, but for a full list, refer to The following example demonstrates building a simple query consisting of a table scan followed by a filter. + ```rust // create a logical table source let schema = Schema::new(vec![ From d1302816619b2a7170429083559295e9de196eca Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 27 Oct 2023 11:38:13 -0600 Subject: [PATCH 09/14] tomlfmt --- docs/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Cargo.toml b/docs/Cargo.toml index 21a69fb2bc06..b82c07f9f8f4 100644 --- a/docs/Cargo.toml +++ b/docs/Cargo.toml @@ -29,4 +29,4 @@ authors = { workspace = true } rust-version = "1.70" [dependencies] -datafusion = { path = "../datafusion/core", version = "32.0.0", default-features = false } \ No newline at end of file +datafusion = { path = "../datafusion/core", version = "32.0.0", default-features = false } From ac0859a20d40df3a133add0bfde307f13cc1b827 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 28 Oct 2023 09:06:34 -0600 Subject: [PATCH 10/14] Update docs/source/library-user-guide/building-logical-plans.md Co-authored-by: Andrew Lamb --- docs/source/library-user-guide/building-logical-plans.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index ff118c8fb040..60984a5bf9f3 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -57,7 +57,7 @@ let table_scan = LogicalPlan::TableScan(TableScan::try_new( fetch, )?); -// create a Filter plan 
that wraps the TableScan +// create a Filter plan that evaluates `id > 500` that wraps the TableScan let filter_expr = col("id").gt(lit(500)); let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); From ee5d4cb9295775dbeff9d0723323dbc2578ccabd Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 28 Oct 2023 09:06:45 -0600 Subject: [PATCH 11/14] address feedback --- docs/Cargo.toml | 4 ++-- docs/source/library-user-guide/building-logical-plans.md | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/Cargo.toml b/docs/Cargo.toml index b82c07f9f8f4..9caa0bde3608 100644 --- a/docs/Cargo.toml +++ b/docs/Cargo.toml @@ -16,8 +16,8 @@ # under the License. [package] -name = "datafusion-docs" -description = "DataFusion Documentation" +name = "datafusion-docs-tests" +description = "DataFusion Documentation Tests" publish = false version = { workspace = true } edition = { workspace = true } diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index ff118c8fb040..12f3dcda9f1d 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -23,7 +23,8 @@ A logical plan is a structured representation of a database query that describes transformations needed to retrieve data from a database or data source. It abstracts away specific implementation details and focuses on the logical flow of the query, including operations like filtering, sorting, and joining tables. -This logical plan serves as an intermediate step before generating an optimized physical execution plan. +This logical plan serves as an intermediate step before generating an optimized physical execution plan. This is +explained in more detail in the [Query Planning and Execution Overview] section of the [Architecture Guide]. 
## Building Logical Plans Manually @@ -145,6 +146,8 @@ pub struct DefaultTableSource { } ``` +[query planning and execution overview]: https://docs.rs/datafusion/latest/datafusion/index.html#query-planning-and-execution-overview +[architecture guide]: https://docs.rs/datafusion/latest/datafusion/index.html#architecture [logicalplan]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/enum.LogicalPlan.html [logicalplanbuilder]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalPlanBuilder.html [logicaltablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html From 313469b1f225e64f8226f3843ee5f935bdec6a94 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 28 Oct 2023 09:11:05 -0600 Subject: [PATCH 12/14] Update docs/source/library-user-guide/building-logical-plans.md Co-authored-by: Andrew Lamb --- docs/source/library-user-guide/building-logical-plans.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index 60984a5bf9f3..7896f78be03d 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -94,7 +94,7 @@ Here are some examples of transformation methods, but for a full list, refer to - `distinct` - `join` -The following example demonstrates building a simple query consisting of a table scan followed by a filter. +The following example demonstrates building the same simple query plan as the previous example, with a table scan followed by a filter. 
From 3190a3f2b451100eb637b55979f0f56d9b12565e Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 28 Oct 2023 09:17:18 -0600 Subject: [PATCH 13/14] address feedback --- .../src/datasource/default_table_source.rs | 6 ++++-- .../building-logical-plans.md | 21 +++++++------------ docs/src/library_logical_plan.rs | 4 ++-- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/datafusion/core/src/datasource/default_table_source.rs b/datafusion/core/src/datasource/default_table_source.rs index f93faa50a9b9..00a9c123ceee 100644 --- a/datafusion/core/src/datasource/default_table_source.rs +++ b/datafusion/core/src/datasource/default_table_source.rs @@ -26,10 +26,12 @@ use arrow::datatypes::SchemaRef; use datafusion_common::{internal_err, Constraints, DataFusionError}; use datafusion_expr::{Expr, TableProviderFilterPushDown, TableSource}; -/// DataFusion default table source, wrapping TableProvider +/// DataFusion default table source, wrapping TableProvider. /// /// This structure adapts a `TableProvider` (physical plan trait) to the `TableSource` -/// (logical plan trait) +/// (logical plan trait) and is necessary because the logical plan is contained in +/// the `datafusion_expr` crate, and is not aware of table providers, which exist in +/// the core `datafusion` crate. pub struct DefaultTableSource { /// table provider pub table_provider: Arc<dyn TableProvider>, diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index f52ffde36e99..dc0faa573f45 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -23,7 +23,7 @@ A logical plan is a structured representation of a database query that describes transformations needed to retrieve data from a database or data source. It abstracts away specific implementation details and focuses on the logical flow of the query, including operations like filtering, sorting, and joining tables. 
-This logical plan serves as an intermediate step before generating an optimized physical execution plan. This is +This logical plan serves as an intermediate step before generating an optimized physical execution plan. This is explained in more detail in the [Query Planning and Execution Overview] section of the [Architecture Guide]. ## Building Logical Plans Manually @@ -75,8 +75,10 @@ Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] ## Building Logical Plans with LogicalPlanBuilder -DataFusion logical plans are typically created using the [LogicalPlanBuilder] struct. The following associated functions can be -used to create a new builder: +DataFusion logical plans can be created using the [LogicalPlanBuilder] struct. There is also a [DataFrame] API which is +a higher-level API that delegates to [LogicalPlanBuilder]. + +The following associated functions can be used to create a new builder: - `empty` - create an empty plan with no fields - `values` - create a plan from a set of literal values @@ -132,24 +134,15 @@ Filter: person.id > Int32(500) [id:Int32;N, name:Utf8;N] ## Table Sources The previous example used a [LogicalTableSource], which is used for tests and documentation in DataFusion, and is also -suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical plan. However, if you +suitable if you are using DataFusion to build logical plans but do not use DataFusion's physical planner. However, if you want to use a [TableSource] that can be executed in DataFusion then you will need to use [DefaultTableSource], which is a wrapper for a [TableProvider]. -Both [LogicalTableSource] and [DefaultTableSource] implement the [TableSource] trait. [DefaultTableSource] acts as a -bridge between DataFusion's logical and physical plans and is necessary because the logical plan is contained in -the `datafusion_expr` crate, which does not know about DataFusion's physical plans. 
- -```rust -pub struct DefaultTableSource { - pub table_provider: Arc<dyn TableProvider>, -} -``` - [query planning and execution overview]: https://docs.rs/datafusion/latest/datafusion/index.html#query-planning-and-execution-overview [architecture guide]: https://docs.rs/datafusion/latest/datafusion/index.html#architecture [logicalplan]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/enum.LogicalPlan.html [logicalplanbuilder]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalPlanBuilder.html +[dataframe]: using-the-dataframe-api.md [logicaltablesource]: https://docs.rs/datafusion-expr/latest/datafusion_expr/logical_plan/builder/struct.LogicalTableSource.html [defaulttablesource]: https://docs.rs/datafusion/latest/datafusion/datasource/default_table_source/struct.DefaultTableSource.html [tableprovider]: https://docs.rs/datafusion/latest/datafusion/datasource/provider/trait.TableProvider.html diff --git a/docs/src/library_logical_plan.rs b/docs/src/library_logical_plan.rs index 91c18b385af5..355003941570 100644 --- a/docs/src/library_logical_plan.rs +++ b/docs/src/library_logical_plan.rs @@ -43,7 +43,7 @@ fn plan_1() -> Result<()> { fetch, )?); - // create a Filter plan that wraps the TableScan + // create a Filter plan that evaluates `id > 500` and wraps the TableScan let filter_expr = col("id").gt(lit(500)); let plan = LogicalPlan::Filter(Filter::try_new(filter_expr, Arc::new(table_scan))?); @@ -68,7 +68,7 @@ fn plan_builder_1() -> Result<()> { // create a LogicalPlanBuilder for a table scan let builder = LogicalPlanBuilder::scan("person", Arc::new(table_source), projection)?; - // perform a filter operation and build the plan + // perform a filter that evaluates `id > 500`, and build the plan let plan = builder.filter(col("id").gt(lit(500)))?.build()?; // print the plan From 3db45decb00e13b5daa1a5a047472668e85292aa Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 28 Oct 2023 09:17:42 -0600 Subject: [PATCH 14/14] 
prettier --- docs/source/library-user-guide/building-logical-plans.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/library-user-guide/building-logical-plans.md b/docs/source/library-user-guide/building-logical-plans.md index e00bb60a1bcb..fe922d8eaeb1 100644 --- a/docs/source/library-user-guide/building-logical-plans.md +++ b/docs/source/library-user-guide/building-logical-plans.md @@ -97,7 +97,7 @@ Here are some examples of transformation methods, but for a full list, refer to - `distinct` - `join` -The following example demonstrates building the same simple query plan as the previous example, with a table scan followed by a filter. +The following example demonstrates building the same simple query plan as the previous example, with a table scan followed by a filter.