Skip to content
16 changes: 14 additions & 2 deletions datafusion/physical-plan/src/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,11 +194,23 @@ impl ExecutionPlan for FilterExec {
fn statistics(&self) -> Result<Statistics> {
let predicate = self.predicate();

let input_stats = self.input.statistics()?;
let schema = self.schema();
if !check_support(predicate, &schema) {
return Ok(Statistics::new_unknown(&schema));
// assume filter selects 20% of rows if we cannot do anything smarter
// tracking issue for making this configurable:
// https://github.com/apache/arrow-datafusion/issues/8133
let selectivity = 0.2_f32;
let mut stats = input_stats.clone().into_inexact();
if let Precision::Inexact(n) = stats.num_rows {
stats.num_rows = Precision::Inexact((selectivity * n as f32) as usize);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can/should do the same for the `total_byte_size` value.

}
if let Precision::Inexact(n) = stats.total_byte_size {
stats.total_byte_size =
Precision::Inexact((selectivity * n as f32) as usize);
}
return Ok(stats);
}
let input_stats = self.input.statistics()?;

let num_rows = input_stats.num_rows;
let total_byte_size = input_stats.total_byte_size;
Expand Down