Skip to content

Commit db8f15b

Browse files
committed
MDEV-27229: Estimation for filtered rows less precise ... #5
Followup: remove the line `set_if_bigger(res, col_stats->get_avg_frequency());` from get_column_range_cardinality() and make sure that lower bound is only applied with the binary histograms. For JSON histograms, it makes the estimates unnecessarily imprecise.
1 parent d3e511d commit db8f15b

File tree

7 files changed

+33
-16
lines changed

7 files changed

+33
-16
lines changed

mysql-test/main/statistics_json.result

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4644,7 +4644,7 @@ Warnings:
46444644
Note 1003 select `test`.`t1_json`.`a` AS `a` from `test`.`t1_json` where `test`.`t1_json`.`a` < 'b-1a'
46454645
analyze select * from t1_json where a > 'zzzzzzzzz';
46464646
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
4647-
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 10.00 0.00 Using where
4647+
1 SIMPLE t1_json ALL NULL NULL NULL NULL 10 10.00 0.00 0.00 Using where
46484648
drop table ten;
46494649
UPDATE mysql.column_stats
46504650
SET histogram='["not-what-you-expect"]' WHERE table_name='t1_json';
@@ -4752,7 +4752,7 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f
47524752
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 101.00 1.98 1.98 Using where
47534753
explain extended select * from t2 where city < 'Lagos';
47544754
id select_type table type possible_keys key key_len ref rows filtered Extra
4755-
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 50.00 Using where
4755+
1 SIMPLE t2 ALL NULL NULL NULL NULL 101 1.98 Using where
47564756
Warnings:
47574757
Note 1003 select `test`.`t2`.`city` AS `city` from `test`.`t2` where `test`.`t2`.`city` < 'Lagos'
47584758
drop table t1_bin;
@@ -8164,7 +8164,7 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f
81648164
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 33.00 33.00 Using where
81658165
analyze select * from t1 where a < 0;
81668166
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
8167-
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.47 0.00 Using where
8167+
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.00 0.00 Using where
81688168
analyze select * from t1 where a > 0;
81698169
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
81708170
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 67.00 67.00 Using where
@@ -8189,7 +8189,7 @@ test.t2 analyze status Engine-independent statistics collected
81898189
test.t2 analyze status OK
81908190
analyze select * from t2 where a < 1;
81918191
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
8192-
1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 8.33 0.10 Using where
8192+
1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 0.10 0.10 Using where
81938193
analyze select * from t2 where a =100;
81948194
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
81958195
1 SIMPLE t2 ALL NULL NULL NULL NULL 1011 1011.00 0.10 0.10 Using where
@@ -8237,6 +8237,15 @@ id select_type table type possible_keys key key_len ref rows r_rows filtered r_f
82378237
analyze select COUNT(*) FROM t1 WHERE a >='bar';
82388238
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
82398239
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 100.00 100.00 Using where
8240+
analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
8241+
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
8242+
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
8243+
analyze select COUNT(*) FROM t1 WHERE a <='aaa';
8244+
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
8245+
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
8246+
analyze select COUNT(*) FROM t1 WHERE a < 'bar';
8247+
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
8248+
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 0.50 0.00 Using where
82408249
analyze select COUNT(*) FROM t1 WHERE a <='bar';
82418250
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
82428251
1 SIMPLE t1 ALL NULL NULL NULL NULL 200 200.00 50.00 50.00 Using where

mysql-test/main/statistics_json.test

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -409,9 +409,9 @@ analyze select COUNT(*) FROM t1 WHERE a > 'bar';
409409
analyze select COUNT(*) FROM t1 WHERE a >='bar';
410410

411411
# Can enable these after get_avg_frequency issue is resolved:
412-
# analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
413-
# analyze select COUNT(*) FROM t1 WHERE a <='aaa';
414-
# analyze select COUNT(*) FROM t1 WHERE a < 'bar';
412+
analyze select COUNT(*) FROM t1 WHERE a < 'aaa';
413+
analyze select COUNT(*) FROM t1 WHERE a <='aaa';
414+
analyze select COUNT(*) FROM t1 WHERE a < 'bar';
415415

416416
analyze select COUNT(*) FROM t1 WHERE a <='bar';
417417

sql/opt_histogram_json.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -962,7 +962,7 @@ std::string& Histogram_json_hb::get_end_value(int idx)
962962
*/
963963

964964
double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
965-
key_range *max_endp)
965+
key_range *max_endp, double avg_sel)
966966
{
967967
double min, max;
968968

sql/opt_histogram_json.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ class Histogram_json_hb : public Histogram_base
129129
double avg_selection,
130130
double total_rows) override;
131131
double range_selectivity(Field *field, key_range *min_endp,
132-
key_range *max_endp) override;
132+
key_range *max_endp, double avg_sel) override;
133133

134134
void set_json_text(ulonglong sz, const char *json_text_arg,
135135
size_t json_text_len)

sql/opt_range.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3277,7 +3277,10 @@ double records_in_column_ranges(PARAM *param, uint idx,
32773277
break;
32783278
}
32793279
total_rows += rows;
3280-
}
3280+
}
3281+
if (total_rows == 0)
3282+
total_rows= MY_MIN(1, param->table->stat_records());
3283+
32813284
return total_rows;
32823285
}
32833286

sql/sql_statistics.cc

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3910,10 +3910,13 @@ double get_column_range_cardinality(Field *field,
39103910
if (col_stats->min_max_values_are_provided())
39113911
{
39123912
Histogram_base *hist= col_stats->histogram;
3913+
double avg_frequency= col_stats->get_avg_frequency();
39133914
double sel;
39143915
if (hist && hist->is_usable(thd))
39153916
{
3916-
sel= hist->range_selectivity(field, min_endp, max_endp);
3917+
sel= hist->range_selectivity(field, min_endp, max_endp,
3918+
avg_frequency / col_non_nulls);
3919+
res= col_non_nulls * sel;
39173920
}
39183921
else
39193922
{
@@ -3938,9 +3941,9 @@ double get_column_range_cardinality(Field *field,
39383941
max_mp_pos= 1.0;
39393942

39403943
sel = (max_mp_pos - min_mp_pos);
3944+
res= col_non_nulls * sel;
3945+
set_if_bigger(res, avg_frequency);
39413946
}
3942-
res= col_non_nulls * sel;
3943-
set_if_bigger(res, col_stats->get_avg_frequency());
39443947
}
39453948
else
39463949
res= col_non_nulls;
@@ -4076,7 +4079,8 @@ double Histogram_binary::point_selectivity(Field *field, key_range *endpoint,
40764079

40774080
double Histogram_binary::range_selectivity(Field *field,
40784081
key_range *min_endp,
4079-
key_range *max_endp)
4082+
key_range *max_endp,
4083+
double avg_sel)
40804084
{
40814085
double sel, min_mp_pos, max_mp_pos;
40824086
Column_statistics *col_stats= field->read_stats;
@@ -4105,6 +4109,7 @@ double Histogram_binary::range_selectivity(Field *field,
41054109
uint max= find_bucket(max_mp_pos, FALSE);
41064110
sel= bucket_sel * (max - min + 1);
41074111

4112+
set_if_bigger(sel, avg_sel);
41084113
return sel;
41094114
}
41104115

sql/sql_statistics.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ class Histogram_base
191191
double avg_selectivity,
192192
double total_rows)=0;
193193
virtual double range_selectivity(Field *field, key_range *min_endp,
194-
key_range *max_endp)=0;
194+
key_range *max_endp, double avg_sel)=0;
195195

196196
/*
197197
Legacy: return the size of the histogram on disk.
@@ -353,7 +353,7 @@ class Histogram_binary : public Histogram_base
353353
}
354354

355355
double range_selectivity(Field *field, key_range *min_endp,
356-
key_range *max_endp) override;
356+
key_range *max_endp, double avg_sel) override;
357357

358358
/*
359359
Estimate selectivity of "col=const" using a histogram

0 commit comments

Comments
 (0)