[prev in list] [next in list] [prev in thread] [next in thread]
List: monetdb-checkins
Subject: MonetDB: properties - Reduce number of iterations in the AST, al...
From: Pedro Ferreira <commits+pedro.ferreira=monetdbsolutions.com () monetdb ! org>
Date: 2021-01-29 13:44:35
Message-ID: hg.4ea9c4d4ff60.1611927875.6315528441665844383 () monetdb-vm0 ! spin-off ! cwi ! nl
[Download RAW message or body]
Changeset: 4ea9c4d4ff60 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4ea9c4d4ff60
Modified Files:
sql/server/rel_statistics.c
Branch: properties
Log Message:
Reduce number of iterations in the AST, also prune intersect relations if that's the \
case
diffs (truncated from 402 to 300 lines):
diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c
--- a/sql/server/rel_statistics.c
+++ b/sql/server/rel_statistics.c
@@ -44,6 +44,9 @@ rel_propagate_column_ref_statistics(mvc
case op_semi: {
bool found_without_semantics = false, found_left = false, found_right = false;
+ if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 \
&& exp_is_false(rel->exps->h->data)) + return NULL; /* nothing will pass, skip */
+
/* propagate from the bottom first */
if (rel_propagate_column_ref_statistics(sql, rel->l, e))
found_left = true;
@@ -232,28 +235,33 @@ rel_basetable_get_statistics(visitor *v,
return e;
}
-static void
+static bool
rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp \
*e, int i) {
sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
- atom *lval, *rval;
+ atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = \
find_prop_and_get(le->p, PROP_MAX), + *rval_min = find_prop_and_get(re->p, \
PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);
- assert(le && re && e);
- if ((lval = find_prop_and_get(le->p, PROP_MAX)) && (rval = find_prop_and_get(re->p, \
PROP_MAX))) { + if (is_inter(rel->op) && exp_is_not_null(le) && exp_is_not_null(re) \
&& + lval_min && rval_min && lval_max && rval_max && !lval_min->isnull && \
!rval_min->isnull && !lval_max->isnull && !rval_max->isnull && + (atom_cmp(rval_max, \
lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0)) + return true;
+
+ if (lval_min && rval_min) {
if (is_union(rel->op))
- set_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval, rval)); /* for \
union the new max will be the max of the two */ + set_property(sql, e, PROP_MIN, \
statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the \
min of the two */ else if (is_inter(rel->op))
- set_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval, rval)); /* for \
intersect the new max will be the min of the two */ + set_property(sql, e, \
PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min \
will be the max of the two */ else /* except */
- set_property(sql, e, PROP_MAX, lval);
+ set_property(sql, e, PROP_MIN, lval_min);
}
- if ((lval = find_prop_and_get(le->p, PROP_MIN)) && (rval = find_prop_and_get(re->p, \
PROP_MIN))) { + if (lval_max && rval_max) {
if (is_union(rel->op))
- set_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval, rval)); /* for \
union the new min will be the min of the two */ + set_property(sql, e, PROP_MAX, \
statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the \
max of the two */ else if (is_inter(rel->op))
- set_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval, rval)); /* for \
intersect the new min will be the max of the two */ + set_property(sql, e, \
PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max \
will be the min of the two */ else /* except */
- set_property(sql, e, PROP_MIN, lval);
+ set_property(sql, e, PROP_MAX, lval_max);
}
if (is_union(rel->op)) {
@@ -267,6 +275,7 @@ rel_setop_get_statistics(mvc *sql, sql_r
if (!has_nil(le))
set_has_no_nil(e);
}
+ return false;
}
static sql_exp *
@@ -407,6 +416,130 @@ rel_propagate_statistics(visitor *v, sql
return e;
}
+static list * /* Remove predicates always false from min/max values */
+rel_prune_predicates(visitor *v, sql_rel *rel)
+{
+ for (node *n = rel->exps->h ; n ; n = n->next) {
+ sql_exp *e = n->data;
+
+ if (e->type == e_cmp && (is_theta_exp(e->flag) || e->f)) {
+ sql_exp *le = e->l, *re = e->r, *fe = e->f;
+ atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = \
find_prop_and_get(le->p, PROP_MAX), + *rval_min = find_prop_and_get(re->p, \
PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX); + bool always_false = \
false, always_true = false; +
+ if (fe && !(e->flag & CMP_SYMMETRIC)) {
+ atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = \
find_prop_and_get(fe->p, PROP_MAX); + comp_type lower = range2lcompare(e->flag), \
higher = range2rcompare(e->flag); + int not_int1 = rval_min && lval_max && \
!rval_min->isnull && !lval_max->isnull && /* the middle and left intervals don't \
overlap */ + (!e->anti && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : \
atom_cmp(rval_min, lval_max) >= 0)), + not_int2 = lval_min && fval_max && \
!lval_min->isnull && !fval_max->isnull && /* the middle and right intervals don't \
overlap */ + (!e->anti && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : \
atom_cmp(lval_min, fval_max) >= 0)), + not_int3 = rval_min && fval_max && \
!rval_min->isnull && !fval_max->isnull && /* the left interval is after the right one \
*/ + (!e->anti && (atom_cmp(rval_min, fval_max) > 0));
+
+ always_false |= not_int1 || not_int2 || not_int3;
+ /* for anti the middle must be before the left or after the right or the right \
after the left, for the other the middle must be always between the left and right \
intervals */ + always_true |= exp_is_not_null(le) && exp_is_not_null(re) && \
exp_is_not_null(fe) && + lval_min && lval_max && rval_min && rval_max && fval_min \
&& fval_max && !lval_min->isnull && !lval_max->isnull && !rval_min->isnull && \
!rval_max->isnull && !fval_min->isnull && !fval_max->isnull && + (e->anti ? \
((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) \
>= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, \
> fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
+ ((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, \
rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : \
atom_cmp(fval_min, lval_max) > 0))); + } else {
+ switch (e->flag) {
+ case cmp_equal:
+ if (lval_min && lval_max && rval_min && rval_max && !lval_min->isnull && \
!lval_max->isnull && !rval_min->isnull && !rval_max->isnull) + always_false |= \
(!e->anti && (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0)) \
|| (e->anti && atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= \
0); + if (is_semantics(e))
+ always_false |= is_semantics(e) ?
+ e->anti ? (exp_is_null(le) && exp_is_null(re)) || (exp_is_not_null(le) && \
exp_is_not_null(re)) : (exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) \
&& exp_is_not_null(re)) : + e->anti ? exp_is_not_null(le) && \
exp_is_not_null(re) : (exp_is_null(le) && exp_is_null(re)) || (exp_is_not_null(le) && \
exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)); + break;
+ case cmp_gt:
+ if (lval_max && rval_min && !lval_max->isnull && !rval_min->isnull)
+ always_false |= e->anti ? atom_cmp(lval_max, rval_min) > 0 : \
atom_cmp(lval_max, rval_min) <= 0; + if (lval_min && rval_max && \
!lval_min->isnull && !rval_max->isnull) + always_true |= exp_is_not_null(le) && \
exp_is_not_null(re) && (e->anti ? atom_cmp(lval_min, rval_max) <= 0 : \
atom_cmp(lval_min, rval_max) > 0); + break;
+ case cmp_gte:
+ if (lval_max && rval_min && !lval_max->isnull && !rval_min->isnull)
+ always_false |= e->anti ? atom_cmp(lval_max, rval_min) >= 0 : \
atom_cmp(lval_max, rval_min) < 0; + if (lval_min && rval_max && !lval_min->isnull \
&& !rval_max->isnull) + always_true |= exp_is_not_null(le) && \
exp_is_not_null(re) && (e->anti ? atom_cmp(lval_min, rval_max) < 0 : \
atom_cmp(lval_min, rval_max) >= 0); + break;
+ case cmp_lt:
+ if (lval_min && rval_max && !lval_min->isnull && !rval_max->isnull)
+ always_false |= e->anti ? atom_cmp(lval_min, rval_max) < 0 : \
atom_cmp(lval_min, rval_max) >= 0; + if (lval_max && rval_min && \
!lval_max->isnull && !rval_min->isnull) + always_true |= exp_is_not_null(le) && \
exp_is_not_null(re) && (e->anti ? atom_cmp(lval_max, rval_min) >= 0 : \
atom_cmp(lval_max, rval_min) < 0); + break;
+ case cmp_lte:
+ if (lval_min && rval_max && !lval_min->isnull && !rval_max->isnull)
+ always_false |= e->anti ? atom_cmp(lval_min, rval_max) <= 0 : \
atom_cmp(lval_min, rval_max) > 0; + if (lval_max && rval_min && !lval_max->isnull \
&& !rval_min->isnull) + always_true |= exp_is_not_null(le) && \
exp_is_not_null(re) && (e->anti ? atom_cmp(lval_max, rval_min) > 0 : \
atom_cmp(lval_max, rval_min) <= 0); + break;
+ default: /* Maybe later I can do cmp_in and cmp_notin */
+ break;
+ }
+ }
+ assert(!always_false || !always_true);
+ if (always_false || always_true) {
+ sql_exp *ne = exp_atom_bool(v->sql->sa, always_true);
+ if (exp_name(e))
+ exp_prop_alias(v->sql->sa, ne, e);
+ n->data = ne;
+ v->changes++;
+ }
+ }
+ }
+ return rel->exps;
+}
+
+static list*
+rel_simplify_count(visitor *v, sql_rel *rel)
+{
+ mvc *sql = v->sql;
+ int ncountstar = 0;
+
+ /* Convert count(no null) into count(*) */
+ for (node *n = rel->exps->h; n ; n = n->next) {
+ sql_exp *e = n->data;
+
+ if (exp_aggr_is_count(e) && !need_distinct(e)) {
+ if (list_length(e->l) == 0) {
+ ncountstar++;
+ } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) \
{ + sql_subfunc *cf = sql_bind_func(sql, "sys", "count", \
sql_bind_localtype("void"), NULL, F_AGGR); + sql_exp *ne = exp_aggr(sql->sa, NULL, \
cf, 0, 0, e->card, 0); + if (exp_name(e))
+ exp_prop_alias(sql->sa, ne, e);
+ n->data = ne;
+ ncountstar++;
+ v->changes++;
+ }
+ }
+ }
+ /* With multiple count(*), use exp_ref to reduce the number of calls to this \
aggregate */ + if (ncountstar > 1) {
+ sql_exp *count_star = NULL;
+ for (node *n = rel->exps->h; n ; n = n->next) {
+ sql_exp *e = n->data;
+
+ if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
+ if (!count_star) {
+ count_star = e;
+ } else {
+ sql_exp *ne = exp_ref(sql, count_star);
+ if (exp_name(e))
+ exp_prop_alias(sql->sa, ne, e);
+ n->data = ne;
+ }
+ }
+ }
+ }
+ return rel->exps;
+}
+
static sql_rel *
rel_get_statistics(visitor *v, sql_rel *rel)
{
@@ -417,6 +550,7 @@ rel_get_statistics(visitor *v, sql_rel *
case op_union:
case op_inter:
case op_except: {
+ bool can_be_pruned = false;
int i = 0;
sql_rel *l = rel->l, *r = rel->r;
@@ -434,10 +568,24 @@ rel_get_statistics(visitor *v, sql_rel *
r->exps = exps_exp_visitor_bottomup(v, r, r->exps, 0, &rel_propagate_statistics, \
false); }
- for (node *n = rel->exps->h ; n ; n = n->next) {
- rel_setop_get_statistics(v->sql, rel, l->exps, r->exps, n->data, i);
+ for (node *n = rel->exps->h ; n && !can_be_pruned ; n = n->next) {
+ can_be_pruned |= rel_setop_get_statistics(v->sql, rel, l->exps, r->exps, n->data, \
i); i++;
}
+ if (can_be_pruned) {
+ rel_destroy(rel->l);
+ rel->l = NULL;
+ rel_destroy(rel->r);
+ rel->r = NULL;
+ for (node *n = rel->exps->h ; n ; n = n->next) {
+ sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, \
exp_subtype(e), NULL)); + exp_prop_alias(v->sql->sa, a, e);
+ n->data = a;
+ }
+ rel = rel_project(v->sql->sa, NULL, rel->exps);
+ rel = rel_select(v->sql->sa, rel, exp_atom_bool(v->sql->sa, 0));
+ rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
+ }
} break;
case op_join:
case op_left:
@@ -450,8 +598,13 @@ rel_get_statistics(visitor *v, sql_rel *
case op_groupby:
case op_ddl:
rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, \
&rel_propagate_statistics, false);
- if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
+ if (is_simple_project(rel->op) && !list_empty(rel->r))
rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, \
false); + /* The following optimizations can only be applied after propagating the \
statistics to rel->exps */ + if (is_groupby(rel->op) && rel->exps)
+ rel->exps = rel_simplify_count(v, rel);
+ if ((is_join(rel->op) || is_select(rel->op)) && rel->exps)
+ rel->exps = rel_prune_predicates(v, rel);
break;
/*These relations are less important for now
case op_table:
@@ -468,131 +621,6 @@ rel_get_statistics(visitor *v, sql_rel *
return rel;
}
-static sql_rel *
-rel_simplify_count(visitor *v, sql_rel *rel)
-{
- mvc *sql = v->sql;
-
- if (is_groupby(rel->op)) {
- int ncountstar = 0;
-
- /* Convert count(no null) into count(*) */
- for (node *n = rel->exps->h; n ; n = n->next) {
- sql_exp *e = n->data;
-
- if (exp_aggr_is_count(e) && !need_distinct(e)) {
- if (list_length(e->l) == 0) {
- ncountstar++;
- } else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) \
{
- sql_subfunc *cf = sql_bind_func(sql, "sys", "count", \
sql_bind_localtype("void"), NULL, F_AGGR);
- sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0);
- if (exp_name(e))
- exp_prop_alias(sql->sa, ne, e);
- n->data = ne;
- ncountstar++;
- v->changes++;
- }
- }
- }
- /* With multiple count(*), use exp_ref to reduce the number of calls to this \
aggregate */
- if (ncountstar > 1) {
- sql_exp *count_star = NULL;
- for (node *n = rel->exps->h; n ; n = n->next) {
- sql_exp *e = n->data;
-
- if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
- if (!count_star) {
- count_star = e;
- } else {
- sql_exp *ne = exp_ref(sql, count_star);
- if (exp_name(e))
- exp_prop_alias(sql->sa, ne, e);
- n->data = ne;
- }
- }
- }
- }
- }
- return rel;
-}
-
-static sql_exp * /* Remove predicates always false from min/max values */
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic