[prev in list] [next in list] [prev in thread] [next in thread] 

List:       monetdb-checkins
Subject:    MonetDB: properties - Reduce number of iterations in the AST, al...
From:       Pedro Ferreira <commits+pedro.ferreira=monetdbsolutions.com () monetdb ! org>
Date:       2021-01-29 13:44:35
Message-ID: hg.4ea9c4d4ff60.1611927875.6315528441665844383 () monetdb-vm0 ! spin-off ! cwi ! nl
[Download RAW message or body]

Changeset: 4ea9c4d4ff60 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=4ea9c4d4ff60
Modified Files:
	sql/server/rel_statistics.c
Branch: properties
Log Message:

Reduce number of iterations in the AST, also prune intersect relations if that's the \
case


diffs (truncated from 402 to 300 lines):

diff --git a/sql/server/rel_statistics.c b/sql/server/rel_statistics.c
--- a/sql/server/rel_statistics.c
+++ b/sql/server/rel_statistics.c
@@ -44,6 +44,9 @@ rel_propagate_column_ref_statistics(mvc 
 		case op_semi: {
 			bool found_without_semantics = false, found_left = false, found_right = false;
 
+			if ((is_innerjoin(rel->op) || is_select(rel->op)) && list_length(rel->exps) == 1 \
&& exp_is_false(rel->exps->h->data)) +				return NULL; /* nothing will pass, skip */
+
 			/* propagate from the bottom first */
 			if (rel_propagate_column_ref_statistics(sql, rel->l, e))
 				found_left = true;
@@ -232,28 +235,33 @@ rel_basetable_get_statistics(visitor *v,
 	return e;
 }
 
-static void
+static bool
 rel_setop_get_statistics(mvc *sql, sql_rel *rel, list *lexps, list *rexps, sql_exp \
*e, int i)  {
 	sql_exp *le = list_fetch(lexps, i), *re = list_fetch(rexps, i);
-	atom *lval, *rval;
+	atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = \
find_prop_and_get(le->p, PROP_MAX), +		 *rval_min = find_prop_and_get(re->p, \
PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX);  
-	assert(le && re && e);
-	if ((lval = find_prop_and_get(le->p, PROP_MAX)) && (rval = find_prop_and_get(re->p, \
PROP_MAX))) { +	if (is_inter(rel->op) && exp_is_not_null(le) && exp_is_not_null(re) \
&& +		lval_min && rval_min && lval_max && rval_max && !lval_min->isnull && \
!rval_min->isnull && !lval_max->isnull && !rval_max->isnull && +		(atom_cmp(rval_max, \
lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0)) +		return true;
+
+	if (lval_min && rval_min) {
 		if (is_union(rel->op))
-			set_property(sql, e, PROP_MAX, statistics_atom_max(sql, lval, rval)); /* for \
union the new max will be the max of the two */ +			set_property(sql, e, PROP_MIN, \
statistics_atom_min(sql, lval_min, rval_min)); /* for union the new min will be the \
min of the two */  else if (is_inter(rel->op))
-			set_property(sql, e, PROP_MAX, statistics_atom_min(sql, lval, rval)); /* for \
intersect the new max will be the min of the two */ +			set_property(sql, e, \
PROP_MIN, statistics_atom_max(sql, lval_min, rval_min)); /* for intersect the new min \
will be the max of the two */  else /* except */
-			set_property(sql, e, PROP_MAX, lval);
+			set_property(sql, e, PROP_MIN, lval_min);
 	}
-	if ((lval = find_prop_and_get(le->p, PROP_MIN)) && (rval = find_prop_and_get(re->p, \
PROP_MIN))) { +	if (lval_max && rval_max) {
 		if (is_union(rel->op))
-			set_property(sql, e, PROP_MIN, statistics_atom_min(sql, lval, rval)); /* for \
union the new min will be the min of the two */ +			set_property(sql, e, PROP_MAX, \
statistics_atom_max(sql, lval_max, rval_max)); /* for union the new max will be the \
max of the two */  else if (is_inter(rel->op))
-			set_property(sql, e, PROP_MIN, statistics_atom_max(sql, lval, rval)); /* for \
intersect the new min will be the max of the two */ +			set_property(sql, e, \
PROP_MAX, statistics_atom_min(sql, lval_max, rval_max)); /* for intersect the new max \
will be the min of the two */  else /* except */
-			set_property(sql, e, PROP_MIN, lval);
+			set_property(sql, e, PROP_MAX, lval_max);
 	}
 
 	if (is_union(rel->op)) {
@@ -267,6 +275,7 @@ rel_setop_get_statistics(mvc *sql, sql_r
 		if (!has_nil(le))
 			set_has_no_nil(e);
 	}
+	return false;
 }
 
 static sql_exp *
@@ -407,6 +416,130 @@ rel_propagate_statistics(visitor *v, sql
 	return e;
 }
 
+static list * /* Remove predicates always false from min/max values */
+rel_prune_predicates(visitor *v, sql_rel *rel)
+{
+	for (node *n = rel->exps->h ; n ; n = n->next) {
+		sql_exp *e = n->data;
+
+		if (e->type == e_cmp && (is_theta_exp(e->flag) || e->f)) {
+			sql_exp *le = e->l, *re = e->r, *fe = e->f;
+			atom *lval_min = find_prop_and_get(le->p, PROP_MIN), *lval_max = \
find_prop_and_get(le->p, PROP_MAX), +				*rval_min = find_prop_and_get(re->p, \
PROP_MIN), *rval_max = find_prop_and_get(re->p, PROP_MAX); +			bool always_false = \
false, always_true = false; +
+			if (fe && !(e->flag & CMP_SYMMETRIC)) {
+				atom *fval_min = find_prop_and_get(fe->p, PROP_MIN), *fval_max = \
find_prop_and_get(fe->p, PROP_MAX); +				comp_type lower = range2lcompare(e->flag), \
higher = range2rcompare(e->flag); +				int not_int1 = rval_min && lval_max && \
!rval_min->isnull && !lval_max->isnull && /* the middle and left intervals don't \
overlap */ +					(!e->anti && (lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : \
atom_cmp(rval_min, lval_max) >= 0)), +					not_int2 = lval_min && fval_max && \
!lval_min->isnull && !fval_max->isnull && /* the middle and right intervals don't \
overlap */ +					(!e->anti && (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : \
atom_cmp(lval_min, fval_max) >= 0)), +					not_int3 = rval_min && fval_max && \
!rval_min->isnull && !fval_max->isnull && /* the left interval is after the right one \
*/ +					(!e->anti && (atom_cmp(rval_min, fval_max) > 0));
+
+				always_false |= not_int1 || not_int2 || not_int3;
+				/* for anti the middle must be before the left or after the right or the right \
after the left, for the other the middle must be always between the left and right \
intervals */ +				always_true |= exp_is_not_null(le) && exp_is_not_null(re) && \
exp_is_not_null(fe) && +					lval_min && lval_max && rval_min && rval_max && fval_min \
&& fval_max && !lval_min->isnull && !lval_max->isnull && !rval_min->isnull && \
!rval_max->isnull && !fval_min->isnull && !fval_max->isnull && +					(e->anti ? \
((lower == cmp_gte ? atom_cmp(rval_min, lval_max) > 0 : atom_cmp(rval_min, lval_max) \
>= 0) || (higher == cmp_lte ? atom_cmp(lval_min, fval_max) > 0 : atom_cmp(lval_min, \
> fval_max) >= 0) || atom_cmp(rval_min, fval_max) > 0) :
+					((lower == cmp_gte ? atom_cmp(lval_min, rval_max) >= 0 : atom_cmp(lval_min, \
rval_max) > 0) && (higher == cmp_lte ? atom_cmp(fval_min, lval_max) >= 0 : \
atom_cmp(fval_min, lval_max) > 0))); +			} else {
+				switch (e->flag) {
+				case cmp_equal:
+					if (lval_min && lval_max && rval_min && rval_max && !lval_min->isnull && \
!lval_max->isnull && !rval_min->isnull && !rval_max->isnull) +						always_false |= \
(!e->anti && (atom_cmp(rval_max, lval_min) < 0 || atom_cmp(rval_min, lval_max) > 0)) \
|| (e->anti && atom_cmp(lval_min, rval_min) == 0 && atom_cmp(lval_max, rval_max) <= \
0); +					if (is_semantics(e))
+						always_false |= is_semantics(e) ?
+							e->anti ? (exp_is_null(le) && exp_is_null(re)) || (exp_is_not_null(le) && \
exp_is_not_null(re)) : (exp_is_not_null(le) && exp_is_null(re)) || (exp_is_null(le) \
&& exp_is_not_null(re)) : +							e->anti ? exp_is_not_null(le) && \
exp_is_not_null(re) : (exp_is_null(le) && exp_is_null(re)) || (exp_is_not_null(le) && \
exp_is_null(re)) || (exp_is_null(le) && exp_is_not_null(re)); +					break;
+				case cmp_gt:
+					if (lval_max && rval_min && !lval_max->isnull && !rval_min->isnull)
+						always_false |= e->anti ? atom_cmp(lval_max, rval_min) > 0 : \
atom_cmp(lval_max, rval_min) <= 0; +					if (lval_min && rval_max && \
!lval_min->isnull && !rval_max->isnull) +						always_true |= exp_is_not_null(le) && \
exp_is_not_null(re) && (e->anti ? atom_cmp(lval_min, rval_max) <= 0 : \
atom_cmp(lval_min, rval_max) > 0); +					break;
+				case cmp_gte:
+					if (lval_max && rval_min && !lval_max->isnull && !rval_min->isnull)
+						always_false |= e->anti ? atom_cmp(lval_max, rval_min) >= 0 : \
atom_cmp(lval_max, rval_min) < 0; +					if (lval_min && rval_max && !lval_min->isnull \
&& !rval_max->isnull) +						always_true |= exp_is_not_null(le) && \
exp_is_not_null(re) && (e->anti ? atom_cmp(lval_min, rval_max) < 0 : \
atom_cmp(lval_min, rval_max) >= 0); +					break;
+				case cmp_lt:
+					if (lval_min && rval_max && !lval_min->isnull && !rval_max->isnull)
+						always_false |= e->anti ? atom_cmp(lval_min, rval_max) < 0 : \
atom_cmp(lval_min, rval_max) >= 0; +					if (lval_max && rval_min && \
!lval_max->isnull && !rval_min->isnull) +						always_true |= exp_is_not_null(le) && \
exp_is_not_null(re) && (e->anti ? atom_cmp(lval_max, rval_min) >= 0 : \
atom_cmp(lval_max, rval_min) < 0); +					break;
+				case cmp_lte:
+					if (lval_min && rval_max && !lval_min->isnull && !rval_max->isnull)
+						always_false |= e->anti ? atom_cmp(lval_min, rval_max) <= 0 : \
atom_cmp(lval_min, rval_max) > 0; +					if (lval_max && rval_min && !lval_max->isnull \
&& !rval_min->isnull) +						always_true |= exp_is_not_null(le) && \
exp_is_not_null(re) && (e->anti ? atom_cmp(lval_max, rval_min) > 0 : \
atom_cmp(lval_max, rval_min) <= 0); +					break;
+				default: /* Maybe later I can do cmp_in and cmp_notin */
+					break;
+				}
+			}
+			assert(!always_false || !always_true);
+			if (always_false || always_true) {
+				sql_exp *ne = exp_atom_bool(v->sql->sa, always_true);
+				if (exp_name(e))
+					exp_prop_alias(v->sql->sa, ne, e);
+				n->data = ne;
+				v->changes++;
+			}
+		}
+	}
+	return rel->exps;
+}
+
+static list*
+rel_simplify_count(visitor *v, sql_rel *rel)
+{
+	mvc *sql = v->sql;
+	int ncountstar = 0;
+
+	/* Convert count(no null) into count(*) */
+	for (node *n = rel->exps->h; n ; n = n->next) {
+		sql_exp *e = n->data;
+
+		if (exp_aggr_is_count(e) && !need_distinct(e)) {
+			if (list_length(e->l) == 0) {
+				ncountstar++;
+			} else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) \
{ +				sql_subfunc *cf = sql_bind_func(sql, "sys", "count", \
sql_bind_localtype("void"), NULL, F_AGGR); +				sql_exp *ne = exp_aggr(sql->sa, NULL, \
cf, 0, 0, e->card, 0); +				if (exp_name(e))
+					exp_prop_alias(sql->sa, ne, e);
+				n->data = ne;
+				ncountstar++;
+				v->changes++;
+			}
+		}
+	}
+	/* With multiple count(*), use exp_ref to reduce the number of calls to this \
aggregate */ +	if (ncountstar > 1) {
+		sql_exp *count_star = NULL;
+		for (node *n = rel->exps->h; n ; n = n->next) {
+			sql_exp *e = n->data;
+
+			if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
+				if (!count_star) {
+					count_star = e;
+				} else {
+					sql_exp *ne = exp_ref(sql, count_star);
+					if (exp_name(e))
+						exp_prop_alias(sql->sa, ne, e);
+					n->data = ne;
+				}
+			}
+		}
+	}
+	return rel->exps;
+}
+
 static sql_rel *
 rel_get_statistics(visitor *v, sql_rel *rel)
 {
@@ -417,6 +550,7 @@ rel_get_statistics(visitor *v, sql_rel *
 	case op_union:
 	case op_inter:
 	case op_except: {
+		bool can_be_pruned = false;
 		int i = 0;
 		sql_rel *l = rel->l, *r = rel->r;
 
@@ -434,10 +568,24 @@ rel_get_statistics(visitor *v, sql_rel *
 			r->exps = exps_exp_visitor_bottomup(v, r, r->exps, 0, &rel_propagate_statistics, \
false);  }
 
-		for (node *n = rel->exps->h ; n ; n = n->next) {
-			rel_setop_get_statistics(v->sql, rel, l->exps, r->exps, n->data, i);
+		for (node *n = rel->exps->h ; n && !can_be_pruned ; n = n->next) {
+			can_be_pruned |= rel_setop_get_statistics(v->sql, rel, l->exps, r->exps, n->data, \
i);  i++;
 		}
+		if (can_be_pruned) {
+			rel_destroy(rel->l);
+			rel->l = NULL;
+			rel_destroy(rel->r);
+			rel->r = NULL;
+			for (node *n = rel->exps->h ; n ; n = n->next) {
+				sql_exp *e = n->data, *a = exp_atom(v->sql->sa, atom_general(v->sql->sa, \
exp_subtype(e), NULL)); +				exp_prop_alias(v->sql->sa, a, e);
+				n->data = a;
+			}
+			rel = rel_project(v->sql->sa, NULL, rel->exps);
+			rel = rel_select(v->sql->sa, rel, exp_atom_bool(v->sql->sa, 0));
+			rel = rel_project(v->sql->sa, rel, rel_projections(v->sql, rel, NULL, 1, 1));
+		}
 	} break;
 	case op_join:
 	case op_left:
@@ -450,8 +598,13 @@ rel_get_statistics(visitor *v, sql_rel *
 	case op_groupby:
 	case op_ddl:
 		rel->exps = exps_exp_visitor_bottomup(v, rel, rel->exps, 0, \
                &rel_propagate_statistics, false);
-		if ((is_simple_project(rel->op) || is_groupby(rel->op)) && !list_empty(rel->r))
+		if (is_simple_project(rel->op) && !list_empty(rel->r))
 			rel->r = exps_exp_visitor_bottomup(v, rel, rel->r, 0, &rel_propagate_statistics, \
false); +		/* The following optimizations can only be applied after propagating the \
statistics to rel->exps */ +		if (is_groupby(rel->op) && rel->exps)
+			rel->exps = rel_simplify_count(v, rel);
+		if ((is_join(rel->op) || is_select(rel->op)) && rel->exps)
+			rel->exps = rel_prune_predicates(v, rel);
 		break;
 	/*These relations are less important for now
 	case op_table:
@@ -468,131 +621,6 @@ rel_get_statistics(visitor *v, sql_rel *
 	return rel;
 }
 
-static sql_rel *
-rel_simplify_count(visitor *v, sql_rel *rel)
-{
-	mvc *sql = v->sql;
-
-	if (is_groupby(rel->op)) {
-		int ncountstar = 0;
-
-		/* Convert count(no null) into count(*) */
-		for (node *n = rel->exps->h; n ; n = n->next) {
-			sql_exp *e = n->data;
-
-			if (exp_aggr_is_count(e) && !need_distinct(e)) {
-				if (list_length(e->l) == 0) {
-					ncountstar++;
-				} else if (list_length(e->l) == 1 && !has_nil((sql_exp*)((list*)e->l)->h->data)) \
                {
-					sql_subfunc *cf = sql_bind_func(sql, "sys", "count", \
                sql_bind_localtype("void"), NULL, F_AGGR);
-					sql_exp *ne = exp_aggr(sql->sa, NULL, cf, 0, 0, e->card, 0);
-					if (exp_name(e))
-						exp_prop_alias(sql->sa, ne, e);
-					n->data = ne;
-					ncountstar++;
-					v->changes++;
-				}
-			}
-		}
-		/* With multiple count(*), use exp_ref to reduce the number of calls to this \
                aggregate */
-		if (ncountstar > 1) {
-			sql_exp *count_star = NULL;
-			for (node *n = rel->exps->h; n ; n = n->next) {
-				sql_exp *e = n->data;
-
-				if (exp_aggr_is_count(e) && !need_distinct(e) && list_length(e->l) == 0) {
-					if (!count_star) {
-						count_star = e;
-					} else {
-						sql_exp *ne = exp_ref(sql, count_star);
-						if (exp_name(e))
-							exp_prop_alias(sql->sa, ne, e);
-						n->data = ne;
-					}
-				}
-			}
-		}
-	}
-	return rel;
-}
-
-static sql_exp * /* Remove predicates always false from min/max values */
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic