[prev in list] [next in list] [prev in thread] [next in thread] 

List:       postgresql-hackers
Subject:    Implement missing join selectivity estimation for range types
From:       Mahmoud Sakr <mahmoud.sakr () ulb ! be>
Date:       2022-06-30 14:31:30
Message-ID: CAB4o4asMq3k6HN9WfDsssQ5DDVfAziB4TpiFJ8RBJgZTVuwC7g () mail ! gmail ! com
[Download RAW message or body]

Hi,
Given a query:
SELECT * FROM t1, t2 WHERE t1.r << t2.r
where t1.r, t2.r are of range type,
currently PostgreSQL will estimate a constant selectivity for the << predicate,
which is equal to 0.005, not utilizing the statistics that the optimizer
collects for range attributes.

We have worked out a theory for inequality join selectivity estimation
(http://arxiv.org/abs/2206.07396), and implemented it for range
types it in this patch.

The algorithm in this patch re-uses the currently collected statistics for
range types, which is the bounds histogram. It works fairly accurate for the
operations <<, >>, &&, &<, &>, <=, >= with estimation error of about 0.5%.
The patch also implements selectivity estimation for the
operations @>, <@ (contains and is contained in), but their accuracy is not
stable, since the bounds histograms assume independence between the range
bounds. A point to discuss is whether or not to keep these last two operations.
The patch also includes the selectivity estimation for multirange types,
treating a multirange as a single range which is its bounding box.

The same algorithm in this patch is applicable to inequality joins of scalar
types. We, however, don't implement it for scalars, since more work is needed
to make use of the other statistics available for scalars, such as the MCV.
This is left as a future work.

--
Mahmoud SAKR - Univeristé Libre de Bruxelles
This work is done by Diogo Repas, Zhicheng Luo, Maxime Schoemans, and myself

["v1-0001-Join-Selectivity-Estimation-for-Range-types.patch" (text/x-patch)]

diff --git a/src/backend/utils/adt/multirangetypes_selfuncs.c \
b/src/backend/utils/adt/multirangetypes_selfuncs.c index 919c8889d4..7ba4aa8b04 \
                100644
--- a/src/backend/utils/adt/multirangetypes_selfuncs.c
+++ b/src/backend/utils/adt/multirangetypes_selfuncs.c
@@ -1335,3 +1335,511 @@ calc_hist_selectivity_contains(TypeCacheEntry *typcache,
 
 	return sum_frac;
 }
+
+/*
+ * This function is a copy of the function with the same name in
+ * rangetypes_selfuncs.c, with the only difference that the types are
+ * multiranges
+ *
+ */
+static double
+calc_hist_join_selectivity(TypeCacheEntry *typcache,
+						   const RangeBound *hist1, int nhist1,
+						   const RangeBound *hist2, int nhist2)
+{
+	int			i,
+				j;
+	double		selectivity,
+				cur_sel1,
+				cur_sel2,
+				prev_sel1,
+				prev_sel2;
+	RangeBound	cur_sync;
+
+	/*
+	 * Histograms will never be empty. In fact, a histogram will never have
+	 * less than 2 values (1 bin)
+	 */
+	Assert(nhist1 > 1);
+	Assert(nhist2 > 1);
+
+	/* Fast-forwards i and j to start of iteration */
+	for (i = 0; range_cmp_bound_values(typcache, &hist1[i], &hist2[0]) < 0; i++);
+	for (j = 0; range_cmp_bound_values(typcache, &hist2[j], &hist1[0]) < 0; j++);
+
+	if (range_cmp_bound_values(typcache, &hist1[i], &hist2[j]) < 0)
+		cur_sync = hist1[i++];
+	else if (range_cmp_bound_values(typcache, &hist1[i], &hist2[j]) > 0)
+		cur_sync = hist2[j++];
+	else
+	{
+		/* If equal, skip one */
+		cur_sync = hist1[i];
+		i++;
+		j++;
+	}
+	prev_sel1 = calc_hist_selectivity_scalar(typcache, &cur_sync,
+											 hist1, nhist1, false);
+	prev_sel2 = calc_hist_selectivity_scalar(typcache, &cur_sync,
+											 hist2, nhist2, false);
+
+	/*
+	 * Do the estimation on overlapping region
+	 */
+	selectivity = 0.0;
+	while (i < nhist1 && j < nhist2)
+	{
+		if (range_cmp_bound_values(typcache, &hist1[i], &hist2[j]) < 0)
+			cur_sync = hist1[i++];
+		else if (range_cmp_bound_values(typcache, &hist1[i], &hist2[j]) > 0)
+			cur_sync = hist2[j++];
+		else
+		{
+			/* If equal, skip one */
+			cur_sync = hist1[i];
+			i++;
+			j++;
+		}
+		cur_sel1 = calc_hist_selectivity_scalar(typcache, &cur_sync,
+												hist1, nhist1, false);
+		cur_sel2 = calc_hist_selectivity_scalar(typcache, &cur_sync,
+												hist2, nhist2, false);
+
+		selectivity += (prev_sel1 + cur_sel1) * (cur_sel2 - prev_sel2);
+
+		/* Prepare for the next iteration */
+		prev_sel1 = cur_sel1;
+		prev_sel2 = cur_sel2;
+	}
+
+	/* Include remainder of hist2 if any */
+	if (j < nhist2)
+		selectivity += 1 - prev_sel2;
+
+	return selectivity / 2;
+}
+
+/*
+ * multirangejoinsel -- join cardinality for multirange operators
+ */
+Datum
+multirangejoinsel(PG_FUNCTION_ARGS)
+{
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+	Oid			operator = PG_GETARG_OID(1);
+	List	   *args = (List *) PG_GETARG_POINTER(2);
+	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
+	VariableStatData vardata1,
+				vardata2;
+	AttStatsSlot hist1,
+				hist2,
+				sslot;
+	bool		reversed;
+	Selectivity selec;
+	TypeCacheEntry *typcache = NULL,
+			   *rng_typcache = NULL;
+	Form_pg_statistic stats1,
+				stats2;
+	double		empty_frac1,
+				empty_frac2,
+				null_frac1,
+				null_frac2;
+	int			nhist1,
+				nhist2;
+	RangeBound *hist1_lower,
+			   *hist1_upper,
+			   *hist2_lower,
+			   *hist2_upper;
+	bool		empty;
+	int			i;
+
+	get_join_variables(root, args, sjinfo, &vardata1, &vardata2, &reversed);
+
+	selec = default_multirange_selectivity(operator);
+
+	/* get multirange type cache */
+	if (type_is_multirange(vardata1.vartype))
+		typcache = multirange_get_typcache(fcinfo, vardata1.vartype);
+	else if (type_is_multirange(vardata2.vartype))
+		typcache = multirange_get_typcache(fcinfo, vardata2.vartype);
+
+	if (HeapTupleIsValid(vardata1.statsTuple) &&
+		get_attstatsslot(&hist1, vardata1.statsTuple,
+						 STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
+						 ATTSTATSSLOT_VALUES) &&
+		HeapTupleIsValid(vardata2.statsTuple) &&
+		get_attstatsslot(&hist2, vardata2.statsTuple,
+						 STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
+						 ATTSTATSSLOT_VALUES) &&
+		typcache)
+	{
+
+		/* Initialize underlying range type cache */
+		rng_typcache = typcache->rngtype;
+
+		/*
+		 * First look up the fraction of NULLs and empty ranges from
+		 * pg_statistic.
+		 */
+		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
+		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
+
+		null_frac1 = stats1->stanullfrac;
+		null_frac2 = stats2->stanullfrac;
+
+		/* Try to get fraction of empty ranges for the first variable */
+		if (get_attstatsslot(&sslot, vardata1.statsTuple,
+							 STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
+							 InvalidOid,
+							 ATTSTATSSLOT_NUMBERS))
+		{
+			if (sslot.nnumbers != 1)	/* shouldn't happen */
+				elog(ERROR, "invalid empty fraction statistic");
+			empty_frac1 = sslot.numbers[0];
+			free_attstatsslot(&sslot);
+		}
+		else
+		{
+			/* No empty fraction statistic. Assume no empty ranges. */
+			empty_frac1 = 0.0;
+		}
+
+		/* Try to get fraction of empty ranges for the second variable */
+		if (get_attstatsslot(&sslot, vardata2.statsTuple,
+							 STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
+							 InvalidOid,
+							 ATTSTATSSLOT_NUMBERS))
+		{
+			if (sslot.nnumbers != 1)	/* shouldn't happen */
+				elog(ERROR, "invalid empty fraction statistic");
+			empty_frac2 = sslot.numbers[0];
+			free_attstatsslot(&sslot);
+		}
+		else
+		{
+			/* No empty fraction statistic. Assume no empty ranges. */
+			empty_frac2 = 0.0;
+		}
+
+		/*
+		 * Convert histograms of ranges into histograms of their lower and
+		 * upper bounds for the first variable.
+		 */
+		nhist1 = hist1.nvalues;
+		hist1_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist1);
+		hist1_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist1);
+		for (i = 0; i < nhist1; i++)
+		{
+			range_deserialize(rng_typcache, DatumGetRangeTypeP(hist1.values[i]),
+							  &hist1_lower[i], &hist1_upper[i], &empty);
+			/* The histogram should not contain any empty ranges */
+			if (empty)
+				elog(ERROR, "bounds histogram contains an empty range");
+		}
+
+		/*
+		 * Convert histograms of ranges into histograms of their lower and
+		 * upper bounds for the second variable.
+		 */
+		nhist2 = hist2.nvalues;
+		hist2_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist2);
+		hist2_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist2);
+		for (i = 0; i < nhist2; i++)
+		{
+			range_deserialize(rng_typcache, DatumGetRangeTypeP(hist2.values[i]),
+							  &hist2_lower[i], &hist2_upper[i], &empty);
+			/* The histogram should not contain any empty ranges */
+			if (empty)
+				elog(ERROR, "bounds histogram contains an empty range");
+		}
+
+		switch (operator)
+		{
+			case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
+			case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
+			case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
+
+				/*
+				 * Selectivity of A && B = Selectivity of NOT( A << B || A >>
+				 * B ) = 1 - Selectivity of (A.upper < B.lower) - Selectivity
+				 * of (B.upper < A.lower)
+				 */
+				selec = 1;
+				selec -= calc_hist_join_selectivity(rng_typcache,
+													hist1_upper, nhist1,
+													hist2_lower, nhist2);
+				selec -= calc_hist_join_selectivity(rng_typcache,
+													hist2_upper, nhist2,
+													hist1_lower, nhist1);
+				break;
+
+			case OID_MULTIRANGE_LESS_EQUAL_OP:
+
+				/*
+				 * A <= B
+				 *
+				 * Start by comparing lower bounds and if they are equal
+				 * compare upper bounds
+				 *
+				 * Negation of OID_RANGE_GREATER_OP.
+				 *
+				 * Overestimate by comparing only the lower bounds. Higher
+				 * accuracy would require us to subtract P(lower1 = lower2) *
+				 * P(upper1 > upper2)
+				 */
+				selec = 1 - calc_hist_join_selectivity(rng_typcache,
+													   hist2_lower, nhist2,
+													   hist1_lower, nhist1);
+				break;
+
+			case OID_MULTIRANGE_LESS_OP:
+
+				/*
+				 * A < B
+				 *
+				 * Start by comparing lower bounds and if they are equal
+				 * compare upper bounds
+				 *
+				 * Underestimate by comparing only the lower bounds. Higher
+				 * accuracy would require us to add P(lower1 = lower2) *
+				 * P(upper1 < upper2)
+				 */
+				selec = calc_hist_join_selectivity(rng_typcache,
+												   hist1_lower, nhist1,
+												   hist2_lower, nhist2);
+				break;
+
+			case OID_MULTIRANGE_GREATER_EQUAL_OP:
+
+				/*
+				 * A >= B
+				 *
+				 * Start by comparing lower bounds and if they are equal
+				 * compare upper bounds
+				 *
+				 * Negation of OID_RANGE_LESS_OP.
+				 *
+				 * Overestimate by comparing only the lower bounds. Higher
+				 * accuracy would require us to add P(lower1 = lower2) *
+				 * P(upper1 < upper2)
+				 */
+				selec = 1 - calc_hist_join_selectivity(rng_typcache,
+													   hist1_lower, nhist1,
+													   hist2_lower, nhist2);
+				break;
+
+			case OID_MULTIRANGE_GREATER_OP:
+
+				/*
+				 * A > B == B < A
+				 *
+				 * Start by comparing lower bounds and if they are equal
+				 * compare upper bounds
+				 *
+				 * Underestimate by comparing only the lower bounds. Higher
+				 * accuracy would require us to add P(lower1 = lower2) *
+				 * P(upper1 > upper2)
+				 */
+				selec = calc_hist_join_selectivity(rng_typcache,
+												   hist2_lower, nhist2,
+												   hist1_lower, nhist1);
+				break;
+
+			case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_LEFT_RANGE_OP:
+			case OID_RANGE_LEFT_MULTIRANGE_OP:
+				/* var1 << var2 when upper(var1) < lower(var2) */
+				selec = calc_hist_join_selectivity(rng_typcache,
+												   hist1_upper, nhist1,
+												   hist2_lower, nhist2);
+				break;
+
+			case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_RIGHT_RANGE_OP:
+			case OID_RANGE_RIGHT_MULTIRANGE_OP:
+				/* var1 >> var2 when upper(var2) < lower(var1) */
+				selec = calc_hist_join_selectivity(rng_typcache,
+												   hist2_upper, nhist2,
+												   hist1_lower, nhist1);
+				break;
+
+			case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
+			case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
+				/* var1 &< var2 when upper(var1) < upper(var2) */
+				selec = calc_hist_join_selectivity(rng_typcache,
+												   hist1_upper, nhist1,
+												   hist2_upper, nhist2);
+				break;
+
+			case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
+			case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
+				/* var1 &> var2 when lower(var2) < lower(var1) */
+				selec = calc_hist_join_selectivity(rng_typcache,
+												   hist2_lower, nhist2,
+												   hist1_lower, nhist1);
+				break;
+
+			case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
+			case OID_MULTIRANGE_RANGE_CONTAINED_OP:
+			case OID_RANGE_MULTIRANGE_CONTAINED_OP:
+
+				/*
+				 * var1 <@ var2 is equivalent to lower(var2) <= lower(var1)
+				 * and upper(var1) <= upper(var2)
+				 *
+				 * After negating both sides we get not( lower(var1) <
+				 * lower(var2) ) and not( upper(var2) < upper(var1) ),
+				 * respectively. Assuming independence, multiply both
+				 * selectivities.
+				 */
+				selec = 1 - calc_hist_join_selectivity(rng_typcache,
+													   hist1_lower, nhist1,
+													   hist2_lower, nhist2);
+				selec *= 1 - calc_hist_join_selectivity(rng_typcache,
+														hist2_upper, nhist2,
+														hist1_upper, nhist1);
+				break;
+
+			case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
+			case OID_MULTIRANGE_CONTAINS_RANGE_OP:
+			case OID_RANGE_CONTAINS_MULTIRANGE_OP:
+
+				/*
+				 * var1 @> var2 is equivalent to lower(var1) <= lower(var2)
+				 * and upper(var2) <= upper(var1)
+				 *
+				 * After negating both sides we get not( lower(var2) <
+				 * lower(var1) ) and not( upper(var1) < upper(var2) ),
+				 * respectively. Assuming independence, multiply both
+				 * selectivities.
+				 */
+				selec = 1 - calc_hist_join_selectivity(rng_typcache,
+													   hist2_lower, nhist2,
+													   hist1_lower, nhist1);
+				selec *= 1 - calc_hist_join_selectivity(rng_typcache,
+														hist1_upper, nhist1,
+														hist2_upper, nhist2);
+				break;
+
+			case OID_MULTIRANGE_ADJACENT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_ADJACENT_RANGE_OP:
+			case OID_RANGE_ADJACENT_MULTIRANGE_OP:
+
+				/*
+				 * just punt for now, estimation would require equality
+				 * selectivity for bounds
+				 */
+			case OID_MULTIRANGE_CONTAINS_ELEM_OP:
+			case OID_MULTIRANGE_ELEM_CONTAINED_OP:
+
+				/*
+				 * just punt for now, estimation would require extraction of
+				 * histograms for the anyelement
+				 */
+			default:
+				break;
+		}
+
+
+		/* the calculated selectivity only applies to non-empty (multi)ranges */
+		selec *= (1 - empty_frac1) * (1 - empty_frac2);
+
+		/*
+		 * Depending on the operator, empty (multi)ranges might match
+		 * different fractions of the result.
+		 */
+		switch (operator)
+		{
+			case OID_MULTIRANGE_LESS_OP:
+
+				/*
+				 * empty (multi)range < non-empty (multi)range
+				 */
+				selec += empty_frac1 * (1 - empty_frac2);
+				break;
+
+			case OID_MULTIRANGE_GREATER_OP:
+
+				/*
+				 * non-empty (multi)range > empty (multi)range
+				 */
+				selec += (1 - empty_frac1) * empty_frac2;
+				break;
+
+			case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
+			case OID_MULTIRANGE_RANGE_CONTAINED_OP:
+			case OID_RANGE_MULTIRANGE_CONTAINED_OP:
+
+				/*
+				 * empty (multi)range <@ any (multi)range
+				 */
+			case OID_MULTIRANGE_LESS_EQUAL_OP:
+
+				/*
+				 * empty (multi)range <= any (multi)range
+				 */
+				selec += empty_frac1;
+				break;
+
+			case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
+			case OID_MULTIRANGE_CONTAINS_RANGE_OP:
+			case OID_RANGE_CONTAINS_MULTIRANGE_OP:
+
+				/*
+				 * any (multi)range @> empty (multi)range
+				 */
+			case OID_MULTIRANGE_GREATER_EQUAL_OP:
+
+				/*
+				 * any (multi)range >= empty (multi)range
+				 */
+				selec += empty_frac2;
+				break;
+
+			case OID_MULTIRANGE_CONTAINS_ELEM_OP:
+			case OID_MULTIRANGE_ELEM_CONTAINED_OP:
+			case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
+			case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
+			case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
+			case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
+			case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
+			case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_LEFT_RANGE_OP:
+			case OID_RANGE_LEFT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_RIGHT_RANGE_OP:
+			case OID_RANGE_RIGHT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_ADJACENT_MULTIRANGE_OP:
+			case OID_MULTIRANGE_ADJACENT_RANGE_OP:
+			case OID_RANGE_ADJACENT_MULTIRANGE_OP:
+			default:
+
+				/*
+				 * these operators always return false when an empty
+				 * (multi)range is involved
+				 */
+				break;
+
+		}
+
+		/* all range operators are strict */
+		selec *= (1 - null_frac1) * (1 - null_frac2);
+
+		free_attstatsslot(&hist1);
+		free_attstatsslot(&hist2);
+	}
+
+	ReleaseVariableStats(vardata1);
+	ReleaseVariableStats(vardata2);
+
+	CLAMP_PROBABILITY(selec);
+
+	PG_RETURN_FLOAT8((float8) selec);
+
+}
diff --git a/src/backend/utils/adt/rangetypes_selfuncs.c \
b/src/backend/utils/adt/rangetypes_selfuncs.c index c2795f4593..007e14bcf6 100644
--- a/src/backend/utils/adt/rangetypes_selfuncs.c
+++ b/src/backend/utils/adt/rangetypes_selfuncs.c
@@ -1221,3 +1221,509 @@ calc_hist_selectivity_contains(TypeCacheEntry *typcache,
 
 	return sum_frac;
 }
+
+/*
+ * This is a utility function used to estimate the join selectivity of
+ * range attributes using rangebound histogram statistics as described
+ * in this paper:
+ *
+ * Diogo Repas, Zhicheng Luo, Maxime Schoemans and Mahmoud Sakr, 2022
+ * Selectivity Estimation of Inequality Joins In Databases
+ * https://doi.org/10.48550/arXiv.2206.07396
+ *
+ * The attributes being joined will be treated as random variables
+ * that follow a distribution modeled by a Probability Density Function (PDF).
+ * Let the two attributes be denoted X, Y.
+ * This function finds the probability P(X < Y).
+ * Note that the PDFs of the two variables can easily be obtained
+ * from their bounds histogram, respectively hist1 and hist2 .
+ *
+ * Let the PDF of X, Y be denoted as f_X, f_Y.
+ * The probability P(X < Y) can be formalized as follows:
+ * P(X < Y)= integral_-inf^inf( integral_-inf^y ( f_X(x) * f_Y(y) dx dy ) )
+ *                = integral_-inf^inf( F_X(y) * f_Y(y) dy )
+ * where F_X(y) denote the Cumulative Distribution Function of X at y.
+ * Note that F_X is the selectivity estimation (non-join),
+ * which is implemented using the function calc_hist_selectivity_scalar.
+ *
+ * Now given the histograms of the two attributes X, Y, we note the following:
+ * - The PDF of Y is a step function
+ *	(constant piece-wise, where each piece is defined in a bin of Y's histogram)
+ * - The CDF of X is linear piece-wise
+ * 	(each piece is defined in a bin of X's histogram)
+ * This leads to the conclusion that their product
+ * (used to calculate the equation above) is also linear piece-wise.
+ * A new piece starts whenever either the bin of X or the bin of Y changes.
+ * By parallel scanning the two rangebound histograms of X and Y,
+ * we evaluate one piece of the result between every two consecutive rangebounds
+ * in the union of the two histograms.
+ *
+ * Given that the product F_X * f_y is linear in the interval
+ * between every two consecutive rangebounds, let them be denoted prev, cur,
+ * it can be shown that the above formula can be discretized into the following:
+ * P(X < Y) = 
+ *   0.5 * sum_0^{n+m-1} ( ( F_X(prev) + F_X(cur) ) * ( F_Y(cur) - F_Y(prev) ) )
+ * where n, m are the lengths of the two histograms.
+ *
+ * As such, it is possible to fully compute the join selectivity
+ * as a summation of CDFs, iterating over the bounds of the two histograms.
+ * This maximizes the code reuse, since the CDF is computed using
+ * the calc_hist_selectivity_scalar function, which is the function used
+ * for selectivity estimation (non-joins).
+ *
+ */
+static double
+calc_hist_join_selectivity(TypeCacheEntry *typcache,
+						   const RangeBound *hist1, int nhist1,
+						   const RangeBound *hist2, int nhist2)
+{
+	int			i,
+				j;
+	double		selectivity,
+				cur_sel1,
+				cur_sel2,
+				prev_sel1,
+				prev_sel2;
+	RangeBound	cur_sync;
+
+	/*
+	 * Histograms will never be empty. In fact, a histogram will never have
+	 * less than 2 values (1 bin)
+	 */
+	Assert(nhist1 > 1);
+	Assert(nhist2 > 1);
+
+	/* Fast-forwards i and j to start of iteration */
+	for (i = 0; range_cmp_bound_values(typcache, &hist1[i], &hist2[0]) < 0; i++);
+	for (j = 0; range_cmp_bound_values(typcache, &hist2[j], &hist1[0]) < 0; j++);
+
+	if (range_cmp_bound_values(typcache, &hist1[i], &hist2[j]) < 0)
+		cur_sync = hist1[i++];
+	else if (range_cmp_bound_values(typcache, &hist1[i], &hist2[j]) > 0)
+		cur_sync = hist2[j++];
+	else
+	{
+		/* If equal, skip one */
+		cur_sync = hist1[i];
+		i++;
+		j++;
+	}
+	prev_sel1 = calc_hist_selectivity_scalar(typcache, &cur_sync,
+											 hist1, nhist1, false);
+	prev_sel2 = calc_hist_selectivity_scalar(typcache, &cur_sync,
+											 hist2, nhist2, false);
+
+	/*
+	 * Do the estimation on overlapping region
+	 */
+	selectivity = 0.0;
+	while (i < nhist1 && j < nhist2)
+	{
+		if (range_cmp_bound_values(typcache, &hist1[i], &hist2[j]) < 0)
+			cur_sync = hist1[i++];
+		else if (range_cmp_bound_values(typcache, &hist1[i], &hist2[j]) > 0)
+			cur_sync = hist2[j++];
+		else
+		{
+			/* If equal, skip one */
+			cur_sync = hist1[i];
+			i++;
+			j++;
+		}
+		cur_sel1 = calc_hist_selectivity_scalar(typcache, &cur_sync,
+												hist1, nhist1, false);
+		cur_sel2 = calc_hist_selectivity_scalar(typcache, &cur_sync,
+												hist2, nhist2, false);
+
+		selectivity += (prev_sel1 + cur_sel1) * (cur_sel2 - prev_sel2);
+
+		/* Prepare for the next iteration */
+		prev_sel1 = cur_sel1;
+		prev_sel2 = cur_sel2;
+	}
+
+	/* Include remainder of hist2 if any */
+	if (j < nhist2)
+		selectivity += 1 - prev_sel2;
+
+	return selectivity / 2;
+}
+
+/*
+ * rangejoinsel -- join cardinality for range operators
+ */
+Datum
+rangejoinsel(PG_FUNCTION_ARGS)
+{
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+	Oid			operator = PG_GETARG_OID(1);
+	List	   *args = (List *) PG_GETARG_POINTER(2);
+	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
+	VariableStatData vardata1,
+				vardata2;
+	AttStatsSlot hist1,
+				hist2,
+				sslot;
+	bool		reversed;
+	Selectivity selec;
+	TypeCacheEntry *typcache = NULL;
+	Form_pg_statistic stats1,
+				stats2;
+	double		empty_frac1,
+				empty_frac2,
+				null_frac1,
+				null_frac2;
+	int			nhist1,
+				nhist2;
+	RangeBound *hist1_lower,
+			   *hist1_upper,
+			   *hist2_lower,
+			   *hist2_upper;
+	bool		empty;
+	int			i;
+
+	get_join_variables(root, args, sjinfo, &vardata1, &vardata2, &reversed);
+
+	selec = default_range_selectivity(operator);
+
+	if (HeapTupleIsValid(vardata1.statsTuple) &&
+		get_attstatsslot(&hist1, vardata1.statsTuple,
+						 STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
+						 ATTSTATSSLOT_VALUES) &&
+		HeapTupleIsValid(vardata2.statsTuple) &&
+		get_attstatsslot(&hist2, vardata2.statsTuple,
+						 STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
+						 ATTSTATSSLOT_VALUES) &&
+		vardata1.vartype == vardata2.vartype)
+	{
+
+		/* Initialize type cache */
+		typcache = range_get_typcache(fcinfo, vardata1.vartype);
+
+		/*
+		 * First look up the fraction of NULLs and empty ranges from
+		 * pg_statistic.
+		 */
+		stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
+		stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
+
+		null_frac1 = stats1->stanullfrac;
+		null_frac2 = stats2->stanullfrac;
+
+		/* Try to get fraction of empty ranges for the first variable */
+		if (get_attstatsslot(&sslot, vardata1.statsTuple,
+							 STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
+							 InvalidOid,
+							 ATTSTATSSLOT_NUMBERS))
+		{
+			if (sslot.nnumbers != 1)	/* shouldn't happen */
+				elog(ERROR, "invalid empty fraction statistic");
+			empty_frac1 = sslot.numbers[0];
+			free_attstatsslot(&sslot);
+		}
+		else
+		{
+			/* No empty fraction statistic. Assume no empty ranges. */
+			empty_frac1 = 0.0;
+		}
+
+		/* Try to get fraction of empty ranges for the second variable */
+		if (get_attstatsslot(&sslot, vardata2.statsTuple,
+							 STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
+							 InvalidOid,
+							 ATTSTATSSLOT_NUMBERS))
+		{
+			if (sslot.nnumbers != 1)	/* shouldn't happen */
+				elog(ERROR, "invalid empty fraction statistic");
+			empty_frac2 = sslot.numbers[0];
+			free_attstatsslot(&sslot);
+		}
+		else
+		{
+			/* No empty fraction statistic. Assume no empty ranges. */
+			empty_frac2 = 0.0;
+		}
+
+		/*
+		 * Convert histograms of ranges into histograms of their lower and
+		 * upper bounds for the first variable.
+		 */
+		nhist1 = hist1.nvalues;
+		hist1_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist1);
+		hist1_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist1);
+		for (i = 0; i < nhist1; i++)
+		{
+			range_deserialize(typcache, DatumGetRangeTypeP(hist1.values[i]),
+							  &hist1_lower[i], &hist1_upper[i], &empty);
+			/* The histogram should not contain any empty ranges */
+			if (empty)
+				elog(ERROR, "bounds histogram contains an empty range");
+		}
+
+		/*
+		 * Convert histograms of ranges into histograms of their lower and
+		 * upper bounds for the second variable.
+		 */
+		nhist2 = hist2.nvalues;
+		hist2_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist2);
+		hist2_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist2);
+		for (i = 0; i < nhist2; i++)
+		{
+			range_deserialize(typcache, DatumGetRangeTypeP(hist2.values[i]),
+							  &hist2_lower[i], &hist2_upper[i], &empty);
+			/* The histogram should not contain any empty ranges */
+			if (empty)
+				elog(ERROR, "bounds histogram contains an empty range");
+		}
+
+		switch (operator)
+		{
+			case OID_RANGE_OVERLAP_OP:
+
+				/*
+				 * Selectivity of A && B = Selectivity of NOT( A << B || A >>
+				 * B ) = 1 - Selectivity of (A.upper < B.lower) - Selectivity
+				 * of (B.upper < A.lower)
+				 */
+				selec = 1;
+				selec -= calc_hist_join_selectivity(typcache,
+													hist1_upper, nhist1,
+													hist2_lower, nhist2);
+				selec -= calc_hist_join_selectivity(typcache,
+													hist2_upper, nhist2,
+													hist1_lower, nhist1);
+				break;
+
+			case OID_RANGE_LESS_EQUAL_OP:
+
+				/*
+				 * A <= B
+				 *
+				 * Start by comparing lower bounds and if they are equal
+				 * compare upper bounds
+				 *
+				 * Negation of OID_RANGE_GREATER_OP.
+				 *
+				 * Overestimate by comparing only the lower bounds. Higher
+				 * accuracy would require us to subtract P(lower1 = lower2) *
+				 * P(upper1 > upper2)
+				 */
+				selec = 1 - calc_hist_join_selectivity(typcache,
+													   hist2_lower, nhist2,
+													   hist1_lower, nhist1);
+				break;
+
+			case OID_RANGE_LESS_OP:
+
+				/*
+				 * A < B
+				 *
+				 * Start by comparing lower bounds and if they are equal
+				 * compare upper bounds
+				 *
+				 * Underestimate by comparing only the lower bounds. Higher
+				 * accuracy would require us to add P(lower1 = lower2) *
+				 * P(upper1 < upper2)
+				 */
+				selec = calc_hist_join_selectivity(typcache,
+												   hist1_lower, nhist1,
+												   hist2_lower, nhist2);
+				break;
+
+			case OID_RANGE_GREATER_EQUAL_OP:
+
+				/*
+				 * A >= B
+				 *
+				 * Start by comparing lower bounds and if they are equal
+				 * compare upper bounds
+				 *
+				 * Negation of OID_RANGE_LESS_OP.
+				 *
+				 * Overestimate by comparing only the lower bounds. Higher
+				 * accuracy would require us to add P(lower1 = lower2) *
+				 * P(upper1 < upper2)
+				 */
+				selec = 1 - calc_hist_join_selectivity(typcache,
+													   hist1_lower, nhist1,
+													   hist2_lower, nhist2);
+				break;
+
+			case OID_RANGE_GREATER_OP:
+
+				/*
+				 * A > B == B < A
+				 *
+				 * Start by comparing lower bounds and if they are equal
+				 * compare upper bounds
+				 *
+				 * Underestimate by comparing only the lower bounds. Higher
+				 * accuracy would require us to add P(lower1 = lower2) *
+				 * P(upper1 > upper2)
+				 */
+				selec = calc_hist_join_selectivity(typcache,
+												   hist2_lower, nhist2,
+												   hist1_lower, nhist1);
+				break;
+
+			case OID_RANGE_LEFT_OP:
+				/* var1 << var2 when upper(var1) < lower(var2) */
+				selec = calc_hist_join_selectivity(typcache,
+												   hist1_upper, nhist1,
+												   hist2_lower, nhist2);
+				break;
+
+			case OID_RANGE_RIGHT_OP:
+				/* var1 >> var2 when upper(var2) < lower(var1) */
+				selec = calc_hist_join_selectivity(typcache,
+												   hist2_upper, nhist2,
+												   hist1_lower, nhist1);
+				break;
+
+			case OID_RANGE_OVERLAPS_LEFT_OP:
+				/* var1 &< var2 when upper(var1) < upper(var2) */
+				selec = calc_hist_join_selectivity(typcache,
+												   hist1_upper, nhist1,
+												   hist2_upper, nhist2);
+				break;
+
+			case OID_RANGE_OVERLAPS_RIGHT_OP:
+				/* var1 &> var2 when lower(var2) < lower(var1) */
+				selec = calc_hist_join_selectivity(typcache,
+												   hist2_lower, nhist2,
+												   hist1_lower, nhist1);
+				break;
+
+			case OID_RANGE_CONTAINED_OP:
+
+				/*
+				 * var1 <@ var2 is equivalent to lower(var2) <= lower(var1)
+				 * and upper(var1) <= upper(var2)
+				 *
+				 * After negating both sides we get not( lower(var1) <
+				 * lower(var2) ) and not( upper(var2) < upper(var1) ),
+				 * respectively. Assuming independence, multiply both
+				 * selectivities.
+				 */
+				selec = 1 - calc_hist_join_selectivity(typcache,
+													   hist1_lower, nhist1,
+													   hist2_lower, nhist2);
+				selec *= 1 - calc_hist_join_selectivity(typcache,
+														hist2_upper, nhist2,
+														hist1_upper, nhist1);
+				break;
+
+			case OID_RANGE_CONTAINS_OP:
+
+				/*
+				 * var1 @> var2 is equivalent to lower(var1) <= lower(var2)
+				 * and upper(var2) <= upper(var1)
+				 *
+				 * After negating both sides we get not( lower(var2) <
+				 * lower(var1) ) and not( upper(var1) < upper(var2) ),
+				 * respectively. Assuming independence, multiply both
+				 * selectivities.
+				 */
+				selec = 1 - calc_hist_join_selectivity(typcache,
+													   hist2_lower, nhist2,
+													   hist1_lower, nhist1);
+				selec *= 1 - calc_hist_join_selectivity(typcache,
+														hist1_upper, nhist1,
+														hist2_upper, nhist2);
+				break;
+
+			case OID_RANGE_CONTAINS_ELEM_OP:
+			case OID_RANGE_ELEM_CONTAINED_OP:
+
+				/*
+				 * just punt for now, estimation would require extraction of
+				 * histograms for the anyelement
+				 */
+			default:
+				break;
+		}
+
+
+		/* the calculated selectivity only applies to non-empty ranges */
+		selec *= (1 - empty_frac1) * (1 - empty_frac2);
+
+		/*
+		 * Depending on the operator, empty ranges might match different
+		 * fractions of the result.
+		 */
+		switch (operator)
+		{
+			case OID_RANGE_LESS_OP:
+
+				/*
+				 * empty range < non-empty range
+				 */
+				selec += empty_frac1 * (1 - empty_frac2);
+				break;
+
+			case OID_RANGE_GREATER_OP:
+
+				/*
+				 * non-empty range > empty range
+				 */
+				selec += (1 - empty_frac1) * empty_frac2;
+				break;
+
+			case OID_RANGE_CONTAINED_OP:
+
+				/*
+				 * empty range <@ any range
+				 */
+			case OID_RANGE_LESS_EQUAL_OP:
+
+				/*
+				 * empty range <= any range
+				 */
+				selec += empty_frac1;
+				break;
+
+			case OID_RANGE_CONTAINS_OP:
+
+				/*
+				 * any range @> empty range
+				 */
+			case OID_RANGE_GREATER_EQUAL_OP:
+
+				/*
+				 * any range >= empty range
+				 */
+				selec += empty_frac2;
+				break;
+
+			case OID_RANGE_CONTAINS_ELEM_OP:
+			case OID_RANGE_ELEM_CONTAINED_OP:
+			case OID_RANGE_OVERLAP_OP:
+			case OID_RANGE_OVERLAPS_LEFT_OP:
+			case OID_RANGE_OVERLAPS_RIGHT_OP:
+			case OID_RANGE_LEFT_OP:
+			case OID_RANGE_RIGHT_OP:
+			default:
+
+				/*
+				 * these operators always return false when an empty range is
+				 * involved
+				 */
+				break;
+
+		}
+
+		/* all range operators are strict */
+		selec *= (1 - null_frac1) * (1 - null_frac2);
+
+		free_attstatsslot(&hist1);
+		free_attstatsslot(&hist2);
+	}
+
+	ReleaseVariableStats(vardata1);
+	ReleaseVariableStats(vardata2);
+
+	CLAMP_PROBABILITY(selec);
+
+	PG_RETURN_FLOAT8((float8) selec);
+
+}
diff --git a/src/include/catalog/pg_operator.dat \
b/src/include/catalog/pg_operator.dat index bc5f8213f3..b63a7e15af 100644
--- a/src/include/catalog/pg_operator.dat
+++ b/src/include/catalog/pg_operator.dat
@@ -3071,78 +3071,78 @@
   oprname => '<', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '>(anyrange,anyrange)',
   oprnegate => '>=(anyrange,anyrange)', oprcode => 'range_lt',
-  oprrest => 'rangesel', oprjoin => 'scalarltjoinsel' },
+  oprrest => 'rangesel', oprjoin => 'rangejoinsel' },
 { oid => '3885', oid_symbol => 'OID_RANGE_LESS_EQUAL_OP',
   descr => 'less than or equal',
   oprname => '<=', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '>=(anyrange,anyrange)',
   oprnegate => '>(anyrange,anyrange)', oprcode => 'range_le',
-  oprrest => 'rangesel', oprjoin => 'scalarlejoinsel' },
+  oprrest => 'rangesel', oprjoin => 'rangejoinsel' },
 { oid => '3886', oid_symbol => 'OID_RANGE_GREATER_EQUAL_OP',
   descr => 'greater than or equal',
   oprname => '>=', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '<=(anyrange,anyrange)',
   oprnegate => '<(anyrange,anyrange)', oprcode => 'range_ge',
-  oprrest => 'rangesel', oprjoin => 'scalargejoinsel' },
+  oprrest => 'rangesel', oprjoin => 'rangejoinsel' },
 { oid => '3887', oid_symbol => 'OID_RANGE_GREATER_OP',
   descr => 'greater than',
   oprname => '>', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '<(anyrange,anyrange)',
   oprnegate => '<=(anyrange,anyrange)', oprcode => 'range_gt',
-  oprrest => 'rangesel', oprjoin => 'scalargtjoinsel' },
+  oprrest => 'rangesel', oprjoin => 'rangejoinsel' },
 { oid => '3888', oid_symbol => 'OID_RANGE_OVERLAP_OP', descr => 'overlaps',
   oprname => '&&', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '&&(anyrange,anyrange)',
   oprcode => 'range_overlaps', oprrest => 'rangesel',
-  oprjoin => 'areajoinsel' },
+  oprjoin => 'rangejoinsel' },
 { oid => '3889', oid_symbol => 'OID_RANGE_CONTAINS_ELEM_OP',
   descr => 'contains',
   oprname => '@>', oprleft => 'anyrange', oprright => 'anyelement',
   oprresult => 'bool', oprcom => '<@(anyelement,anyrange)',
   oprcode => 'range_contains_elem', oprrest => 'rangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'rangejoinsel' },
 { oid => '3890', oid_symbol => 'OID_RANGE_CONTAINS_OP', descr => 'contains',
   oprname => '@>', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '<@(anyrange,anyrange)',
   oprcode => 'range_contains', oprrest => 'rangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'rangejoinsel' },
 { oid => '3891', oid_symbol => 'OID_RANGE_ELEM_CONTAINED_OP',
   descr => 'is contained by',
   oprname => '<@', oprleft => 'anyelement', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '@>(anyrange,anyelement)',
   oprcode => 'elem_contained_by_range', oprrest => 'rangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'rangejoinsel' },
 { oid => '3892', oid_symbol => 'OID_RANGE_CONTAINED_OP',
   descr => 'is contained by',
   oprname => '<@', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '@>(anyrange,anyrange)',
   oprcode => 'range_contained_by', oprrest => 'rangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'rangejoinsel' },
 { oid => '3893', oid_symbol => 'OID_RANGE_LEFT_OP', descr => 'is left of',
   oprname => '<<', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '>>(anyrange,anyrange)',
   oprcode => 'range_before', oprrest => 'rangesel',
-  oprjoin => 'scalarltjoinsel' },
+  oprjoin => 'rangejoinsel' },
 { oid => '3894', oid_symbol => 'OID_RANGE_RIGHT_OP', descr => 'is right of',
   oprname => '>>', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '<<(anyrange,anyrange)',
   oprcode => 'range_after', oprrest => 'rangesel',
-  oprjoin => 'scalargtjoinsel' },
+  oprjoin => 'rangejoinsel' },
 { oid => '3895', oid_symbol => 'OID_RANGE_OVERLAPS_LEFT_OP',
   descr => 'overlaps or is left of',
   oprname => '&<', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcode => 'range_overleft', oprrest => 'rangesel',
-  oprjoin => 'scalarltjoinsel' },
+  oprjoin => 'rangejoinsel' },
 { oid => '3896', oid_symbol => 'OID_RANGE_OVERLAPS_RIGHT_OP',
   descr => 'overlaps or is right of',
   oprname => '&>', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcode => 'range_overright', oprrest => 'rangesel',
-  oprjoin => 'scalargtjoinsel' },
+  oprjoin => 'rangejoinsel' },
 { oid => '3897', descr => 'is adjacent to',
   oprname => '-|-', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '-|-(anyrange,anyrange)',
   oprcode => 'range_adjacent', oprrest => 'matchingsel',
-  oprjoin => 'matchingjoinsel' },
+  oprjoin => 'rangejoinsel' },
 { oid => '3898', descr => 'range union',
   oprname => '+', oprleft => 'anyrange', oprright => 'anyrange',
   oprresult => 'anyrange', oprcom => '+(anyrange,anyrange)',
@@ -3277,139 +3277,139 @@
   oprname => '<', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '>(anymultirange,anymultirange)',
   oprnegate => '>=(anymultirange,anymultirange)', oprcode => 'multirange_lt',
-  oprrest => 'multirangesel', oprjoin => 'scalarltjoinsel' },
+  oprrest => 'multirangesel', oprjoin => 'multirangejoinsel' },
 { oid => '2863', oid_symbol => 'OID_MULTIRANGE_LESS_EQUAL_OP',
   descr => 'less than or equal',
   oprname => '<=', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '>=(anymultirange,anymultirange)',
   oprnegate => '>(anymultirange,anymultirange)', oprcode => 'multirange_le',
-  oprrest => 'multirangesel', oprjoin => 'scalarlejoinsel' },
+  oprrest => 'multirangesel', oprjoin => 'multirangejoinsel' },
 { oid => '2864', oid_symbol => 'OID_MULTIRANGE_GREATER_EQUAL_OP',
   descr => 'greater than or equal',
   oprname => '>=', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '<=(anymultirange,anymultirange)',
   oprnegate => '<(anymultirange,anymultirange)', oprcode => 'multirange_ge',
-  oprrest => 'multirangesel', oprjoin => 'scalargejoinsel' },
+  oprrest => 'multirangesel', oprjoin => 'multirangejoinsel' },
 { oid => '2865', oid_symbol => 'OID_MULTIRANGE_GREATER_OP',
   descr => 'greater than',
   oprname => '>', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '<(anymultirange,anymultirange)',
   oprnegate => '<=(anymultirange,anymultirange)', oprcode => 'multirange_gt',
-  oprrest => 'multirangesel', oprjoin => 'scalargtjoinsel' },
+  oprrest => 'multirangesel', oprjoin => 'multirangejoinsel' },
 { oid => '2866', oid_symbol => 'OID_RANGE_OVERLAPS_MULTIRANGE_OP',
   descr => 'overlaps',
   oprname => '&&', oprleft => 'anyrange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '&&(anymultirange,anyrange)',
   oprcode => 'range_overlaps_multirange', oprrest => 'multirangesel',
-  oprjoin => 'areajoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '2867', oid_symbol => 'OID_MULTIRANGE_OVERLAPS_RANGE_OP',
   descr => 'overlaps',
   oprname => '&&', oprleft => 'anymultirange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '&&(anyrange,anymultirange)',
   oprcode => 'multirange_overlaps_range', oprrest => 'multirangesel',
-  oprjoin => 'areajoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '2868', oid_symbol => 'OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP',
   descr => 'overlaps',
   oprname => '&&', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '&&(anymultirange,anymultirange)',
   oprcode => 'multirange_overlaps_multirange', oprrest => 'multirangesel',
-  oprjoin => 'areajoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '2869', oid_symbol => 'OID_MULTIRANGE_CONTAINS_ELEM_OP',
   descr => 'contains',
   oprname => '@>', oprleft => 'anymultirange', oprright => 'anyelement',
   oprresult => 'bool', oprcom => '<@(anyelement,anymultirange)',
   oprcode => 'multirange_contains_elem', oprrest => 'multirangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '2870', oid_symbol => 'OID_MULTIRANGE_CONTAINS_RANGE_OP',
   descr => 'contains',
   oprname => '@>', oprleft => 'anymultirange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '<@(anyrange,anymultirange)',
   oprcode => 'multirange_contains_range', oprrest => 'multirangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '2871', oid_symbol => 'OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP',
   descr => 'contains',
   oprname => '@>', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '<@(anymultirange,anymultirange)',
   oprcode => 'multirange_contains_multirange', oprrest => 'multirangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '2872', oid_symbol => 'OID_MULTIRANGE_ELEM_CONTAINED_OP',
   descr => 'is contained by',
   oprname => '<@', oprleft => 'anyelement', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '@>(anymultirange,anyelement)',
   oprcode => 'elem_contained_by_multirange', oprrest => 'multirangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '2873', oid_symbol => 'OID_MULTIRANGE_RANGE_CONTAINED_OP',
   descr => 'is contained by',
   oprname => '<@', oprleft => 'anyrange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '@>(anymultirange,anyrange)',
   oprcode => 'range_contained_by_multirange', oprrest => 'multirangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '2874', oid_symbol => 'OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP',
   descr => 'is contained by',
   oprname => '<@', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '@>(anymultirange,anymultirange)',
   oprcode => 'multirange_contained_by_multirange', oprrest => 'multirangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '4539', oid_symbol => 'OID_RANGE_CONTAINS_MULTIRANGE_OP',
   descr => 'contains',
   oprname => '@>', oprleft => 'anyrange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '<@(anymultirange,anyrange)',
   oprcode => 'range_contains_multirange', oprrest => 'multirangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '4540', oid_symbol => 'OID_RANGE_MULTIRANGE_CONTAINED_OP',
   descr => 'is contained by',
   oprname => '<@', oprleft => 'anymultirange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '@>(anyrange,anymultirange)',
   oprcode => 'multirange_contained_by_range', oprrest => 'multirangesel',
-  oprjoin => 'contjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '2875', oid_symbol => 'OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP',
   descr => 'overlaps or is left of',
   oprname => '&<', oprleft => 'anyrange', oprright => 'anymultirange',
   oprresult => 'bool', oprcode => 'range_overleft_multirange',
-  oprrest => 'multirangesel', oprjoin => 'scalarltjoinsel' },
+  oprrest => 'multirangesel', oprjoin => 'multirangejoinsel' },
 { oid => '2876', oid_symbol => 'OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP',
   descr => 'overlaps or is left of',
   oprname => '&<', oprleft => 'anymultirange', oprright => 'anyrange',
   oprresult => 'bool', oprcode => 'multirange_overleft_range',
-  oprrest => 'multirangesel', oprjoin => 'scalarltjoinsel' },
+  oprrest => 'multirangesel', oprjoin => 'multirangejoinsel' },
 { oid => '2877', oid_symbol => 'OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP',
   descr => 'overlaps or is left of',
   oprname => '&<', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcode => 'multirange_overleft_multirange',
-  oprrest => 'multirangesel', oprjoin => 'scalarltjoinsel' },
+  oprrest => 'multirangesel', oprjoin => 'multirangejoinsel' },
 { oid => '3585', oid_symbol => 'OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP',
   descr => 'overlaps or is right of',
   oprname => '&>', oprleft => 'anyrange', oprright => 'anymultirange',
   oprresult => 'bool', oprcode => 'range_overright_multirange',
-  oprrest => 'multirangesel', oprjoin => 'scalargtjoinsel' },
+  oprrest => 'multirangesel', oprjoin => 'multirangejoinsel' },
 { oid => '4035', oid_symbol => 'OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP',
   descr => 'overlaps or is right of',
   oprname => '&>', oprleft => 'anymultirange', oprright => 'anyrange',
   oprresult => 'bool', oprcode => 'multirange_overright_range',
-  oprrest => 'multirangesel', oprjoin => 'scalargtjoinsel' },
+  oprrest => 'multirangesel', oprjoin => 'multirangejoinsel' },
 { oid => '4142', oid_symbol => 'OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP',
   descr => 'overlaps or is right of',
   oprname => '&>', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcode => 'multirange_overright_multirange',
-  oprrest => 'multirangesel', oprjoin => 'scalargtjoinsel' },
+  oprrest => 'multirangesel', oprjoin => 'multirangejoinsel' },
 { oid => '4179', oid_symbol => 'OID_RANGE_ADJACENT_MULTIRANGE_OP',
   descr => 'is adjacent to',
   oprname => '-|-', oprleft => 'anyrange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '-|-(anymultirange,anyrange)',
   oprcode => 'range_adjacent_multirange', oprrest => 'matchingsel',
-  oprjoin => 'matchingjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '4180', oid_symbol => 'OID_MULTIRANGE_ADJACENT_RANGE_OP',
   descr => 'is adjacent to',
   oprname => '-|-', oprleft => 'anymultirange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '-|-(anyrange,anymultirange)',
   oprcode => 'multirange_adjacent_range', oprrest => 'matchingsel',
-  oprjoin => 'matchingjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '4198', oid_symbol => 'OID_MULTIRANGE_ADJACENT_MULTIRANGE_OP',
   descr => 'is adjacent to',
   oprname => '-|-', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '-|-(anymultirange,anymultirange)',
   oprcode => 'multirange_adjacent_multirange', oprrest => 'matchingsel',
-  oprjoin => 'matchingjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '4392', descr => 'multirange union',
   oprname => '+', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'anymultirange', oprcom => '+(anymultirange,anymultirange)',
@@ -3426,36 +3426,36 @@
   oprname => '<<', oprleft => 'anyrange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '>>(anymultirange,anyrange)',
   oprcode => 'range_before_multirange', oprrest => 'multirangesel',
-  oprjoin => 'scalarltjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '4396', oid_symbol => 'OID_MULTIRANGE_LEFT_RANGE_OP',
   descr => 'is left of',
   oprname => '<<', oprleft => 'anymultirange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '>>(anyrange,anymultirange)',
   oprcode => 'multirange_before_range', oprrest => 'multirangesel',
-  oprjoin => 'scalarltjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '4397', oid_symbol => 'OID_MULTIRANGE_LEFT_MULTIRANGE_OP',
   descr => 'is left of',
   oprname => '<<', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '>>(anymultirange,anymultirange)',
   oprcode => 'multirange_before_multirange', oprrest => 'multirangesel',
-  oprjoin => 'scalarltjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '4398', oid_symbol => 'OID_RANGE_RIGHT_MULTIRANGE_OP',
   descr => 'is right of',
   oprname => '>>', oprleft => 'anyrange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '<<(anymultirange,anyrange)',
   oprcode => 'range_after_multirange', oprrest => 'multirangesel',
-  oprjoin => 'scalargtjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '4399', oid_symbol => 'OID_MULTIRANGE_RIGHT_RANGE_OP',
   descr => 'is right of',
   oprname => '>>', oprleft => 'anymultirange', oprright => 'anyrange',
   oprresult => 'bool', oprcom => '<<(anyrange,anymultirange)',
   oprcode => 'multirange_after_range', oprrest => 'multirangesel',
-  oprjoin => 'scalargtjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 { oid => '4400', oid_symbol => 'OID_MULTIRANGE_RIGHT_MULTIRANGE_OP',
   descr => 'is right of',
   oprname => '>>', oprleft => 'anymultirange', oprright => 'anymultirange',
   oprresult => 'bool', oprcom => '<<(anymultirange,anymultirange)',
   oprcode => 'multirange_after_multirange', oprrest => 'multirangesel',
-  oprjoin => 'scalargtjoinsel' },
+  oprjoin => 'multirangejoinsel' },
 
 ]
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 87aa571a33..c1d4119684 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -11885,4 +11885,12 @@
   prorettype => 'bytea', proargtypes => 'pg_brin_minmax_multi_summary',
   prosrc => 'brin_minmax_multi_summary_send' },
 
+{ oid => '8355', descr => 'join selectivity for range operators',
+  proname => 'rangejoinsel', provolatile => 's', prorettype => 'float8',
+  proargtypes => 'internal oid internal int2 internal',
+  prosrc => 'rangejoinsel' },
+{ oid => '8356', descr => 'join selectivity for multirange operators',
+  proname => 'multirangejoinsel', provolatile => 's', prorettype => 'float8',
+  proargtypes => 'internal oid internal int2 internal',
+  prosrc => 'multirangejoinsel' },
 ]
diff --git a/src/test/regress/expected/multirangetypes.out \
b/src/test/regress/expected/multirangetypes.out index ac2eb84c3a..b0eeb672f0 100644
--- a/src/test/regress/expected/multirangetypes.out
+++ b/src/test/regress/expected/multirangetypes.out
@@ -3330,3 +3330,275 @@ create function mr_table_fail(i anyelement) returns table(i \
anyelement, r anymul  as $$ select $1, '[1,10]' $$ language sql;
 ERROR:  cannot determine result data type
 DETAIL:  A result of type anymultirange requires at least one input of type anyrange \
or anymultirange. +-- test multirange join operators
+create table test_multirange_join_1(mr1 int4multirange);
+create table test_multirange_join_2(mr2 int4multirange);
+create table test_range_join(ir int4range);
+create table test_elem_join(elem int4);
+insert into test_multirange_join_1 select int4multirange(int4range(g, \
g+10),int4range(g+20, g+30),int4range(g+40, g+50)) from generate_series(1,200) g; \
+insert into test_multirange_join_1 select '{}'::int4multirange from \
generate_series(1,50) g; +insert into test_multirange_join_1 select \
int4multirange(int4range(g, g+10000)) from generate_series(1,100) g; +insert into \
test_multirange_join_1 select int4multirange(int4range(NULL, g*10, '(]'), \
int4range(g*10, g*20, '(]')) from generate_series(1,10) g; +insert into \
test_multirange_join_1 select int4multirange(int4range(g*10, g*20, '(]'), \
int4range(g*20, NULL, '[)')) from generate_series(1,10) g; +insert into \
test_multirange_join_2 select int4multirange(int4range(g, g+10),int4range(g+20, \
g+30),int4range(g+40, g+50)) from generate_series(1,20) g; +insert into \
test_multirange_join_2 select '{}'::int4multirange from generate_series(1,5) g; \
+insert into test_multirange_join_2 select int4multirange(int4range(g, g+10000)) from \
generate_series(1,10) g; +insert into test_multirange_join_2 select \
int4multirange(int4range(NULL, g*10, '(]'), int4range(g*10, g*20, '(]')) from \
generate_series(1,10) g; +insert into test_multirange_join_2 select \
int4multirange(int4range(g*10, g*20, '(]'), int4range(g*20, NULL, '[)')) from \
generate_series(1,10) g; +insert into test_range_join select int4range(g, g+10) from \
generate_series(1,20) g; +insert into test_range_join select int4range(g, g+10000) \
from generate_series(1,10) g; +insert into test_range_join select \
int4range(NULL,g*10,'(]') from generate_series(1,10) g; +insert into test_range_join \
select int4range(g*10,NULL,'[)') from generate_series(1,10) g; +insert into \
test_range_join select int4range(g, g+10) from generate_series(1,20) g; +insert into \
test_range_join select 'empty'::int4range from generate_series(1,20) g; +insert into \
test_range_join select NULL from generate_series(1,5) g; +insert into test_elem_join \
select g from generate_series(1,20) g; +insert into test_elem_join select g+10000 \
from generate_series(1,10) g; +insert into test_elem_join select g*10 from \
generate_series(1,10) g; +insert into test_elem_join select g from \
generate_series(1,20) g; +insert into test_elem_join select NULL from \
generate_series(1,5) g; +analyze test_multirange_join_1;
+analyze test_multirange_join_2;
+analyze test_range_join;
+analyze test_elem_join;
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 = mr2');
+ estimated | actual 
+-----------+--------
+        55 |    300
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 < mr2');
+ estimated | actual 
+-----------+--------
+      4579 |   4598
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 <= mr2');
+ estimated | actual 
+-----------+--------
+      7309 |   4898
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 > mr2');
+ estimated | actual 
+-----------+--------
+     13041 |  15452
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 >= mr2');
+ estimated | actual 
+-----------+--------
+     15771 |  15752
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 && mr2');
+ estimated | actual 
+-----------+--------
+     11098 |  10932
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 && ir');
+ estimated | actual 
+-----------+--------
+      9611 |   9471
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir && mr2');
+ estimated | actual 
+-----------+--------
+      2924 |   2851
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 <@ mr2');
+ estimated | actual 
+-----------+--------
+      8491 |   7393
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 <@ ir');
+ estimated | actual 
+-----------+--------
+      9754 |   8621
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir <@ mr2');
+ estimated | actual 
+-----------+--------
+      2663 |   1987
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 @> mr2');
+ estimated | actual 
+-----------+--------
+      5022 |   2361
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 @> ir');
+ estimated | actual 
+-----------+--------
+     12473 |   8397
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir @> mr2');
+ estimated | actual 
+-----------+--------
+      1177 |    800
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 << mr2');
+ estimated | actual 
+-----------+--------
+       152 |    181
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 << ir');
+ estimated | actual 
+-----------+--------
+       145 |    170
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir << mr2');
+ estimated | actual 
+-----------+--------
+       478 |    519
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 >> mr2');
+ estimated | actual 
+-----------+--------
+      4750 |   4837
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 >> ir');
+ estimated | actual 
+-----------+--------
+     12644 |  12739
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir >> mr2');
+ estimated | actual 
+-----------+--------
+        98 |    110
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 &< mr2');
+ estimated | actual 
+-----------+--------
+      4868 |   6318
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 &< ir');
+ estimated | actual 
+-----------+--------
+      4120 |   5556
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir &< mr2');
+ estimated | actual 
+-----------+--------
+      1986 |   2627
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 &> mr2');
+ estimated | actual 
+-----------+--------
+     11441 |  13976
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 &> ir');
+ estimated | actual 
+-----------+--------
+     16184 |  19807
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir &> mr2');
+ estimated | actual 
+-----------+--------
+      1819 |   1895
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 -|- mr2');
+ estimated | actual 
+-----------+--------
+       160 |     71
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 -|- ir');
+ estimated | actual 
+-----------+--------
+       224 |    118
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir -|- mr2');
+ estimated | actual 
+-----------+--------
+        35 |     37
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_elem_join, test_multirange_join_1 where elem <@ mr1');
+ estimated | actual 
+-----------+--------
+       120 |   3110
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_elem_join where mr1 @> elem');
+ estimated | actual 
+-----------+--------
+       120 |   3110
+(1 row)
+
+drop function check_estimated_rows;
+drop table test_multirange_join_1;
+drop table test_multirange_join_2;
+drop table test_range_join;
+drop table test_elem_join;
diff --git a/src/test/regress/expected/rangetypes.out \
b/src/test/regress/expected/rangetypes.out index 04ccd5d451..10a76dec7a 100644
--- a/src/test/regress/expected/rangetypes.out
+++ b/src/test/regress/expected/rangetypes.out
@@ -1767,3 +1767,157 @@ create function table_fail(i anyelement) returns table(i \
anyelement, r anyrange)  as $$ select $1, '[1,10]' $$ language sql;
 ERROR:  cannot determine result data type
 DETAIL:  A result of type anyrange requires at least one input of type anyrange or \
anymultirange. +-- test range join operators
+create table test_range_join_1(ir1 int4range);
+create table test_range_join_2(ir2 int4range);
+create table test_elem_join(elem int4);
+insert into test_range_join_1 select int4range(g, g+10) from generate_series(1,200) \
g; +insert into test_range_join_1 select int4range(g, g+10000) from \
generate_series(1,100) g; +insert into test_range_join_1 select \
int4range(NULL,g*10,'(]') from generate_series(1,10) g; +insert into \
test_range_join_1 select int4range(g*10,NULL,'[)') from generate_series(1,10) g; \
+insert into test_range_join_1 select int4range(g, g+10) from generate_series(1,200) \
g; +insert into test_range_join_1 select 'empty'::int4range from \
generate_series(1,20) g; +insert into test_range_join_1 select NULL from \
generate_series(1,50) g; +insert into test_range_join_2 select int4range(g+10, g+20) \
from generate_series(1,20) g; +insert into test_range_join_2 select int4range(g+5000, \
g+15000) from generate_series(1,10) g; +insert into test_range_join_2 select \
int4range(NULL,g*5,'(]') from generate_series(1,10) g; +insert into test_range_join_2 \
select int4range(g*5,NULL,'[)') from generate_series(1,10) g; +insert into \
test_range_join_2 select int4range(g, g+10) from generate_series(1,20) g; +insert \
into test_range_join_2 select 'empty'::int4range from generate_series(1,5) g; +insert \
into test_range_join_2 select NULL from generate_series(1,5) g; +insert into \
test_elem_join select g from generate_series(1,20) g; +insert into test_elem_join \
select g+10000 from generate_series(1,10) g; +insert into test_elem_join select g*10 \
from generate_series(1,10) g; +insert into test_elem_join select g from \
generate_series(1,20) g; +insert into test_elem_join select NULL from \
generate_series(1,5) g; +analyze test_range_join_1;
+analyze test_range_join_2;
+analyze test_elem_join;
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 = ir2');
+ estimated | actual 
+-----------+--------
+        75 |    190
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 < ir2');
+ estimated | actual 
+-----------+--------
+      7256 |   9745
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 <= ir2');
+ estimated | actual 
+-----------+--------
+      9986 |   9935
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 > ir2');
+ estimated | actual 
+-----------+--------
+     30514 |  30565
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 >= ir2');
+ estimated | actual 
+-----------+--------
+     33244 |  30755
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 && ir2');
+ estimated | actual 
+-----------+--------
+      9966 |   9720
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 <@ ir2');
+ estimated | actual 
+-----------+--------
+     11868 |   6268
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 @> ir2');
+ estimated | actual 
+-----------+--------
+      8933 |   3973
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 << ir2');
+ estimated | actual 
+-----------+--------
+      5034 |   5050
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 >> ir2');
+ estimated | actual 
+-----------+--------
+     21400 |  21630
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 &< ir2');
+ estimated | actual 
+-----------+--------
+      9665 |  12023
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 &> ir2');
+ estimated | actual 
+-----------+--------
+     27914 |  28105
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 -|- ir2');
+ estimated | actual 
+-----------+--------
+       364 |    233
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_elem_join, test_range_join_1 where elem <@ ir1');
+ estimated | actual 
+-----------+--------
+       192 |   3349
+(1 row)
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_elem_join where ir1 @> elem');
+ estimated | actual 
+-----------+--------
+       192 |   3349
+(1 row)
+
+drop function check_estimated_rows;
+drop table test_range_join_1;
+drop table test_range_join_2;
+drop table test_elem_join;
diff --git a/src/test/regress/sql/multirangetypes.sql \
b/src/test/regress/sql/multirangetypes.sql index 1abcaeddb5..cb53f90aba 100644
--- a/src/test/regress/sql/multirangetypes.sql
+++ b/src/test/regress/sql/multirangetypes.sql
@@ -854,3 +854,160 @@ create function mr_inoutparam_fail(inout i anyelement, out r \
                anymultirange)
 --should fail
 create function mr_table_fail(i anyelement) returns table(i anyelement, r \
anymultirange)  as $$ select $1, '[1,10]' $$ language sql;
+
+-- test multirange join operators
+create table test_multirange_join_1(mr1 int4multirange);
+create table test_multirange_join_2(mr2 int4multirange);
+create table test_range_join(ir int4range);
+create table test_elem_join(elem int4);
+
+insert into test_multirange_join_1 select int4multirange(int4range(g, \
g+10),int4range(g+20, g+30),int4range(g+40, g+50)) from generate_series(1,200) g; \
+insert into test_multirange_join_1 select '{}'::int4multirange from \
generate_series(1,50) g; +insert into test_multirange_join_1 select \
int4multirange(int4range(g, g+10000)) from generate_series(1,100) g; +insert into \
test_multirange_join_1 select int4multirange(int4range(NULL, g*10, '(]'), \
int4range(g*10, g*20, '(]')) from generate_series(1,10) g; +insert into \
test_multirange_join_1 select int4multirange(int4range(g*10, g*20, '(]'), \
int4range(g*20, NULL, '[)')) from generate_series(1,10) g; +
+insert into test_multirange_join_2 select int4multirange(int4range(g, \
g+10),int4range(g+20, g+30),int4range(g+40, g+50)) from generate_series(1,20) g; \
+insert into test_multirange_join_2 select '{}'::int4multirange from \
generate_series(1,5) g; +insert into test_multirange_join_2 select \
int4multirange(int4range(g, g+10000)) from generate_series(1,10) g; +insert into \
test_multirange_join_2 select int4multirange(int4range(NULL, g*10, '(]'), \
int4range(g*10, g*20, '(]')) from generate_series(1,10) g; +insert into \
test_multirange_join_2 select int4multirange(int4range(g*10, g*20, '(]'), \
int4range(g*20, NULL, '[)')) from generate_series(1,10) g; +
+insert into test_range_join select int4range(g, g+10) from generate_series(1,20) g;
+insert into test_range_join select int4range(g, g+10000) from generate_series(1,10) \
g; +insert into test_range_join select int4range(NULL,g*10,'(]') from \
generate_series(1,10) g; +insert into test_range_join select \
int4range(g*10,NULL,'[)') from generate_series(1,10) g; +insert into test_range_join \
select int4range(g, g+10) from generate_series(1,20) g; +insert into test_range_join \
select 'empty'::int4range from generate_series(1,20) g; +insert into test_range_join \
select NULL from generate_series(1,5) g; +
+insert into test_elem_join select g from generate_series(1,20) g;
+insert into test_elem_join select g+10000 from generate_series(1,10) g;
+insert into test_elem_join select g*10 from generate_series(1,10) g;
+insert into test_elem_join select g from generate_series(1,20) g;
+insert into test_elem_join select NULL from generate_series(1,5) g;
+
+analyze test_multirange_join_1;
+analyze test_multirange_join_2;
+analyze test_range_join;
+analyze test_elem_join;
+
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 = mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 < mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 <= mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 > mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 >= mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 && mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 && ir');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir && mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 <@ mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 <@ ir');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir <@ mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 @> mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 @> ir');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir @> mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 << mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 << ir');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir << mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 >> mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 >> ir');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir >> mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 &< mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 &< ir');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir &< mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 &> mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 &> ir');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir &> mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_multirange_join_2 where mr1 -|- mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_range_join where mr1 -|- ir');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join, test_multirange_join_2 where ir -|- mr2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_elem_join, test_multirange_join_1 where elem <@ mr1');
+
+SELECT * FROM check_estimated_rows('
+select * from test_multirange_join_1, test_elem_join where mr1 @> elem');
+
+drop function check_estimated_rows;
+
+drop table test_multirange_join_1;
+drop table test_multirange_join_2;
+drop table test_range_join;
+drop table test_elem_join;
diff --git a/src/test/regress/sql/rangetypes.sql \
b/src/test/regress/sql/rangetypes.sql index 1a10f67f19..6031cd695a 100644
--- a/src/test/regress/sql/rangetypes.sql
+++ b/src/test/regress/sql/rangetypes.sql
@@ -616,3 +616,105 @@ create function inoutparam_fail(inout i anyelement, out r \
                anyrange)
 --should fail
 create function table_fail(i anyelement) returns table(i anyelement, r anyrange)
   as $$ select $1, '[1,10]' $$ language sql;
+
+-- test range join operators
+create table test_range_join_1(ir1 int4range);
+create table test_range_join_2(ir2 int4range);
+create table test_elem_join(elem int4);
+
+insert into test_range_join_1 select int4range(g, g+10) from generate_series(1,200) \
g; +insert into test_range_join_1 select int4range(g, g+10000) from \
generate_series(1,100) g; +insert into test_range_join_1 select \
int4range(NULL,g*10,'(]') from generate_series(1,10) g; +insert into \
test_range_join_1 select int4range(g*10,NULL,'[)') from generate_series(1,10) g; \
+insert into test_range_join_1 select int4range(g, g+10) from generate_series(1,200) \
g; +insert into test_range_join_1 select 'empty'::int4range from \
generate_series(1,20) g; +insert into test_range_join_1 select NULL from \
generate_series(1,50) g; +
+insert into test_range_join_2 select int4range(g+10, g+20) from \
generate_series(1,20) g; +insert into test_range_join_2 select int4range(g+5000, \
g+15000) from generate_series(1,10) g; +insert into test_range_join_2 select \
int4range(NULL,g*5,'(]') from generate_series(1,10) g; +insert into test_range_join_2 \
select int4range(g*5,NULL,'[)') from generate_series(1,10) g; +insert into \
test_range_join_2 select int4range(g, g+10) from generate_series(1,20) g; +insert \
into test_range_join_2 select 'empty'::int4range from generate_series(1,5) g; +insert \
into test_range_join_2 select NULL from generate_series(1,5) g; +
+insert into test_elem_join select g from generate_series(1,20) g;
+insert into test_elem_join select g+10000 from generate_series(1,10) g;
+insert into test_elem_join select g*10 from generate_series(1,10) g;
+insert into test_elem_join select g from generate_series(1,20) g;
+insert into test_elem_join select NULL from generate_series(1,5) g;
+
+analyze test_range_join_1;
+analyze test_range_join_2;
+analyze test_elem_join;
+
+create function check_estimated_rows(text) returns table (estimated int, actual int)
+language plpgsql as
+$$
+declare
+    ln text;
+    tmp text[];
+    first_row bool := true;
+begin
+    for ln in
+        execute format('explain analyze %s', $1)
+    loop
+        if first_row then
+            first_row := false;
+            tmp := regexp_match(ln, 'rows=(\d*) .* rows=(\d*)');
+            return query select tmp[1]::int, tmp[2]::int;
+        end if;
+    end loop;
+end;
+$$;
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 = ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 < ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 <= ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 > ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 >= ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 && ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 <@ ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 @> ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 << ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 >> ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 &< ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 &> ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_range_join_2 where ir1 -|- ir2');
+
+SELECT * FROM check_estimated_rows('
+select * from test_elem_join, test_range_join_1 where elem <@ ir1');
+
+SELECT * FROM check_estimated_rows('
+select * from test_range_join_1, test_elem_join where ir1 @> elem');
+
+drop function check_estimated_rows;
+
+drop table test_range_join_1;
+drop table test_range_join_2;
+drop table test_elem_join;



[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic