[prev in list] [next in list] [prev in thread] [next in thread]
List: pgsql-bugs
Subject: Re: BUG #15518: intarray index crashes hard
From: Andrew Gierth <andrew () tao11 ! riddles ! org ! uk>
Date: 2018-11-23 21:49:22
Message-ID: 878t1jsczr.fsf () news-spur ! riddles ! org ! uk
[Download RAW message or body]
>>>>> "PG" == PG Bug reporting form <noreply@postgresql.org> writes:
PG> Obviously it's not intended that gist__int_ops should actually work
PG> with data of this kind - that's what gist__intbig_ops is for. But
PG> it's not reasonable for it to crash rather than returning an error.
PG> I'm working on a patch.
And here it is.
This version doesn't restrict the sparseness of keys any more than it
needs to for safety. This necessitates improving the O(N^2) compression
algorithm to O(N), because otherwise it's possible to create datasets
that cause a single compression call to run for months (with no check
for interrupts, either).
There are essentially 4 specific issues addressed:
1. Integer overflow in internal_size could result in memory corruption
in decompression since a zero-length array would be allocated and
then written to.
2. Integer overflow in g_int_compress could cause pessimal merge
choices, resulting in unnecessarily large ranges (which would in turn
trigger issue 1 above)
3. Even without overflow, array sizes could become large enough to cause
unexplained memory allocation errors, so cap the sizes and report
actual errors pointing at gist__intbig_ops as needed.
4. Large inputs to the compression function always consist of large runs
of consecutive integers, and the compression loop was processing
these one at a time in an O(N^2) manner with a lot of overhead. The
expected runtime of this function could easily exceed 6 months as a
result. Performing a linear-time first pass reduces the worst case
to something on the order of seconds.
--
Andrew (irc:RhodiumToad)
["iafix.patch" (text/x-patch)]
diff --git a/contrib/intarray/_int_gist.c b/contrib/intarray/_int_gist.c
index 911d18023b..fe10cb56b6 100644
--- a/contrib/intarray/_int_gist.c
+++ b/contrib/intarray/_int_gist.c
@@ -13,6 +13,17 @@
#define GETENTRY(vec,pos) ((ArrayType *) DatumGetPointer((vec)->vector[(pos)].key))
/*
+ * Control the maximum sparseness of compressed keys.
+ *
+ * The upper safe bound for this limit is half the maximum allocatable array
+ * size. A lower bound would give more guarantees that pathological data
+ * wouldn't eat excessive CPU and memory, but at the expense of breaking
+ * possibly working (after a fashion) indexes.
+ */
+#define MAXNUMELTS (Min((MaxAllocSize / sizeof(Datum)),((MaxAllocSize - \
ARR_OVERHEAD_NONULLS(1)) / sizeof(int)))/2) +/* or: #define MAXNUMELTS 1000000 */
+
+/*
** GiST support methods
*/
PG_FUNCTION_INFO_V1(g_int_consistent);
@@ -141,11 +152,13 @@ g_int_compress(PG_FUNCTION_ARGS)
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval;
ArrayType *r;
- int len;
+ int len,
+ lenr;
int *dr;
int i,
- min,
+ j,
cand;
+ int64 min;
if (entry->leafkey)
{
@@ -186,23 +199,62 @@ g_int_compress(PG_FUNCTION_ARGS)
dr = ARRPTR(r);
- for (i = len - 1; i >= 0; i--)
- dr[2 * i] = dr[2 * i + 1] = dr[i];
+ /*
+ * "len" at this point is the number of ranges we will construct.
+ * "lenr" is the number of ranges we must eventually remove by
+ * merging, we must be careful to remove no more than this number.
+ */
+ lenr = len - MAXNUMRANGE;
+
+ /*
+ * Initially assume we can merge consecutive ints into a range. but we
+ * must count every value removed and stop when lenr runs out
+ */
+ for (j = i = len - 1; i > 0 && lenr > 0; i--, j--)
+ {
+ int r_end = dr[i];
+ int r_start = r_end;
+ while (i > 0 && lenr > 0 && dr[i-1] == r_start - 1)
+ --r_start, --i, --lenr;
+ dr[2*j] = r_start;
+ dr[2*j+1] = r_end;
+ }
+ /* just copy the rest, if any, as trivial ranges */
+ for (; i >= 0; i--, j--)
+ dr[2*j] = dr[2*j + 1] = dr[i];
- len *= 2;
+ if (++j)
+ {
+ /*
+ * shunt everything down to start at the right place
+ */
+ memmove((void *) &dr[0], (void *) &dr[2*j], 2*(len - j) * sizeof(int32));
+ }
+ /*
+ * make "len" be number of array elements, not ranges
+ */
+ len = 2*(len - j);
cand = 1;
while (len > MAXNUMRANGE * 2)
{
- min = INT_MAX;
+ min = PG_INT64_MAX;
for (i = 2; i < len; i += 2)
- if (min > (dr[i] - dr[i - 1]))
+ if (min > ((int64)dr[i] - (int64)dr[i - 1]))
{
- min = (dr[i] - dr[i - 1]);
+ min = ((int64)dr[i] - (int64)dr[i - 1]);
cand = i;
}
memmove((void *) &dr[cand - 1], (void *) &dr[cand + 1], (len - cand - 1) * \
sizeof(int32)); len -= 2;
}
+ /*
+ * check sparseness of result
+ */
+ lenr = internal_size(dr, len);
+ if (lenr < 0 || lenr > MAXNUMELTS)
+ ereport(ERROR,
+ (errmsg("data is too sparse, recreate index using gist__intbig_ops opclass \
instead"))); +
r = resize_intArrayType(r, len);
retval = palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(r),
@@ -260,6 +312,9 @@ g_int_decompress(PG_FUNCTION_ARGS)
din = ARRPTR(in);
lenr = internal_size(din, lenin);
+ if (lenr < 0 || lenr > MAXNUMELTS)
+ ereport(ERROR,
+ (errmsg("compressed array is too big, recreate index using gist__intbig_ops \
opclass instead")));
r = new_intArrayType(lenr);
dr = ARRPTR(r);
diff --git a/contrib/intarray/_int_tool.c b/contrib/intarray/_int_tool.c
index d86485dfa5..7288b35b02 100644
--- a/contrib/intarray/_int_tool.c
+++ b/contrib/intarray/_int_tool.c
@@ -225,6 +225,7 @@ new_intArrayType(int num)
/* if no elements, return a zero-dimensional array */
if (num <= 0)
{
+ Assert(num == 0);
r = construct_empty_array(INT4OID);
return r;
}
@@ -252,6 +253,7 @@ resize_intArrayType(ArrayType *a, int num)
/* if no elements, return a zero-dimensional array */
if (num <= 0)
{
+ Assert(num == 0);
ARR_NDIM(a) = 0;
return a;
}
@@ -289,14 +291,18 @@ int
internal_size(int *a, int len)
{
int i,
- size = 0;
+ size;
+ int64 tsize = 0;
for (i = 0; i < len; i += 2)
{
if (!i || a[i] != a[i - 1]) /* do not count repeated range */
- size += a[i + 1] - a[i] + 1;
+ tsize += (int64)(a[i + 1]) - (int64)(a[i]) + 1;
}
+ size = (int) tsize;
+ if ((int64)size != tsize)
+ return -1; /* overflow */
return size;
}
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic