[prev in list] [next in list] [prev in thread] [next in thread] 

List:       monetdb-checkins
Subject:    MonetDB: mosaic - Squeeze 4 more bytes from mosaic header
From:       Martin Kersten <commits () monetdb ! org>
Date:       2014-08-30 10:07:45
Message-ID: hg.2e32f68867cd.1409393265.6315528441665844383 () monetdb2 ! cwi-incubator ! nl
[Download RAW message or body]

Changeset: 2e32f68867cd for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2e32f68867cd
Modified Files:
	monetdb5/modules/mal/mosaic.c
	monetdb5/modules/mal/mosaic.h
	monetdb5/modules/mal/mosaic_delta.c
	monetdb5/modules/mal/mosaic_dict.c
	monetdb5/modules/mal/mosaic_linear.c
	monetdb5/modules/mal/mosaic_rle.c
	monetdb5/optimizer/opt_mosaic.c
Branch: mosaic
Log Message:

Squeeze 4 more bytes from mosaic header


diffs (296 lines):

diff --git a/monetdb5/modules/mal/mosaic.c b/monetdb5/modules/mal/mosaic.c
--- a/monetdb5/modules/mal/mosaic.c
+++ b/monetdb5/modules/mal/mosaic.c
@@ -328,6 +328,19 @@ MOScompressInternal(Client cntxt, int *r
 				task->dst = ((char*) task->blk)+ MosaicBlkSize;
 				*task->blk = MOSeol;
 			}
+			break;
+		case MOSAIC_NONE:
+		case MOSAIC_ZONE:
+			if ( MOScnt(task->blk) == MOSlimit()){
+				MOSupdateHeader(cntxt,task);
+				if( MOStag(task->blk) == MOSAIC_NONE)
+					MOSskip_none(cntxt,task);
+				else
+					MOSskip_zone(cntxt,task);
+				// always start with an EOL block
+				task->dst = ((char*) task->blk)+ MosaicBlkSize;
+				*task->blk = MOSeol;
+			}
 		}
 		// apply the compression to a chunk
 		switch(cand){
diff --git a/monetdb5/modules/mal/mosaic.h b/monetdb5/modules/mal/mosaic.h
--- a/monetdb5/modules/mal/mosaic.h
+++ b/monetdb5/modules/mal/mosaic.h
@@ -63,20 +63,24 @@ typedef struct MOSAICHEADER{
 	BUN offset[MOSAICINDEX];
 } * MosaicHdr;
 
-// bit stuffed header block
-typedef lng *MosaicBlk;
-#define MOStag(Blk) (*(Blk)>>56)
-#define MOSsetTag(Tag)  ((lng) (Tag) <<56)
-#define MOScnt(Blk) (BUN)(*(Blk) & 03777777777777777)
+// bit stuffed header block, currently 4 bytes wide
+#define MOSshift 24
+typedef int *MosaicBlk;
+
+#define MOStag(Blk) (*(Blk)>>MOSshift)
+#define MOSsetTag(Tag)  ((int) (Tag) <<MOSshift)
+#define MOScnt(Blk) (BUN)(*(Blk) & ~(0377<<MOSshift))
 #define MOSinc(Blk,I) *(Blk)= *(Blk)+I
 
-#define MOSnone (((lng)MOSAIC_NONE) <<56)
-#define MOSrle (((lng)MOSAIC_RLE) <<56)
-#define MOSdict (((lng)MOSAIC_DICT) <<56)
-#define MOSlinear (((lng)MOSAIC_LINEAR) <<56)
-#define MOSdelta (((lng)MOSAIC_DELTA) <<56)
-#define MOSzone (((lng)MOSAIC_ZONE) <<56)
-#define MOSeol (((lng)MOSAIC_EOL) <<56)
+#define MOSnone (((int)MOSAIC_NONE) <<MOSshift)
+#define MOSrle (((int)MOSAIC_RLE) <<MOSshift)
+#define MOSdict (((int)MOSAIC_DICT) <<MOSshift)
+#define MOSlinear (((int)MOSAIC_LINEAR) <<MOSshift)
+#define MOSdelta (((int)MOSAIC_DELTA) <<MOSshift)
+#define MOSzone (((int)MOSAIC_ZONE) <<MOSshift)
+#define MOSeol (((int)MOSAIC_EOL) <<MOSshift)
+
+#define MOSlimit() (int) ~(0377<<MOSshift)
 
 /* Memory word alignement is type and platform dependent.
  * We use an encoding that fits the column type requirements
diff --git a/monetdb5/modules/mal/mosaic_delta.c b/monetdb5/modules/mal/mosaic_delta.c
--- a/monetdb5/modules/mal/mosaic_delta.c
+++ b/monetdb5/modules/mal/mosaic_delta.c
@@ -77,6 +77,7 @@ MOSskip_delta(Client cntxt, MOStask task
 			break;\
 		val = *w;\
 	}\
+	if ( i > MOSlimit() ) i = MOSlimit();\
 	factor = (float)((int)i * sizeof(TYPE))/  (MosaicBlkSize + sizeof(TYPE)+(bte)i-1);\
 }
 
@@ -97,6 +98,7 @@ MOSestimate_delta(Client cntxt, MOStask 
 					break;
 				val = *w;
 			}
+			if ( i > MOSlimit() ) i = MOSlimit();
 			factor = ((float)i * sizeof(int))/  (MosaicBlkSize + sizeof(oid)+(bte)i-1);
 		}
 	case TYPE_wrd: Estimate_delta(wrd); break;
@@ -123,10 +125,11 @@ MOSestimate_delta(Client cntxt, MOStask 
 
 #define DELTAcompress(TYPE)\
 {	TYPE *w = (TYPE*)task->src, val= *w, delta;\
+	BUN limit = task->elm > MOSlimit()? MOSlimit():task->elm;\
 	task->dst = ((char*) task->blk) + MosaicBlkSize;\
 	*(TYPE*)task->dst = val;\
 	task->dst += sizeof(TYPE);\
-	for(w++,i =1; i<task->elm; i++,w++){\
+	for(w++,i =1; i<limit; i++,w++){\
 		delta = *w -val;\
 		if ( delta < -127 || delta >127)\
 			break;\
@@ -156,10 +159,11 @@ MOScompress_delta(Client cntxt, MOStask 
 #endif
 	case TYPE_oid:
 		{	oid *w = (oid*)task->src, val= *w, delta;
+			BUN limit = task->elm > MOSlimit()? MOSlimit():task->elm;
 			task->dst = ((char*) task->blk) + MosaicBlkSize;
 			*(oid*)task->dst = val;
 			task->dst += sizeof(oid);
-			for(w++,i =1; i<task->elm; i++,w++){
+			for(w++,i =1; i<limit; i++,w++){
 				delta = *w -val;
 				if ( delta < 256)
 					break;
@@ -172,10 +176,11 @@ MOScompress_delta(Client cntxt, MOStask 
 		break;
 	case TYPE_int:
 		{	int *w = (int*)task->src, val= *w, delta;
+			BUN limit = task->elm > MOSlimit()? MOSlimit():task->elm;
 			task->dst = ((char*) task->blk) + MosaicBlkSize;
 			*(int*)task->dst = val;
 			task->dst += sizeof(int);
-			for(w++,i =1; i<task->elm; i++,w++){
+			for(w++,i =1; i<limit; i++,w++){
 				delta = *w -val;
 				if ( delta < -127 || delta >127)
 					break;
diff --git a/monetdb5/modules/mal/mosaic_dict.c b/monetdb5/modules/mal/mosaic_dict.c
--- a/monetdb5/modules/mal/mosaic_dict.c
+++ b/monetdb5/modules/mal/mosaic_dict.c
@@ -123,6 +123,7 @@ MOSskip_dict(Client cntxt, MOStask task)
 			cnt++;\
 		}\
 	}\
+	if ( i > MOSlimit() ) i = MOSlimit();\
 	if(i) factor = (flt) ((int)i * sizeof(int)) / (2 * MosaicBlkSize + sizeof(int) * dictsize +i);\
 }
 
@@ -160,6 +161,7 @@ MOSestimate_dict(Client cntxt, MOStask t
 					cnt++;
 				}
 			}
+			if ( i > MOSlimit() ) i = MOSlimit();
 			if(i) factor = (flt) ((int)i * sizeof(int)) / (2 * MosaicBlkSize + sizeof(int) * dictsize +i);
 		}
 	}
@@ -173,8 +175,9 @@ MOSestimate_dict(Client cntxt, MOStask t
 #define DICTcompress(TPE)\
 {	TPE *val = (TPE*)task->src;\
 	TPE *dict = (TPE*)((char*)task->blk+ 2 * MosaicBlkSize);\
+	BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;\
 	task->dst = ((char*) dict)+ sizeof(TPE)*dictsize;\
-	for(i =0; i<task->elm; i++, val++){\
+	for(i =0; i<limit; i++, val++){\
 		for(j= 0; j< *size; j++)\
 			if( dict[j] == *val) {\
 				MOSinc(blk,1);\
@@ -186,10 +189,10 @@ MOSestimate_dict(Client cntxt, MOStask t
 				task->dst += wordaligned(MOScnt(blk) %2,TPE);\
 				break;\
 			}\
+			MOSinc(blk,1);\
 			dict[j] = *val;\
 			*size = *size+1;\
 			*task->dst++ = (char) j;\
-			MOSinc(blk,1);\
 		}\
 	}\
 	task->src = (char*) val;\
@@ -219,8 +222,9 @@ MOScompress_dict(Client cntxt, MOStask t
 	case TYPE_lng:
 		{	lng *val = (lng*)task->src;
 			lng *dict = (lng*)((char*)task->blk+ 2 * MosaicBlkSize);
+			BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;
 			task->dst = ((char*) dict)+ sizeof(lng)*dictsize;
-			for(i =0; i<task->elm; i++, val++){
+			for(i =0; i<limit; i++, val++){
 				for(j= 0; j< *size; j++)
 					if( dict[j] == *val) {
 						MOSinc(blk,1);
@@ -233,10 +237,10 @@ MOScompress_dict(Client cntxt, MOStask t
 						task->dst += wordaligned(MOScnt(blk) %2,lng);
 						break;
 					}
+					MOSinc(blk,1);
 					dict[j] = *val;
 					*size = *size+1;
 					*task->dst++ = (char) j;
-					MOSinc(blk,1);
 				}
 			}
 			task->src = (char*) val;
diff --git a/monetdb5/modules/mal/mosaic_linear.c b/monetdb5/modules/mal/mosaic_linear.c
--- a/monetdb5/modules/mal/mosaic_linear.c
+++ b/monetdb5/modules/mal/mosaic_linear.c
@@ -126,6 +126,7 @@ MOSskip_linear(Client cntxt, MOStask tas
 	for(i =1; i < task->elm; i++)\
 	if ( ((TYPE*)task->src)[i] != (TYPE)(val + (int)i * step))\
 		break;\
+	if( i >= MOSlimit()) i = MOSlimit();\
 	factor =  ( (flt)i * sizeof(TYPE))/(MosaicBlkSize + 2 * sizeof(TYPE));\
 }
 
@@ -154,6 +155,7 @@ MOSestimate_linear(Client cntxt, MOStask
 			for(i =1; i<task->elm; i++)
 			if ( ((int*)task->src)[i] != (int)(val + (int)i * step))
 				break;
+			if( i >= MOSlimit()) i = MOSlimit();
 			factor =  ( (flt)i * sizeof(int))/(MosaicBlkSize + 2 * sizeof(int));
 		}
 	}
@@ -167,7 +169,8 @@ MOSestimate_linear(Client cntxt, MOStask
 #define LINEARcompress(TYPE)\
 {	TYPE val = *(TYPE*) task->src;\
 	TYPE step = *(TYPE*) (task->src + sizeof(TYPE)) - val;\
-	for(i =1; i<task->elm; i++)\
+	BUN limit = task->elm > MOSlimit()? MOSlimit():task->elm;\
+	for(i =1; i<limit; i++)\
 	if ( ((TYPE*)task->src)[i] != (TYPE)(val + (int)i * step))\
 		break;\
 	MOSinc(blk,i);\
@@ -201,7 +204,8 @@ MOScompress_linear(Client cntxt, MOStask
 	case TYPE_int:
 		{	int val = *(int*) task->src;\
 			int step = *(int*) (task->src + sizeof(int)) - val;\
-			for(i =1; i<task->elm; i++)\
+			BUN limit = task->elm > MOSlimit()? MOSlimit():task->elm;
+			for(i =1; i<limit; i++)\
 			if ( ((int*)task->src)[i] != (int)(val + (int)i * step))\
 				break;\
 			MOSinc(blk,i);\
diff --git a/monetdb5/modules/mal/mosaic_rle.c b/monetdb5/modules/mal/mosaic_rle.c
--- a/monetdb5/modules/mal/mosaic_rle.c
+++ b/monetdb5/modules/mal/mosaic_rle.c
@@ -108,6 +108,7 @@ MOSskip_rle(Client cntxt, MOStask task)
 	for(i =1; i < task->elm; i++)\
 	if ( ((TYPE*)task->src)[i] != val)\
 		break;\
+	if ( i > MOSlimit() ) i = MOSlimit();\
 	factor = ( (flt)i * sizeof(TYPE))/ (MosaicBlkSize + sizeof(TYPE));\
 }
 
@@ -135,6 +136,7 @@ MOSestimate_rle(Client cntxt, MOStask ta
 			for(i =1; i<task->elm; i++)
 			if ( ((int*)task->src)[i] != val)
 				break;
+			if ( i > MOSlimit() ) i = MOSlimit();
 			factor = ( (flt)i * sizeof(int))/ (MosaicBlkSize + sizeof(int));
 		}
 	}
@@ -146,16 +148,17 @@ MOSestimate_rle(Client cntxt, MOStask ta
 
 // insert a series of values into the compressor block using rle.
 #define RLEcompress(TYPE)\
-	{	TYPE val = *(TYPE*) task->src;\
-		TYPE *dst = (TYPE*) task->dst;\
-		*dst = val;\
-		for(i =1; i<task->elm; i++)\
-		if ( ((TYPE*)task->src)[i] != val)\
-			break;\
-		MOSinc(blk,i);\
-		task->dst +=  sizeof(TYPE);\
-		task->src += i * sizeof(TYPE);\
-	}
+{	TYPE val = *(TYPE*) task->src;\
+	TYPE *dst = (TYPE*) task->dst;\
+	BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;\
+	*dst = val;\
+	for(i =1; i<limit; i++)\
+	if ( ((TYPE*)task->src)[i] != val)\
+		break;\
+	MOSinc(blk,i);\
+	task->dst +=  sizeof(TYPE);\
+	task->src += i * sizeof(TYPE);\
+}
 
 void
 MOScompress_rle(Client cntxt, MOStask task)
@@ -181,8 +184,9 @@ MOScompress_rle(Client cntxt, MOStask ta
 	case TYPE_int:
 		{	int val = *(int*) task->src;
 			int *dst = (int*) task->dst;
+			BUN limit = task->elm > MOSlimit()? MOSlimit(): task->elm;
 			*dst = val;
-			for(i =1; i<task->elm; i++)
+			for(i =1; i<limit; i++)
 			if ( ((int*)task->src)[i] != val)
 				break;
 			MOSinc(blk,i);
diff --git a/monetdb5/optimizer/opt_mosaic.c b/monetdb5/optimizer/opt_mosaic.c
--- a/monetdb5/optimizer/opt_mosaic.c
+++ b/monetdb5/optimizer/opt_mosaic.c
@@ -37,6 +37,9 @@ static int OPTmosaicType(MalBlkPtr mb, I
 	case TYPE_sht:
 	case TYPE_int:
 	case TYPE_lng:
+#ifdef HAVE_HGE
+	case TYPE_hge:
+#endif
 	case TYPE_oid:
 	case TYPE_wrd:
 	case TYPE_flt:
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list
[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic