[prev in list] [next in list] [prev in thread] [next in thread] 

List:       monetdb-checkins
Subject:    MonetDB: rdf - Modify the way of generating sql table/col names
From:       Minh-Duc Pham <commits () monetdb ! org>
Date:       2014-08-30 21:48:43
Message-ID: hg.2f08774fbc54.1409435323.6315528441665844383 () monetdb2 ! cwi-incubator ! nl
[Download RAW message or body]

Changeset: 2f08774fbc54 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2f08774fbc54
Modified Files:
	monetdb5/extras/rdf/rdfparams.c
	monetdb5/extras/rdf/rdfschema.c
	monetdb5/extras/rdf/rdfschema.h
	sql/backends/monet5/sql_rdf.c
Branch: rdf
Log Message:

Modify the way of generating sql table/col names


diffs (truncated from 335 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfparams.c b/monetdb5/extras/rdf/rdfparams.c
--- a/monetdb5/extras/rdf/rdfparams.c
+++ b/monetdb5/extras/rdf/rdfparams.c
@@ -46,7 +46,7 @@ void createDefaultParamsFile(void){
 	fprintf(paramFile, "upperboundNumTables 1000\n");
 	//fprintf(paramFile, "simTfidfThreshold 0.75");
 	fprintf(paramFile, "minTableSize 1000\n");
-	fprintf(paramFile, "infreqTypeThreshold 0.1\n");
+	fprintf(paramFile, "infreqTypeThreshold 0.05\n");
 	fprintf(paramFile, "infreqPropThreshold 0.05\n");
 	fclose(paramFile); 
 }
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3951,8 +3951,8 @@ void buildLabelStat(LabelStat *labelStat
 			numDummy++;
 	}
 	
-	//printf("Total number of distinct labels in Top%d is %d \n", k, labelStat->numLabeladded);
-	//printf("Number of DUMMY freqCS: %d \n",numDummy);
+	printf("Total number of distinct labels in Top%d is %d \n", k, labelStat->numLabeladded);
+	printf("Number of DUMMY freqCS: %d \n",numDummy);
 	//Build list of FreqCS
 	labelStat->freqIdList = (int**) malloc(sizeof(int*) * labelStat->numLabeladded);
 	for (i =0; i < labelStat->numLabeladded; i++){
@@ -6702,6 +6702,27 @@ str getOrigObt(oid *obt, oid *origObt, B
 }
 #endif
 
+static
+oid getFirstEncodedSubjId(int tblIdx){
+	
+	return (BUN)(tblIdx + 1) << (sizeof(BUN)*8 - NBITS_FOR_CSID);
+}
+
+//Encoded subject BAT contains 
+//sequential numbers from getFirstEncodedSubjId()
+//to getFirstEncodedSubjId() + numberofelements 
+
+BAT* createEncodedSubjBat(int tblIdx, int num){
+	BAT* subjBat = NULL; 
+	
+	subjBat = BATnew(TYPE_void, TYPE_void , num + 1);
+	BATsetcount(subjBat,num);
+	BATseqbase(subjBat, 0);
+	BATseqbase(BATmirror(subjBat), getFirstEncodedSubjId(tblIdx));
+
+	return subjBat; 
+}
+
 #if NO_OUTPUTFILE == 0
 static
 char getObjTypeFromBATtype(int battype){
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -294,6 +294,10 @@ typedef struct SubCSSet{
 
 #define REMOVE_SMALL_TABLE	1	/* Remove SMALL but NOT dimension table*/
 
+#define APPENDSUBJECTCOLUMN	1	// The subject column actually doesn't need to be included into the relational table
+					// However, for creating the foreign key relationship, we add this column and
+					// mark it as a primary key
+
 
 typedef struct CSset{
 	CS* items;
@@ -527,5 +531,9 @@ isCSTable(CS item, oid name);
 rdf_export str
 printTKNZStringFromOid(oid id);
 
+rdf_export BAT*
+createEncodedSubjBat(int tblIdx, int num);
+
+
 
 #endif /* _RDFSCHEMA_H_ */
diff --git a/sql/backends/monet5/sql_rdf.c b/sql/backends/monet5/sql_rdf.c
--- a/sql/backends/monet5/sql_rdf.c
+++ b/sql/backends/monet5/sql_rdf.c
@@ -542,6 +542,80 @@ SQLrdfShred(Client cntxt, MalBlkPtr mb, 
 #endif /* HAVE_RAPTOR */
 }
 
+static
+void getTblSQLname(char *tmptbname, int tblIdx, int isExTbl, CStableStat *cstablestat, BATiter mapi, BAT *mbat){
+	str	baseTblName;
+	char	tmpstr[20]; 
+
+	if (isExTbl ==0) 
+		sprintf(tmpstr, "%d",tblIdx);
+	else //isExTbl == 1
+		sprintf(tmpstr, "ex%d",tblIdx);
+
+	getTblName(&baseTblName, cstablestat->lstcstable[tblIdx].tblname, mapi, mbat); 
+	sprintf(tmptbname, "%s", baseTblName);
+	strcat(tmptbname,tmpstr);
+
+	GDKfree(baseTblName);
+}
+
+//If colType == -1, it is a default column (name suffix is the column index only)
+//Otherwise, it is an ex-type column (name suffix is "<colIdx>type<colType>")
+static
+void getColSQLname(char *tmpcolname, int tblIdx, int colIdx, int colType, CStableStat *cstablestat, BATiter mapi, BAT *mbat){
+	str baseColName;
+	char    tmpstr[20];
+
+	if (colType == -1) sprintf(tmpstr, "%d",colIdx);
+	else 
+		sprintf(tmpstr, "%dtype%d",colIdx, colType); 
+	getTblName(&baseColName, cstablestat->lstcstable[tblIdx].lstProp[colIdx], mapi, mbat);
+	sprintf(tmpcolname, "%s", baseColName);
+	strcat(tmpcolname,tmpstr); 
+
+
+	GDKfree(baseColName);
+}
+
+static
+void getMvTblSQLname(char *tmpmvtbname, int tblIdx, int colIdx, CStableStat *cstablestat, BATiter mapi, BAT *mbat){
+	str baseTblName;
+	str baseColName; 
+
+	getTblName(&baseTblName, cstablestat->lstcstable[tblIdx].tblname, mapi, mbat);
+	getTblName(&baseColName, cstablestat->lstcstable[tblIdx].lstProp[colIdx], mapi, mbat);
+
+	sprintf(tmpmvtbname, "mv%s%d_%s%d", baseTblName, tblIdx, baseColName, colIdx);
+
+	GDKfree(baseTblName);
+	GDKfree(baseColName);
+}
+
+/*
+static
+addFKs(CStableStat* cstablestat, CSPropTypes *csPropTypes){
+	FILE            *fout;
+	char            filename[100];
+	int		i;
+	char		fromTbl[100]; 
+	char		fromTblCol[100]; 
+	char		toTbl[100];
+	char		toTblCol[100]; 
+	int		refTblId; 
+
+	strcpy(filename, "fkCreate.sql");
+	fout = fopen(filename, "wt");
+	for (i = 0; i < cstablestat->numTables; i++){
+		for(j = 0; j < csPropTypes[i].numProp; j++){
+			if (csPropTypes[i].lstPropTypes[j].isFKProp == 1){
+				refTblId = csPropTypes[i].lstPropTypes[j].refTblId;					
+			}
+		}
+	}
+	fclose(fout); 	
+
+}
+*/
 
 /* Re-organize triple table by using clustering storage
  * CALL rdf_reorganize('schema','tablename', 1);
@@ -560,13 +634,12 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
 	sql_schema *sch; 
 	int ret = 0; 
 	CStableStat *cstablestat; 
-	str	baseTblName;
 	char	tmptbname[100]; 
-	char	tmpstr[20]; 
+	char	tmpmvtbname[100];
 	char	tmptbnameex[100];
 	//char	tmpviewname[100]; 
-	str	baseColName;
 	char	tmpcolname[100]; 
+	char	tmpmvcolname[100];
 	//char	viewcommand[500];
 	sql_subtype tpe; 	
 	sql_subtype tpes[50];
@@ -710,50 +783,46 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
 	for (i = 0; i < cstablestat->numTables; i++){
 		//printf("creating table %d \n", i);
 
-		sprintf(tmpstr, "%d",i);
-		getTblName(&baseTblName, cstablestat->lstcstable[i].tblname, mapi, mbat); 
-		sprintf(tmptbname, "%s", baseTblName);
-		strcat(tmptbname,tmpstr);
+		getTblSQLname(tmptbname, i, 0, cstablestat, mapi, mbat);
 		printf("Table %d:||  %s ||\n",i, tmptbname);
 
 		cstables[i] = mvc_create_table(m, sch, tmptbname, tt_table, 0,
 				   SQL_PERSIST, 0, 3);
-		GDKfree(baseTblName);
 		totalNoTablesCreated++;
 		//Multivalues tables for each column
 		csmvtables[i] = (sql_table **)malloc(sizeof(sql_table*) * \
cstablestat->numPropPerTable[i]);  
+		#if APPENDSUBJECTCOLUMN
+		mvc_create_column(m, cstables[i], "subject",  &tpes[TYPE_oid]);
+		#endif
 		for (j = 0; j < cstablestat->numPropPerTable[i]; j++){
 
 			//TODO: Use propertyId from Propstat
-			sprintf(tmpstr, "%d",j);
-			getTblName(&baseColName, cstablestat->lstcstable[i].lstProp[j], mapi, mbat);
-			sprintf(tmpcolname, "%s", baseColName);
-			strcat(tmpcolname,tmpstr); 
-			//sprintf(tmpcolname, "col"BUNFMT,(cstablestat->lstcstable[i].lstProp[j]));
+			getColSQLname(tmpcolname, i, j, -1, cstablestat, mapi, mbat);
+
 
 			tmpbat = cstablestat->lstcstable[i].colBats[j];
 
 			mvc_create_column(m, cstables[i], tmpcolname,  &tpes[tmpbat->ttype]);
 			
-			GDKfree(baseColName);
 			//For multi-values table
 			tmpNumMVCols = cstablestat->lstcstable[i].lstMVTables[j].numCol;
 			if (tmpNumMVCols != 0){
-				sprintf(tmptbname, "mvtable%dp%d",i,j);
-				csmvtables[i][j] = mvc_create_table(m, sch, tmptbname, tt_table, 0, SQL_PERSIST, \
0, 3);  +				getMvTblSQLname(tmpmvtbname, i, j, cstablestat, mapi, mbat);
+				csmvtables[i][j] = mvc_create_table(m, sch, tmpmvtbname, tt_table, 0, \
SQL_PERSIST, 0, 3);   totalNoTablesCreated++;
 
 				//One column for key
-				sprintf(tmpcolname, "mvCol%dt%dpKey",i,j);
+				sprintf(tmpcolname, "mvKey");
 				tmpbat = cstablestat->lstcstable[i].lstMVTables[j].keyBat;
 				mvc_create_column(m, csmvtables[i][j], tmpcolname,  &tpes[tmpbat->ttype]);
 
 				//Value columns 
 				for (k = 0; k < tmpNumMVCols; k++){
-					sprintf(tmpcolname, "mvCol%dt%dp%dc",i,j,k);
+					getColSQLname(tmpmvcolname, i, j, k, cstablestat, mapi, mbat);
+
 					tmpbat = cstablestat->lstcstable[i].lstMVTables[j].mvBats[k];
-					mvc_create_column(m, csmvtables[i][j], tmpcolname,  &tpes[tmpbat->ttype]);
+					mvc_create_column(m, csmvtables[i][j], tmpmvcolname,  &tpes[tmpbat->ttype]);
 				}
 
 			}
@@ -767,20 +836,17 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
 		// Add non-default type table
 		if (cstablestat->lstcstableEx[i].numCol != 0){	
 
-			sprintf(tmpstr, "ex%d",i);
-			getTblName(&baseTblName, cstablestat->lstcstable[i].tblname, mapi, mbat); 
-			sprintf(tmptbnameex, "%s", baseTblName);
-			strcat(tmptbnameex,tmpstr);
+			getTblSQLname(tmptbnameex, i, 1, cstablestat, mapi, mbat);
 			printf("TableEx %d: || %s || \n",i, tmptbnameex);
 
 			cstablesEx[i] = mvc_create_table(m, sch, tmptbnameex, tt_table, 0,
 					   SQL_PERSIST, 0, 3);
-			GDKfree(baseTblName);
 			totalNoTablesCreated++;
 			totalNoExTables++;
 			for (j = 0; j < cstablestat->lstcstableEx[i].numCol; j++){
 				//TODO: Use propertyId from Propstat
-				sprintf(tmpcolname, "colex%dtype%d",cstablestat->lstcstableEx[i].mainTblColIdx[j], (int)(cstablestat->lstcstableEx[i].colTypes[j]));
+				getColSQLname(tmpcolname, i, cstablestat->lstcstableEx[i].mainTblColIdx[j], (int)(cstablestat->lstcstableEx[i].colTypes[j]), cstablestat, mapi, mbat);
+
 				tmpbat = cstablestat->lstcstableEx[i].colBats[j];
 				mvc_create_column(m, cstablesEx[i], tmpcolname,  &tpes[tmpbat->ttype]);				
 			}
@@ -789,15 +855,19 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
 
 		#endif
 
-
+		#if APPENDSUBJECTCOLUMN
+		{
+			BAT* subjBat = createEncodedSubjBat(i,BATcount(cstablestat->lstcstable[i].colBats[0]));
+                	store_funcs.append_col(m->session->tr,
+					mvc_bind_column(m, cstables[i],"subject"), 
+					subjBat, TYPE_bat);
+			BBPreclaim(subjBat);
+		}
+		#endif
 		for (j = 0; j < cstablestat->numPropPerTable[i]; j++){
 
 			//TODO: Use propertyId from Propstat
-			sprintf(tmpstr, "%d",j);
-			getTblName(&baseColName, cstablestat->lstcstable[i].lstProp[j], mapi, mbat);
-			sprintf(tmpcolname, "%s", baseColName);
-			strcat(tmpcolname,tmpstr); 
-			//sprintf(tmpcolname, "col"BUNFMT,(cstablestat->lstcstable[i].lstProp[j]));
+			getColSQLname(tmpcolname, i, j, -1, cstablestat, mapi, mbat);
 
 			tmpbat = cstablestat->lstcstable[i].colBats[j];
 
@@ -807,14 +877,12 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
 					mvc_bind_column(m, cstables[i],tmpcolname ), 
 					tmpbat, TYPE_bat);
 
-			GDKfree(baseColName);
 			//For multi-values table
 			tmpNumMVCols = cstablestat->lstcstable[i].lstMVTables[j].numCol;
 			if (tmpNumMVCols != 0){
-				sprintf(tmptbname, "mvtable%dp%d",i,j);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic