[prev in list] [next in list] [prev in thread] [next in thread]
List: monetdb-checkins
Subject: MonetDB: rdf - Modify the way of generating sql table/col names
From: Minh-Duc Pham <commits () monetdb ! org>
Date: 2014-08-30 21:48:43
Message-ID: hg.2f08774fbc54.1409435323.6315528441665844383 () monetdb2 ! cwi-incubator ! nl
[Download RAW message or body]
Changeset: 2f08774fbc54 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2f08774fbc54
Modified Files:
monetdb5/extras/rdf/rdfparams.c
monetdb5/extras/rdf/rdfschema.c
monetdb5/extras/rdf/rdfschema.h
sql/backends/monet5/sql_rdf.c
Branch: rdf
Log Message:
Modify the way of generating sql table/col names
diffs (truncated from 335 to 300 lines):
diff --git a/monetdb5/extras/rdf/rdfparams.c b/monetdb5/extras/rdf/rdfparams.c
--- a/monetdb5/extras/rdf/rdfparams.c
+++ b/monetdb5/extras/rdf/rdfparams.c
@@ -46,7 +46,7 @@ void createDefaultParamsFile(void){
fprintf(paramFile, "upperboundNumTables 1000\n");
//fprintf(paramFile, "simTfidfThreshold 0.75");
fprintf(paramFile, "minTableSize 1000\n");
- fprintf(paramFile, "infreqTypeThreshold 0.1\n");
+ fprintf(paramFile, "infreqTypeThreshold 0.05\n");
fprintf(paramFile, "infreqPropThreshold 0.05\n");
fclose(paramFile);
}
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -3951,8 +3951,8 @@ void buildLabelStat(LabelStat *labelStat
numDummy++;
}
- //printf("Total number of distinct labels in Top%d is %d \n", k, labelStat->numLabeladded);
- //printf("Number of DUMMY freqCS: %d \n",numDummy);
+ printf("Total number of distinct labels in Top%d is %d \n", k, labelStat->numLabeladded);
+ printf("Number of DUMMY freqCS: %d \n",numDummy);
//Build list of FreqCS
labelStat->freqIdList = (int**) malloc(sizeof(int*) * labelStat->numLabeladded);
for (i =0; i < labelStat->numLabeladded; i++){
@@ -6702,6 +6702,27 @@ str getOrigObt(oid *obt, oid *origObt, B
}
#endif
+static
+oid getFirstEncodedSubjId(int tblIdx){
+
+ return (BUN)(tblIdx + 1) << (sizeof(BUN)*8 - NBITS_FOR_CSID);
+}
+
+//Encoded subject BAT contains
+//sequential numbers from getFirstEncodedSubjId()
+//to getFirstEncodedSubjId() + numberofelements
+
+BAT* createEncodedSubjBat(int tblIdx, int num){
+ BAT* subjBat = NULL;
+
+ subjBat = BATnew(TYPE_void, TYPE_void , num + 1);
+ BATsetcount(subjBat,num);
+ BATseqbase(subjBat, 0);
+ BATseqbase(BATmirror(subjBat), getFirstEncodedSubjId(tblIdx));
+
+ return subjBat;
+}
+
#if NO_OUTPUTFILE == 0
static
char getObjTypeFromBATtype(int battype){
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -294,6 +294,10 @@ typedef struct SubCSSet{
#define REMOVE_SMALL_TABLE 1 /* Remove SMALL but NOT dimension table*/
+#define APPENDSUBJECTCOLUMN 1 // The subject column actually doesn't need to be included into the relational table
+ // However, for creating the foreign key relationship, we add this column and
+ // mark it as a primary key
+
typedef struct CSset{
CS* items;
@@ -527,5 +531,9 @@ isCSTable(CS item, oid name);
rdf_export str
printTKNZStringFromOid(oid id);
+rdf_export BAT*
+createEncodedSubjBat(int tblIdx, int num);
+
+
#endif /* _RDFSCHEMA_H_ */
diff --git a/sql/backends/monet5/sql_rdf.c b/sql/backends/monet5/sql_rdf.c
--- a/sql/backends/monet5/sql_rdf.c
+++ b/sql/backends/monet5/sql_rdf.c
@@ -542,6 +542,80 @@ SQLrdfShred(Client cntxt, MalBlkPtr mb,
#endif /* HAVE_RAPTOR */
}
+static
+void getTblSQLname(char *tmptbname, int tblIdx, int isExTbl, CStableStat *cstablestat, BATiter mapi, BAT *mbat){
+ str baseTblName;
+ char tmpstr[20];
+
+ if (isExTbl ==0)
+ sprintf(tmpstr, "%d",tblIdx);
+ else //isExTbl == 1
+ sprintf(tmpstr, "ex%d",tblIdx);
+
+ getTblName(&baseTblName, cstablestat->lstcstable[tblIdx].tblname, mapi, mbat);
+ sprintf(tmptbname, "%s", baseTblName);
+ strcat(tmptbname,tmpstr);
+
+ GDKfree(baseTblName);
+}
+
+//If colType == -1, ==> default col
+//If not, it is a ex-type column
+static
+void getColSQLname(char *tmpcolname, int tblIdx, int colIdx, int colType, CStableStat *cstablestat, BATiter mapi, BAT *mbat){
+ str baseColName;
+ char tmpstr[20];
+
+ if (colType == -1) sprintf(tmpstr, "%d",colIdx);
+ else
+ sprintf(tmpstr, "%dtype%d",colIdx, colType);
+ getTblName(&baseColName, cstablestat->lstcstable[tblIdx].lstProp[colIdx], mapi, mbat);
+ sprintf(tmpcolname, "%s", baseColName);
+ strcat(tmpcolname,tmpstr);
+
+
+ GDKfree(baseColName);
+}
+
+static
+void getMvTblSQLname(char *tmpmvtbname, int tblIdx, int colIdx, CStableStat *cstablestat, BATiter mapi, BAT *mbat){
+ str baseTblName;
+ str baseColName;
+
+ getTblName(&baseTblName, cstablestat->lstcstable[tblIdx].tblname, mapi, mbat);
+ getTblName(&baseColName, cstablestat->lstcstable[tblIdx].lstProp[colIdx], mapi, mbat);
+
+ sprintf(tmpmvtbname, "mv%s%d_%s%d", baseTblName, tblIdx, baseColName, colIdx);
+
+ GDKfree(baseTblName);
+ GDKfree(baseColName);
+}
+
+/*
+static
+addFKs(CStableStat* cstablestat, CSPropTypes *csPropTypes){
+ FILE *fout;
+ char filename[100];
+ int i;
+ char fromTbl[100];
+ char fromTblCol[100];
+ char toTbl[100];
+ char toTblCol[100];
+ int refTblId;
+
+ strcpy(filename, "fkCreate.sql");
+ fout = fopen(filename, "wt");
+ for (i = 0; i < cstablestat->numTables; i++){
+ for(j = 0; j < csPropTypes[i].numProp; j++){
+ if (csPropTypes[i].lstPropTypes[j].isFKProp == 1){
+ refTblId = csPropTypes[i].lstPropTypes[j].refTblId;
+ }
+ }
+ }
+ fclose(fout);
+
+}
+*/
/* Re-organize triple table by using clustering storage
* CALL rdf_reorganize('schema','tablename', 1);
@@ -560,13 +634,12 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
sql_schema *sch;
int ret = 0;
CStableStat *cstablestat;
- str baseTblName;
char tmptbname[100];
- char tmpstr[20];
+ char tmpmvtbname[100];
char tmptbnameex[100];
//char tmpviewname[100];
- str baseColName;
char tmpcolname[100];
+ char tmpmvcolname[100];
//char viewcommand[500];
sql_subtype tpe;
sql_subtype tpes[50];
@@ -710,50 +783,46 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
for (i = 0; i < cstablestat->numTables; i++){
//printf("creating table %d \n", i);
- sprintf(tmpstr, "%d",i);
- getTblName(&baseTblName, cstablestat->lstcstable[i].tblname, mapi, mbat);
- sprintf(tmptbname, "%s", baseTblName);
- strcat(tmptbname,tmpstr);
+ getTblSQLname(tmptbname, i, 0, cstablestat, mapi, mbat);
printf("Table %d:|| %s ||\n",i, tmptbname);
cstables[i] = mvc_create_table(m, sch, tmptbname, tt_table, 0,
SQL_PERSIST, 0, 3);
- GDKfree(baseTblName);
totalNoTablesCreated++;
//Multivalues tables for each column
csmvtables[i] = (sql_table **)malloc(sizeof(sql_table*) * cstablestat->numPropPerTable[i]);
+ #if APPENDSUBJECTCOLUMN
+ mvc_create_column(m, cstables[i], "subject", &tpes[TYPE_oid]);
+ #endif
for (j = 0; j < cstablestat->numPropPerTable[i]; j++){
//TODO: Use propertyId from Propstat
- sprintf(tmpstr, "%d",j);
- getTblName(&baseColName, cstablestat->lstcstable[i].lstProp[j], mapi, mbat);
- sprintf(tmpcolname, "%s", baseColName);
- strcat(tmpcolname,tmpstr);
- //sprintf(tmpcolname, "col"BUNFMT,(cstablestat->lstcstable[i].lstProp[j]));
+ getColSQLname(tmpcolname, i, j, -1, cstablestat, mapi, mbat);
+
tmpbat = cstablestat->lstcstable[i].colBats[j];
mvc_create_column(m, cstables[i], tmpcolname, &tpes[tmpbat->ttype]);
- GDKfree(baseColName);
//For multi-values table
tmpNumMVCols = cstablestat->lstcstable[i].lstMVTables[j].numCol;
if (tmpNumMVCols != 0){
- sprintf(tmptbname, "mvtable%dp%d",i,j);
- csmvtables[i][j] = mvc_create_table(m, sch, tmptbname, tt_table, 0, SQL_PERSIST, 0, 3);
+ getMvTblSQLname(tmpmvtbname, i, j, cstablestat, mapi, mbat);
+ csmvtables[i][j] = mvc_create_table(m, sch, tmpmvtbname, tt_table, 0, SQL_PERSIST, 0, 3);
totalNoTablesCreated++;
//One column for key
- sprintf(tmpcolname, "mvCol%dt%dpKey",i,j);
+ sprintf(tmpcolname, "mvKey");
tmpbat = cstablestat->lstcstable[i].lstMVTables[j].keyBat;
mvc_create_column(m, csmvtables[i][j], tmpcolname, &tpes[tmpbat->ttype]);
//Value columns
for (k = 0; k < tmpNumMVCols; k++){
- sprintf(tmpcolname, "mvCol%dt%dp%dc",i,j,k);
+ getColSQLname(tmpmvcolname, i, j, k, cstablestat, mapi, mbat);
+
tmpbat = cstablestat->lstcstable[i].lstMVTables[j].mvBats[k];
- mvc_create_column(m, csmvtables[i][j], tmpcolname, &tpes[tmpbat->ttype]);
+ mvc_create_column(m, csmvtables[i][j], tmpmvcolname, &tpes[tmpbat->ttype]);
}
}
@@ -767,20 +836,17 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
// Add non-default type table
if (cstablestat->lstcstableEx[i].numCol != 0){
- sprintf(tmpstr, "ex%d",i);
- getTblName(&baseTblName, cstablestat->lstcstable[i].tblname, mapi, mbat);
- sprintf(tmptbnameex, "%s", baseTblName);
- strcat(tmptbnameex,tmpstr);
+ getTblSQLname(tmptbnameex, i, 1, cstablestat, mapi, mbat);
printf("TableEx %d: || %s || \n",i, tmptbnameex);
cstablesEx[i] = mvc_create_table(m, sch, tmptbnameex, tt_table, 0,
SQL_PERSIST, 0, 3);
- GDKfree(baseTblName);
totalNoTablesCreated++;
totalNoExTables++;
for (j = 0; j < cstablestat->lstcstableEx[i].numCol; j++){
//TODO: Use propertyId from Propstat
- sprintf(tmpcolname, "colex%dtype%d",cstablestat->lstcstableEx[i].mainTblColIdx[j], (int)(cstablestat->lstcstableEx[i].colTypes[j]));
+ getColSQLname(tmpcolname, i, cstablestat->lstcstableEx[i].mainTblColIdx[j], (int)(cstablestat->lstcstableEx[i].colTypes[j]), cstablestat, mapi, mbat);
+
tmpbat = cstablestat->lstcstableEx[i].colBats[j];
mvc_create_column(m, cstablesEx[i], tmpcolname, &tpes[tmpbat->ttype]);
}
@@ -789,15 +855,19 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
#endif
-
+ #if APPENDSUBJECTCOLUMN
+ {
+ BAT* subjBat = createEncodedSubjBat(i,BATcount(cstablestat->lstcstable[i].colBats[0]));
+ store_funcs.append_col(m->session->tr,
+ mvc_bind_column(m, cstables[i],"subject"),
+ subjBat, TYPE_bat);
+ BBPreclaim(subjBat);
+ }
+ #endif
for (j = 0; j < cstablestat->numPropPerTable[i]; j++){
//TODO: Use propertyId from Propstat
- sprintf(tmpstr, "%d",j);
- getTblName(&baseColName, cstablestat->lstcstable[i].lstProp[j], mapi, mbat);
- sprintf(tmpcolname, "%s", baseColName);
- strcat(tmpcolname,tmpstr);
- //sprintf(tmpcolname, "col"BUNFMT,(cstablestat->lstcstable[i].lstProp[j]));
+ getColSQLname(tmpcolname, i, j, -1, cstablestat, mapi, mbat);
tmpbat = cstablestat->lstcstable[i].colBats[j];
@@ -807,14 +877,12 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr
mvc_bind_column(m, cstables[i],tmpcolname ),
tmpbat, TYPE_bat);
- GDKfree(baseColName);
//For multi-values table
tmpNumMVCols = cstablestat->lstcstable[i].lstMVTables[j].numCol;
if (tmpNumMVCols != 0){
- sprintf(tmptbname, "mvtable%dp%d",i,j);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic