[prev in list] [next in list] [prev in thread] [next in thread] 

List:       postgresql-general
Subject:    [HACKERS] WITH ORDINALITY planner improvements
From:       Greg Stark <stark () mit ! edu>
Date:       2013-07-31 15:50:53
Message-ID: CAM-w4HN8Npo-vZeerQ0Mxaf5ps3Ytc1-vQr0L2=UmsreFEkvFw () mail ! gmail ! com
[Download RAW message or body]

On Mon, Jul 29, 2013 at 1:02 PM, Greg Stark <stark@mit.edu> wrote:
> On Mon, Jul 29, 2013 at 8:56 AM, Craig Ringer <craig@2ndquadrant.com> wrote:
>> Unless LATERAL provides a way to do lock-step iteration through a pair
>> (or more) of functions I don't think we can get rid of SRFs [in select target lists] yet
>
> You don't even need lateral. This works fine:
>
> postgres=# select * from generate_series(1,10) with ordinality as
> a(a,o) natural full outer join generate_series(1,5) with ordinality as
> b(b,o) ;
>
>  o  | a  | b
> ----+----+---
>   1 |  1 | 1
>   2 |  2 | 2
>   3 |  3 | 3
>   4 |  4 | 4
>   5 |  5 | 5
>   6 |  6 |
>   7 |  7 |
>   8 |  8 |
>   9 |  9 |
>  10 | 10 |
> (10 rows)
>
> However it occurs to me that the plan isn't ideal:
>
> postgres=# explain select * from generate_series(1,10) with ordinality
> as a(a,o) natural full outer join generate_series(1,5) with ordinality
> as b(b,o) ;
>                                       QUERY PLAN
> ---------------------------------------------------------------------------------------
>  Merge Full Join  (cost=119.66..199.66 rows=5000 width=24)
>    Merge Cond: (a.o = b.o)
>    ->  Sort  (cost=59.83..62.33 rows=1000 width=12)
>          Sort Key: a.o
>          ->  Function Scan on generate_series a  (cost=0.00..10.00
> rows=1000 width=12)
>    ->  Sort  (cost=59.83..62.33 rows=1000 width=12)
>          Sort Key: b.o
>          ->  Function Scan on generate_series b  (cost=0.00..10.00
> rows=1000 width=12)
> (8 rows)
>
>
> I think all that's required to avoid the sorts is teaching the planner
> that the Path has a PathKey of the ordinal column. I can look at that
> later but I'll go ahead and commit it without it at first. I wonder if
> it's also useful to teach the planner that the column is unique.

So I took a crack at this. But it's not working.

To generate the pathkey for the ordinality column I basically exported
make_pathkey_from_sortop() through a thin wrapper called
make_pathkey_for_functionscan() and passed the Var from the
reltargetlist and hard coded int8gt's oid. That might stand to be
generalized a bit but in any case it doesn't matter because it doesn't
work.

My best guess of what's going wrong is that the eclass is being
generated too late and instead of getting the canonical eclass it's
getting a freshly minted one that doesn't match the eclass for the
merge join pathkey. But as I said, that's just a guess. I'm missing a
high level view of the order of operations between turning parser
column references into vars into eclasses and what has to be done
when. My best guess is that what's needed is marking the ordinality
column with a sortref early on but again, as I said, I'm just
guessing.



-- 
greg


-- 
greg

["ordinality-path.diff" (application/octet-stream)]

*** a/src/backend/optimizer/path/allpaths.c
--- b/src/backend/optimizer/path/allpaths.c
***************
*** 1268,1274 **** set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, \
RangeTblEntry *rte)  required_outer = rel->lateral_relids;
  
  	/* Generate appropriate path */
! 	add_path(rel, create_functionscan_path(root, rel, required_outer));
  
  	/* Select cheapest path (pretty easy in this case...) */
  	set_cheapest(rel);
--- 1268,1274 ----
  	required_outer = rel->lateral_relids;
  
  	/* Generate appropriate path */
! 	add_path(rel, create_functionscan_path(root, rel, required_outer, \
rte->funcordinality));  
  	/* Select cheapest path (pretty easy in this case...) */
  	set_cheapest(rel);
*** a/src/backend/optimizer/path/pathkeys.c
--- b/src/backend/optimizer/path/pathkeys.c
***************
*** 782,787 **** make_pathkeys_for_sortclauses(PlannerInfo *root,
--- 782,820 ----
  }
  
  /****************************************************************************
+  *		PATHKEYS AND FUNCTION SCANS
+  ****************************************************************************/
+ 
+ /*
+  * make_pathkeys_for_functionscan
+  *		Generate a pathkeys list that represents the sort order specified by a
+  *		specific column of a function scan. This is used for WITH ORDINALITY
+  *
+  * This is basically exporting make_pathkey_from_sortop for callers that know
+  * exactly what expression they want to generate a pathkey list for. 
+  */
+ List *
+ make_pathkeys_for_functionscan(PlannerInfo *root,
+ 							   Expr *expr,
+ 							   Oid sortop)
+ 
+ {
+ 	List	   *pathkeys = NIL;
+ 	PathKey    *pathkey;
+ 	pathkey = make_pathkey_from_sortop(root,
+ 									   expr,
+ 									   sortop,
+ 									   false, /* nulls first */
+ 									   0, /* sortref */
+ 									   true);
+ 	
+ 	pathkeys = lappend(pathkeys, pathkey);
+ 	return pathkeys;
+ }
+ 
+ 
+ 
+ /****************************************************************************
   *		PATHKEYS AND MERGECLAUSES
   ****************************************************************************/
  
*** a/src/backend/optimizer/util/pathnode.c
--- b/src/backend/optimizer/util/pathnode.c
***************
*** 27,33 ****
  #include "parser/parsetree.h"
  #include "utils/lsyscache.h"
  #include "utils/selfuncs.h"
! 
  
  typedef enum
  {
--- 27,35 ----
  #include "parser/parsetree.h"
  #include "utils/lsyscache.h"
  #include "utils/selfuncs.h"
! #include "catalog/pg_opfamily.h"
! #include "catalog/pg_operator.h"
! #include "catalog/pg_type.h"
  
  typedef enum
  {
***************
*** 1623,1629 **** create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel,
   */
  Path *
  create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
! 						 Relids required_outer)
  {
  	Path	   *pathnode = makeNode(Path);
  
--- 1625,1631 ----
   */
  Path *
  create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
! 						 Relids required_outer, bool funcordinality)
  {
  	Path	   *pathnode = makeNode(Path);
  
***************
*** 1631,1637 **** create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
  	pathnode->parent = rel;
  	pathnode->param_info = get_baserel_parampathinfo(root, rel,
  													 required_outer);
! 	pathnode->pathkeys = NIL;	/* for now, assume unordered result */
  
  	cost_functionscan(pathnode, root, rel, pathnode->param_info);
  
--- 1633,1647 ----
  	pathnode->parent = rel;
  	pathnode->param_info = get_baserel_parampathinfo(root, rel,
  													 required_outer);
! 	if (funcordinality) {
! 		/* ordered by ordinality column */
! 		pathnode->pathkeys = 
! 			make_pathkeys_for_functionscan(root, 
! 										   llast(rel->reltargetlist), 
! 										   INT8GT);
! 	} else {
! 		pathnode->pathkeys = NIL; /* No information about ordering */
! 	}
  
  	cost_functionscan(pathnode, root, rel, pathnode->param_info);
  
*** a/src/include/catalog/pg_operator.h
--- b/src/include/catalog/pg_operator.h
***************
*** 171,182 **** DESCR("greater than or equal");
--- 171,184 ----
  
  DATA(insert OID = 410 ( "="		   PGNSP PGUID b t t	20	20	16 410 411 int8eq eqsel \
eqjoinsel ));  DESCR("equal");
+ #define INT8EQ 410
  DATA(insert OID = 411 ( "<>"	   PGNSP PGUID b f f	20	20	16 411 410 int8ne neqsel \
neqjoinsel ));  DESCR("not equal");
  DATA(insert OID = 412 ( "<"		   PGNSP PGUID b f f	20	20	16 413 415 int8lt \
scalarltsel scalarltjoinsel ));  DESCR("less than");
  DATA(insert OID = 413 ( ">"		   PGNSP PGUID b f f	20	20	16 412 414 int8gt \
scalargtsel scalargtjoinsel ));  DESCR("greater than");
+ #define INT8GT 413
  DATA(insert OID = 414 ( "<="	   PGNSP PGUID b f f	20	20	16 415 413 int8le \
scalarltsel scalarltjoinsel ));  DESCR("less than or equal");
  DATA(insert OID = 415 ( ">="	   PGNSP PGUID b f f	20	20	16 414 412 int8ge \
                scalargtsel scalargtjoinsel ));
*** a/src/include/optimizer/pathnode.h
--- b/src/include/optimizer/pathnode.h
***************
*** 70,76 **** extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo \
*rel,  extern Path *create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel,
  						 List *pathkeys, Relids required_outer);
  extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
! 						 Relids required_outer);
  extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
  					   Relids required_outer);
  extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
--- 70,76 ----
  extern Path *create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel,
  						 List *pathkeys, Relids required_outer);
  extern Path *create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
! 									  Relids required_outer, bool funcordinality);
  extern Path *create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
  					   Relids required_outer);
  extern Path *create_ctescan_path(PlannerInfo *root, RelOptInfo *rel,
*** a/src/include/optimizer/paths.h
--- b/src/include/optimizer/paths.h
***************
*** 174,179 **** extern List *build_join_pathkeys(PlannerInfo *root,
--- 174,182 ----
  extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
  							  List *sortclauses,
  							  List *tlist);
+ extern List * make_pathkeys_for_functionscan(PlannerInfo *root,
+ 											 Expr *expr,
+ 											 Oid sortop);
  extern void initialize_mergeclause_eclasses(PlannerInfo *root,
  								RestrictInfo *restrictinfo);
  extern void update_mergeclause_eclasses(PlannerInfo *root,



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


[prev in list] [next in list] [prev in thread] [next in thread] 

Configure | About | News | Add a list | Sponsored by KoreLogic