diff -cr --new-file pgsql/src/backend/commands/explain.c pgsql-hashdistinct/src/backend/commands/explain.c
*** pgsql/src/backend/commands/explain.c 2006-04-08 14:49:52.000000000 -0400
--- pgsql-hashdistinct/src/backend/commands/explain.c 2006-08-02 12:30:20.000000000 -0400
***************
*** 560,565 ****
--- 560,568 ----
case T_Unique:
pname = "Unique";
break;
+ case T_HashDistinct:
+ pname = "HashDistinct";
+ break;
case T_SetOp:
switch (((SetOp *) plan)->cmd)
{
diff -cr --new-file pgsql/src/backend/executor/execAmi.c pgsql-hashdistinct/src/backend/executor/execAmi.c
*** pgsql/src/backend/executor/execAmi.c 2006-03-05 10:58:25.000000000 -0500
--- pgsql-hashdistinct/src/backend/executor/execAmi.c 2006-08-02 12:30:58.000000000 -0400
***************
*** 41,46 ****
--- 41,47 ----
#include "executor/nodeSubqueryscan.h"
#include "executor/nodeTidscan.h"
#include "executor/nodeUnique.h"
+ #include "executor/nodeHashDistinct.h"
/*
***************
*** 178,183 ****
--- 179,188 ----
case T_UniqueState:
ExecReScanUnique((UniqueState *) node, exprCtxt);
break;
+
+ case T_HashDistinctState:
+ ExecReScanHashDistinct((HashDistinctState *) node, exprCtxt);
+ break;
case T_HashState:
ExecReScanHash((HashState *) node, exprCtxt);
***************
*** 373,378 ****
--- 378,386 ----
case T_Unique:
return ExecSupportsBackwardScan(outerPlan(node));
+
+ case T_HashDistinct:
+ return false;	/* hash table of already-seen tuples cannot run in reverse */
case T_Limit:
return ExecSupportsBackwardScan(outerPlan(node));
diff -cr --new-file pgsql/src/backend/executor/execProcnode.c pgsql-hashdistinct/src/backend/executor/execProcnode.c
*** pgsql/src/backend/executor/execProcnode.c 2006-05-30 10:01:57.000000000 -0400
--- pgsql-hashdistinct/src/backend/executor/execProcnode.c 2006-06-26 10:52:02.000000000 -0400
***************
*** 102,107 ****
--- 102,108 ----
#include "executor/nodeSubqueryscan.h"
#include "executor/nodeTidscan.h"
#include "executor/nodeUnique.h"
+ #include "executor/nodeHashDistinct.h"
#include "miscadmin.h"
#include "tcop/tcopprot.h"
***************
*** 240,245 ****
--- 241,251 ----
result = (PlanState *) ExecInitUnique((Unique *) node,
estate, eflags);
break;
+
+ case T_HashDistinct:
+ result = (PlanState *) ExecInitHashDistinct((HashDistinct *) node,
+ estate, eflags);
+ break;
case T_Hash:
result = (PlanState *) ExecInitHash((Hash *) node,
***************
*** 403,408 ****
--- 409,418 ----
case T_UniqueState:
result = ExecUnique((UniqueState *) node);
break;
+
+ case T_HashDistinctState:
+ result = ExecHashDistinct((HashDistinctState *) node);
+ break;
case T_HashState:
result = ExecHash((HashState *) node);
***************
*** 567,572 ****
--- 577,585 ----
case T_Unique:
return ExecCountSlotsUnique((Unique *) node);
+ case T_HashDistinct:
+ return ExecCountSlotsHashDistinct((HashDistinct *) node);
+
case T_Hash:
return ExecCountSlotsHash((Hash *) node);
***************
*** 707,712 ****
--- 720,729 ----
case T_UniqueState:
ExecEndUnique((UniqueState *) node);
break;
+
+ case T_HashDistinctState:
+ ExecEndHashDistinct((HashDistinctState *) node);
+ break;
case T_HashState:
ExecEndHash((HashState *) node);
diff -cr --new-file pgsql/src/backend/executor/Makefile pgsql-hashdistinct/src/backend/executor/Makefile
*** pgsql/src/backend/executor/Makefile 2005-04-19 18:35:11.000000000 -0400
--- pgsql-hashdistinct/src/backend/executor/Makefile 2006-06-22 14:05:37.000000000 -0400
***************
*** 19,25 ****
nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \
nodeHashjoin.o nodeIndexscan.o nodeMaterial.o nodeMergejoin.o \
nodeNestloop.o nodeFunctionscan.o nodeResult.o nodeSeqscan.o \
! nodeSetOp.o nodeSort.o nodeUnique.o nodeLimit.o nodeGroup.o \
nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o tstoreReceiver.o spi.o
all: SUBSYS.o
--- 19,25 ----
nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \
nodeHashjoin.o nodeIndexscan.o nodeMaterial.o nodeMergejoin.o \
nodeNestloop.o nodeFunctionscan.o nodeResult.o nodeSeqscan.o \
! nodeSetOp.o nodeSort.o nodeUnique.o nodeHashDistinct.o nodeLimit.o nodeGroup.o \
nodeSubplan.o nodeSubqueryscan.o nodeTidscan.o tstoreReceiver.o spi.o
all: SUBSYS.o
diff -cr --new-file pgsql/src/backend/executor/nodeHashDistinct.c pgsql-hashdistinct/src/backend/executor/nodeHashDistinct.c
*** pgsql/src/backend/executor/nodeHashDistinct.c 1969-12-31 19:00:00.000000000 -0500
--- pgsql-hashdistinct/src/backend/executor/nodeHashDistinct.c 2006-06-19 14:08:58.000000000 -0400
***************
*** 0 ****
--- 1,242 ----
+ /*-------------------------------------------------------------------------
+ *
+ * nodeHashDistinct.c
+ * Routines to remove duplicate tuples using an in-memory hash table
+ *
+ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+ /*
+ * INTERFACE ROUTINES
+ * ExecHashDistinct - generate a unique'd temporary relation
+ * ExecInitHashDistinct - initialize node and subnodes..
+ * ExecEndHashDistinct - shutdown node and subnodes
+ *
+ * NOTES
+ * Unlike nodeUnique.c, the input need NOT be sorted; duplicate
+ * tuples are detected with an in-memory hash table.
+ */
+
+ #include "postgres.h"
+
+ #include "access/heapam.h"
+ #include "executor/executor.h"
+ #include "executor/nodeHashDistinct.h"
+ #include "utils/memutils.h"
+
+
+ /* ----------------------------------------------------------------
+ * ExecHashDistinct
+ *
+ * This node filters out duplicate tuples from the stream of
+ * tuples produced by its subplan, using a hash table of seen tuples.
+ * ----------------------------------------------------------------
+ */
+ TupleTableSlot * /* return: a tuple or NULL */
+ ExecHashDistinct(HashDistinctState *node)
+ {
+ TupleTableSlot *resultTupleSlot;
+ TupleTableSlot *slot;
+ PlanState *outerPlan;
+ bool isnew = FALSE;
+
+ /*
+ * get information from the node
+ */
+ outerPlan = outerPlanState(node);
+ resultTupleSlot = node->ps.ps_ResultTupleSlot;
+
+ /*
+ * now loop, returning only tuples we have not seen before.  The
+ * input need not arrive in any particular order: each tuple is
+ * probed against the hash table, and only a tuple that creates a
+ * new entry is returned.
+ *
+ * At the end of the subplan, clear the result slot; the hash
+ * table itself persists until the node is rescanned or shut down.
+ */
+ for (;;)
+ {
+ /*
+ * fetch a tuple from the outer subplan
+ */
+ slot = ExecProcNode(outerPlan);
+ if (TupIsNull(slot))
+ {
+ /* end of subplan; reset in case we change direction */
+ ExecClearTuple(resultTupleSlot);
+ return NULL;
+ }
+
+ /*
+ * Test if the new tuple already exists in the hash table
+ * If so then we loop back and fetch another new tuple from the
+ * subplan.
+ */
+ LookupTupleHashEntry(node->hashtable, slot, &isnew);
+ if (isnew)
+ break;
+ }
+
+ /*
+ * Found a tuple not previously seen.  Return it, first copying it
+ * into the result slot, because the source subplan does not
+ * guarantee that this tuple remains accessible after the next
+ * source tuple is fetched.
+ */
+ return ExecCopySlot(resultTupleSlot, slot);
+ }
+
+ /* ----------------------------------------------------------------
+ * ExecInitHashDistinct
+ *
+ * This initializes the HashDistinct node state structures and
+ * the node's subplan.
+ * ----------------------------------------------------------------
+ */
+ HashDistinctState *
+ ExecInitHashDistinct(HashDistinct *node, EState *estate, int eflags)
+ {
+ HashDistinctState *distinctstate;
+
+ /* check for unsupported flags */
+ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
+
+ /*
+ * create state structure
+ */
+ distinctstate = makeNode(HashDistinctState);
+ distinctstate->ps.plan = (Plan *) node;
+ distinctstate->ps.state = estate;
+
+ /*
+ * Miscellaneous initialization
+ *
+ * HashDistinct nodes have no ExprContext initialization because they never call
+ * ExecQual or ExecProject. But they do need a per-tuple memory context
+ * anyway for calling LookupTupleHashEntry.
+ *
+ * It also needs a long-lived memory context to contain the hash table.
+ */
+ distinctstate->tempContext =
+ AllocSetContextCreate(CurrentMemoryContext,
+ "HashDistinct tempContext",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
+ distinctstate->tableContext =
+ AllocSetContextCreate(CurrentMemoryContext,
+ "HashDistinct tableContext",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
+ /*
+ * Tuple table initialization
+ */
+ ExecInitResultTupleSlot(estate, &distinctstate->ps);
+
+ /*
+ * then initialize outer plan
+ */
+ outerPlanState(distinctstate) = ExecInitNode(outerPlan(node), estate, eflags);
+
+ /*
+ * HashDistinct nodes do no projections, so initialize projection info for this
+ * node appropriately
+ */
+ ExecAssignResultTypeFromTL(&distinctstate->ps);
+ distinctstate->ps.ps_ProjInfo = NULL;
+
+ /*
+ * Precompute lookup data for hash table lookup
+ */
+ execTuplesHashPrepare(ExecGetResultType(&distinctstate->ps),
+ node->numCols,
+ node->uniqColIdx,
+ &distinctstate->eqfunctions,
+ &distinctstate->hashfunctions);
+
+ distinctstate->hashtable = BuildTupleHashTable(node->numCols,
+ node->uniqColIdx,
+ distinctstate->eqfunctions,
+ distinctstate->hashfunctions,
+ node->numDistinct,
+ sizeof(TupleHashEntryData),
+ distinctstate->tableContext,
+ distinctstate->tempContext);
+
+ #define UNIQUE_NSLOTS 1
+
+ return distinctstate;
+ }
+
+ int
+ ExecCountSlotsHashDistinct(HashDistinct *node)
+ {
+ return ExecCountSlotsNode(outerPlan(node)) +
+ ExecCountSlotsNode(innerPlan(node)) +
+ UNIQUE_NSLOTS;
+ }
+
+ /* ----------------------------------------------------------------
+ * ExecEndHashDistinct
+ *
+ * This shuts down the subplan and frees resources allocated
+ * to this node.
+ * ----------------------------------------------------------------
+ */
+ void
+ ExecEndHashDistinct(HashDistinctState *node)
+ {
+ /* clean up tuple table */
+ ExecClearTuple(node->ps.ps_ResultTupleSlot);
+
+ MemoryContextDelete(node->tempContext);
+ MemoryContextDelete(node->tableContext);
+
+ ExecEndNode(outerPlanState(node));
+ }
+
+
+ void
+ ExecReScanHashDistinct(HashDistinctState *node, ExprContext *exprCtxt)
+ {
+ HashDistinct *plannode = (HashDistinct *) node->ps.plan;
+
+ /* must clear result tuple so first input tuple is returned */
+ ExecClearTuple(node->ps.ps_ResultTupleSlot);
+
+ /* release temporary storage */
+ MemoryContextReset(node->tempContext);
+ MemoryContextReset(node->tableContext);	/* free the old hash table */
+ /* rebuild the hash table */
+ execTuplesHashPrepare(ExecGetResultType(&node->ps),
+ plannode->numCols,
+ plannode->uniqColIdx,
+ &node->eqfunctions,
+ &node->hashfunctions);
+
+ node->hashtable = BuildTupleHashTable(plannode->numCols,
+ plannode->uniqColIdx,
+ node->eqfunctions,
+ node->hashfunctions,
+ plannode->numDistinct,
+ sizeof(TupleHashEntryData),
+ node->tableContext,
+ node->tempContext);
+
+ /*
+ * if chgParam of subnode is not null then plan will be re-scanned by
+ * first ExecProcNode.
+ */
+ if (((PlanState *) node)->lefttree->chgParam == NULL)
+ ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
+ }
diff -cr --new-file pgsql/src/backend/nodes/copyfuncs.c pgsql-hashdistinct/src/backend/nodes/copyfuncs.c
*** pgsql/src/backend/nodes/copyfuncs.c 2006-06-08 14:42:46.000000000 -0400
--- pgsql-hashdistinct/src/backend/nodes/copyfuncs.c 2006-08-02 12:23:38.000000000 -0400
***************
*** 556,561 ****
--- 556,585 ----
}
/*
+ * _copyHashDistinct
+ */
+ static HashDistinct *
+ _copyHashDistinct(HashDistinct *from)
+ {
+ HashDistinct *newnode = makeNode(HashDistinct);
+
+ /*
+ * copy node superclass fields
+ */
+ CopyPlanFields((Plan *) from, (Plan *) newnode);
+
+ /*
+ * copy remainder of node
+ */
+ COPY_SCALAR_FIELD(numCols);
+ COPY_SCALAR_FIELD(numDistinct);
+ COPY_POINTER_FIELD(uniqColIdx, from->numCols * sizeof(AttrNumber));
+
+ return newnode;
+ }
+
+
+ /*
* _copyHash
*/
static Hash *
***************
*** 2831,2836 ****
--- 2855,2863 ----
case T_Unique:
retval = _copyUnique(from);
break;
+ case T_HashDistinct:
+ retval = _copyHashDistinct(from);
+ break;
case T_Hash:
retval = _copyHash(from);
break;
diff -cr --new-file pgsql/src/backend/nodes/outfuncs.c pgsql-hashdistinct/src/backend/nodes/outfuncs.c
*** pgsql/src/backend/nodes/outfuncs.c 2006-04-30 14:30:39.000000000 -0400
--- pgsql-hashdistinct/src/backend/nodes/outfuncs.c 2006-06-22 15:07:45.000000000 -0400
***************
*** 520,525 ****
--- 520,541 ----
}
static void
+ _outHashDistinct(StringInfo str, HashDistinct *node)
+ {
+ int i;
+
+ WRITE_NODE_TYPE("HASHDISTINCT");
+
+ _outPlanInfo(str, (Plan *) node);
+
+ WRITE_INT_FIELD(numCols);
+
+ appendStringInfo(str, " :uniqColIdx");
+ for (i = 0; i < node->numCols; i++)
+ appendStringInfo(str, " %d", node->uniqColIdx[i]);
+ }
+
+ static void
_outSetOp(StringInfo str, SetOp *node)
{
int i;
***************
*** 1900,1905 ****
--- 1916,1924 ----
case T_Unique:
_outUnique(str, obj);
break;
+ case T_HashDistinct:
+ _outHashDistinct(str, obj);
+ break;
case T_SetOp:
_outSetOp(str, obj);
break;
diff -cr --new-file pgsql/src/backend/nodes/print.c pgsql-hashdistinct/src/backend/nodes/print.c
*** pgsql/src/backend/nodes/print.c 2006-04-04 15:35:34.000000000 -0400
--- pgsql-hashdistinct/src/backend/nodes/print.c 2006-08-02 12:25:19.000000000 -0400
***************
*** 525,530 ****
--- 525,532 ----
return "AGG";
case T_Unique:
return "UNIQUE";
+ case T_HashDistinct:
+ return "HASHDISTINCT";
case T_SetOp:
return "SETOP";
case T_Limit:
diff -cr --new-file pgsql/src/backend/optimizer/plan/createplan.c pgsql-hashdistinct/src/backend/optimizer/plan/createplan.c
*** pgsql/src/backend/optimizer/plan/createplan.c 2006-05-18 14:57:31.000000000 -0400
--- pgsql-hashdistinct/src/backend/optimizer/plan/createplan.c 2006-08-18 16:52:43.000000000 -0400
***************
*** 170,175 ****
--- 170,179 ----
plan = (Plan *) create_unique_plan(root,
(UniquePath *) best_path);
break;
+ case T_HashDistinct:
+ plan = (Plan *) create_unique_plan(root,
+ (UniquePath *) best_path);
+ break;
default:
elog(ERROR, "unrecognized node type: %d",
(int) best_path->pathtype);
***************
*** 2749,2754 ****
--- 2753,2815 ----
/*
* distinctList is a list of SortClauses, identifying the targetlist items
+ * that should be considered by the HashDistinct filter.
+ */
+ HashDistinct *
+ make_hash_distinct(Plan *lefttree,
+ List *distinctList,
+ long numDistinct)
+ {
+ HashDistinct *node = makeNode(HashDistinct);
+ Plan *plan = &node->plan;
+ int numCols = list_length(distinctList);
+ int keyno = 0;
+ AttrNumber *uniqColIdx;
+ ListCell *slitem;
+
+ copy_plan_costsize(plan, lefttree);
+
+ /*
+ * Charge one cpu_operator_cost per column per input tuple, as a
+ * crude estimate of the cost of hashing and comparing the distinct
+ * columns of each tuple.  (XXX probably this is an overestimate.)
+ */
+ plan->total_cost += cpu_operator_cost * plan->plan_rows * numCols;
+
+ /*
+ * plan->plan_rows is left as a copy of the input subplan's plan_rows; ie,
+ * we assume the filter removes nothing. The caller must alter this if he
+ * has a better idea.
+ */
+
+ plan->targetlist = copyObject(lefttree->targetlist);
+ plan->qual = NIL;
+ plan->lefttree = lefttree;
+ plan->righttree = NULL;
+
+ /*
+ * convert SortClause list into array of attr indexes, as wanted by exec
+ */
+ Assert(numCols > 0);
+ uniqColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+
+ foreach(slitem, distinctList)
+ {
+ SortClause *sortcl = (SortClause *) lfirst(slitem);
+ TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);
+
+ uniqColIdx[keyno++] = tle->resno;
+ }
+
+ node->numCols = numCols;
+ node->uniqColIdx = uniqColIdx;
+ node->numDistinct = numDistinct;
+
+ return node;
+ }
+
+ /*
+ * distinctList is a list of SortClauses, identifying the targetlist items
* that should be considered by the SetOp filter.
*/
***************
*** 2931,2936 ****
--- 2992,2998 ----
case T_Material:
case T_Sort:
case T_Unique:
+ case T_HashDistinct:
case T_SetOp:
case T_Limit:
case T_Append:
diff -cr --new-file pgsql/src/backend/optimizer/plan/planner.c pgsql-hashdistinct/src/backend/optimizer/plan/planner.c
*** pgsql/src/backend/optimizer/plan/planner.c 2006-03-05 10:58:29.000000000 -0500
--- pgsql-hashdistinct/src/backend/optimizer/plan/planner.c 2006-08-20 22:04:49.000000000 -0400
***************
*** 73,78 ****
--- 73,86 ----
List *sub_tlist,
AttrNumber *groupColIdx);
static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist);
+ static bool clause_in_sort_list(SortClause *scl, List *sortList);
+ static bool choose_hashed_distinct(PlannerInfo *root,
+ Plan *lefttree,
+ double tuple_fraction,
+ double dNumGroups,
+ List *distinct_pathkeys,
+ List *sort_pathkeys,
+ List *current_pathkeys);
/*****************************************************************************
***************
*** 635,647 ****
Plan *result_plan;
List *current_pathkeys;
List *sort_pathkeys;
double dNumGroups = 0;
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
if (parse->limitCount || parse->limitOffset)
tuple_fraction = preprocess_limit(root, tuple_fraction,
&offset_est, &count_est);
!
if (parse->setOperations)
{
List *set_sortclauses;
--- 643,660 ----
Plan *result_plan;
List *current_pathkeys;
List *sort_pathkeys;
+ List *distinct_pathkeys = NIL;
double dNumGroups = 0;
+ long numGroups = 0;
+ bool use_hashed_distinct = false;
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
if (parse->limitCount || parse->limitOffset)
tuple_fraction = preprocess_limit(root, tuple_fraction,
&offset_est, &count_est);
! if (parse->distinctClause)
! distinct_pathkeys = make_pathkeys_for_sortclauses(parse->distinctClause, tlist);
!
if (parse->setOperations)
{
List *set_sortclauses;
***************
*** 710,716 ****
Path *cheapest_path;
Path *sorted_path;
Path *best_path;
- long numGroups = 0;
AggClauseCounts agg_counts;
int numGroupCols = list_length(parse->groupClause);
bool use_hashed_grouping = false;
--- 723,728 ----
***************
*** 763,773 ****
--- 775,792 ----
* BY is a superset of GROUP BY, it would be tempting to request sort
* by ORDER BY --- but that might just leave us failing to exploit an
* available sort order at all. Needs more thought...)
+ *
+ * Addition: Since the distinct list is no longer added to the sort list
+ * at parse time, requesting a sorted order by the distinct_pathkeys
+ * allows for the possibility of a sorted order to be exploited by the
+ * Unique filter.
*/
if (parse->groupClause)
root->query_pathkeys = root->group_pathkeys;
else if (parse->sortClause)
root->query_pathkeys = root->sort_pathkeys;
+ else if (parse->distinctClause)
+ root->query_pathkeys = distinct_pathkeys;
else
root->query_pathkeys = NIL;
***************
*** 1007,1018 ****
} /* end of non-minmax-aggregate case */
} /* end of if (setOperations) */
! /*
! * If we were not able to make the plan come out in the right order, add
! * an explicit sort step.
! */
if (parse->sortClause)
{
if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
{
result_plan = (Plan *)
--- 1026,1089 ----
} /* end of non-minmax-aggregate case */
} /* end of if (setOperations) */
! /*
! * If there is a DISTINCT clause, decide which filter to
! * use and make adjustments to the sort list accordingly.
! *
! * Note: Although it seems like a better idea to sort after
! * the DISTINCT filter when using the hash-based algorithm,
! * it would effectively break DISTINCT ON functionality.
! * Thus, unless we separate DISTINCT from DISTINCT ON (or
! * completely redesign the clauses), it is a necessary evil
! * to always execute an ORDER BY clause before the DISTINCT
! * clause.
! */
! if (parse->distinctClause)
! {
! distinct_pathkeys = canonicalize_pathkeys(root, distinct_pathkeys);
! use_hashed_distinct = choose_hashed_distinct(root, result_plan, tuple_fraction,
! dNumGroups, distinct_pathkeys,
! sort_pathkeys, current_pathkeys);
!
! if (!use_hashed_distinct)
! {
! /*
! * The sort list only needs to be adjusted for the Unique filter
! * if there is an ORDER BY clause and DISTINCT is not a subset of it.
! *
! * If there is no ORDER BY clause, then the distinct list can simply
! * be substituted for the sort list.
! */
! if (sort_pathkeys)
! {
! if(!pathkeys_contained_in(distinct_pathkeys, sort_pathkeys))
! {
! ListCell *dlitem;
!
! foreach (dlitem, parse->distinctClause)
! {
! SortClause *scl = (SortClause *) lfirst(dlitem);
! if (!clause_in_sort_list(scl, parse->sortClause))
! parse->sortClause = lappend(parse->sortClause, copyObject(scl));
! }
!
! /* force a sort */
! current_pathkeys = NIL;
! }
! }
! else
! {
! parse->sortClause = parse->distinctClause;
! sort_pathkeys = distinct_pathkeys;
! }
! }
! }
!
if (parse->sortClause)
{
+ /* If we were not able to make the plan come out in the right order, add
+ * an explicit sort step.
+ */
if (!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
{
result_plan = (Plan *)
***************
*** 1022,1035 ****
current_pathkeys = sort_pathkeys;
}
}
!
/*
! * If there is a DISTINCT clause, add the UNIQUE node.
*/
if (parse->distinctClause)
! {
! result_plan = (Plan *) make_unique(result_plan, parse->distinctClause);
/*
* If there was grouping or aggregation, leave plan_rows as-is (ie,
* assume the result was already mostly unique). If not, use the
--- 1093,1116 ----
current_pathkeys = sort_pathkeys;
}
}
!
/*
! * If there is a DISTINCT clause, add the filtering node.
*/
if (parse->distinctClause)
! {
! if (use_hashed_distinct) {
! /* convert # groups to long int */
! numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
!
! result_plan = (Plan *) make_hash_distinct(result_plan,
! parse->distinctClause,
! numGroups);
! } else {
! result_plan = (Plan *) make_unique(result_plan, parse->distinctClause);
! }
+
/*
* If there was grouping or aggregation, leave plan_rows as-is (ie,
* assume the result was already mostly unique). If not, use the
***************
*** 1037,1043 ****
*/
if (!parse->groupClause && !root->hasHavingQual && !parse->hasAggs)
result_plan->plan_rows = dNumGroups;
! }
/*
* Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
--- 1118,1124 ----
*/
if (!parse->groupClause && !root->hasHavingQual && !parse->hasAggs)
result_plan->plan_rows = dNumGroups;
! }
/*
* Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
***************
*** 1618,1620 ****
--- 1699,1761 ----
elog(ERROR, "resjunk output columns are not implemented");
return new_tlist;
}
+
+ static bool clause_in_sort_list(SortClause *scl, List *sortList) {
+ ListCell *slitem;
+
+ foreach (slitem, sortList) {
+ SortClause *current = (SortClause *) lfirst(slitem);
+
+ if (scl->tleSortGroupRef == current->tleSortGroupRef)
+ return true;
+ }
+
+ return false;
+ }
+
+ static bool choose_hashed_distinct(PlannerInfo *root, Plan *lefttree,
+ double tuple_fraction, double dNumGroups,
+ List *distinct_pathkeys, List *sort_pathkeys,
+ List *current_pathkeys) {
+
+ Path unique_path;
+ Path hashed_path;
+ /*
+ * If the best_path is already ordered for the Unique filter, or
+ * there is an explicit sort requested on the distinct clauses
+ * the Unique filter will be better.
+ */
+ if (pathkeys_contained_in(distinct_pathkeys, sort_pathkeys) ||
+ pathkeys_contained_in(distinct_pathkeys, current_pathkeys))
+ {
+ return false;
+ }
+
+ /*
+ * If it doesn't look like the hash table will fit in memory, don't
+ * hash.
+ */
+ if (lefttree->plan_width * dNumGroups > work_mem * 1024L)
+ return false;
+
+ /*
+ * Since cost_sort currently doesn't care how many keys we're sorting
+ * by, if there is an explicit sort, we might as well use the Unique
+ * filter (it uses less memory).
+ *
+ * If this were not the case, we would have calculated the cost of
+ * sorting (if needed) sort_pathkeys against the cost of sorting
+ * list_union(sort_pathkeys, distinct_pathkeys).
+ */
+ if (sort_pathkeys) {
+ return false;
+ }
+
+ /*
+ * Assuming that if the hash table will fit in memory and we're not
+ * explicitly sorting in any way, hashing would be better.
+ *
+ * Note: Not sure if this is covering all the possibilities.
+ */
+ return true;
+ }
diff -cr --new-file pgsql/src/backend/optimizer/plan/setrefs.c pgsql-hashdistinct/src/backend/optimizer/plan/setrefs.c
*** pgsql/src/backend/optimizer/plan/setrefs.c 2006-03-05 10:58:30.000000000 -0500
--- pgsql-hashdistinct/src/backend/optimizer/plan/setrefs.c 2006-08-02 12:50:55.000000000 -0400
***************
*** 213,218 ****
--- 213,219 ----
case T_Material:
case T_Sort:
case T_Unique:
+ case T_HashDistinct:
case T_SetOp:
/*
***************
*** 546,551 ****
--- 547,553 ----
case T_Material:
case T_Sort:
case T_Unique:
+ case T_HashDistinct:
case T_SetOp:
/*
diff -cr --new-file pgsql/src/backend/optimizer/plan/subselect.c pgsql-hashdistinct/src/backend/optimizer/plan/subselect.c
*** pgsql/src/backend/optimizer/plan/subselect.c 2006-05-02 20:24:56.000000000 -0400
--- pgsql-hashdistinct/src/backend/optimizer/plan/subselect.c 2006-08-02 12:33:00.000000000 -0400
***************
*** 1169,1174 ****
--- 1169,1175 ----
case T_Material:
case T_Sort:
case T_Unique:
+ case T_HashDistinct:
case T_SetOp:
case T_Group:
break;
diff -cr --new-file pgsql/src/backend/parser/parse_clause.c pgsql-hashdistinct/src/backend/parser/parse_clause.c
*** pgsql/src/backend/parser/parse_clause.c 2006-03-15 19:31:55.000000000 -0500
--- pgsql-hashdistinct/src/backend/parser/parse_clause.c 2006-08-15 13:07:24.000000000 -0400
***************
*** 1479,1496 ****
{
/* We had SELECT DISTINCT */
! /*
! * All non-resjunk elements from target list that are not already in
! * the sort list should be added to it. (We don't really care what
! * order the DISTINCT fields are checked in, so we can leave the
! * user's ORDER BY spec alone, and just add additional sort keys to it
! * to ensure that all targetlist items get sorted.)
! */
! *sortClause = addAllTargetsToSortList(pstate,
! *sortClause,
! *targetlist,
! true);
!
/*
* Now, DISTINCT list consists of all non-resjunk sortlist items.
* Actually, all the sortlist items had better be non-resjunk!
--- 1479,1496 ----
{
/* We had SELECT DISTINCT */
! // /*
! // * All non-resjunk elements from target list that are not already in
! // * the sort list should be added to it. (We don't really care what
! // * order the DISTINCT fields are checked in, so we can leave the
! // * user's ORDER BY spec alone, and just add additional sort keys to it
! // * to ensure that all targetlist items get sorted.)
! // */
! // *sortClause = addAllTargetsToSortList(pstate,
! // *sortClause,
! // *targetlist,
! // true);
! //
/*
* Now, DISTINCT list consists of all non-resjunk sortlist items.
* Actually, all the sortlist items had better be non-resjunk!
***************
*** 1507,1515 ****
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list")));
! else
! result = lappend(result, copyObject(scl));
}
}
else
{
--- 1507,1516 ----
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg("for SELECT DISTINCT, ORDER BY expressions must appear in select list")));
! // else
! // result = lappend(result, copyObject(scl));
}
+ result = addAllTargetsToSortList(pstate, result, *targetlist, true);
}
else
{
***************
*** 1552,1577 ****
}
else
{
! *sortClause = addTargetToSortList(pstate, tle,
! *sortClause, *targetlist,
SORTBY_ASC, NIL, true);
! /*
! * Probably, the tle should always have been added at the end
! * of the sort list ... but search to be safe.
! */
! foreach(slitem, *sortClause)
! {
! SortClause *scl = (SortClause *) lfirst(slitem);
!
! if (tle->ressortgroupref == scl->tleSortGroupRef)
! {
! result = lappend(result, copyObject(scl));
! break;
! }
! }
! if (slitem == NULL) /* should not happen */
! elog(ERROR, "failed to add DISTINCT ON clause to target list");
}
}
}
--- 1553,1581 ----
}
else
{
! // *sortClause = addTargetToSortList(pstate, tle,
! // *sortClause, *targetlist,
! // SORTBY_ASC, NIL, true);
! result = addTargetToSortList(pstate, tle,
! result, *targetlist,
SORTBY_ASC, NIL, true);
! // /*
! // * Probably, the tle should always have been added at the end
! // * of the sort list ... but search to be safe.
! // */
! // foreach(slitem, *sortClause)
! // {
! // SortClause *scl = (SortClause *) lfirst(slitem);
! //
! // if (tle->ressortgroupref == scl->tleSortGroupRef)
! // {
! // result = lappend(result, copyObject(scl));
! // break;
! // }
! // }
! // if (slitem == NULL) /* should not happen */
! // elog(ERROR, "failed to add DISTINCT ON clause to target list");
}
}
}
diff -cr --new-file pgsql/src/.cdtproject pgsql-hashdistinct/src/.cdtproject
*** pgsql/src/.cdtproject 1969-12-31 19:00:00.000000000 -0500
--- pgsql-hashdistinct/src/.cdtproject 2006-06-12 11:23:46.000000000 -0400
***************
*** 0 ****
--- 1,56 ----
+
+
+
+
+
+
+ -
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -
+
+
+
+
+
+
\ No newline at end of file
diff -cr --new-file pgsql/src/include/executor/nodeHashDistinct.h pgsql-hashdistinct/src/include/executor/nodeHashDistinct.h
*** pgsql/src/include/executor/nodeHashDistinct.h 1969-12-31 19:00:00.000000000 -0500
--- pgsql-hashdistinct/src/include/executor/nodeHashDistinct.h 2006-06-13 21:52:31.000000000 -0400
***************
*** 0 ****
--- 1,25 ----
+ /*-------------------------------------------------------------------------
+ *
+ * nodeHashDistinct.h
+ *
+ * prototypes for nodeHashDistinct.c
+ *
+ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL$
+ *
+ *-------------------------------------------------------------------------
+ */
+ #ifndef NODEHASHDISTINCT_H
+ #define NODEHASHDISTINCT_H
+
+ #include "nodes/execnodes.h"
+
+ extern int ExecCountSlotsHashDistinct(HashDistinct *node);
+ extern HashDistinctState *ExecInitHashDistinct(HashDistinct *node, EState *estate, int eflags);
+ extern TupleTableSlot *ExecHashDistinct(HashDistinctState *node);
+ extern void ExecEndHashDistinct(HashDistinctState *node);
+ extern void ExecReScanHashDistinct(HashDistinctState *node, ExprContext *exprCtxt);
+
+ #endif /* NODEHASHDISTINCT_H */
diff -cr --new-file pgsql/src/include/nodes/execnodes.h pgsql-hashdistinct/src/include/nodes/execnodes.h
*** pgsql/src/include/nodes/execnodes.h 2006-04-30 14:30:40.000000000 -0400
--- pgsql-hashdistinct/src/include/nodes/execnodes.h 2006-06-22 14:22:03.000000000 -0400
***************
*** 1268,1273 ****
--- 1268,1288 ----
} UniqueState;
/* ----------------
+ * HashDistinctState information
+ * ----------------
+ */
+ typedef struct HashDistinctState
+ {
+ PlanState ps; /* its first field is NodeTag */
+ FmgrInfo *eqfunctions; /* per-field lookup data for equality fns */
+ FmgrInfo *hashfunctions; /* per-field lookup data for hash functions */
+ MemoryContext tempContext; /* short-term context for comparisons */
+ MemoryContext tableContext; /* long-term context for the hash table */
+ TupleHashTable hashtable; /* hash table with one entry per distinct tuple */
+ } HashDistinctState;
+
+
+ /* ----------------
* HashState information
* ----------------
*/
diff -cr --new-file pgsql/src/include/nodes/nodes.h pgsql-hashdistinct/src/include/nodes/nodes.h
*** pgsql/src/include/nodes/nodes.h 2006-04-30 14:30:40.000000000 -0400
--- pgsql-hashdistinct/src/include/nodes/nodes.h 2006-06-22 14:55:06.000000000 -0400
***************
*** 63,68 ****
--- 63,69 ----
T_Group,
T_Agg,
T_Unique,
+ T_HashDistinct,
T_Hash,
T_SetOp,
T_Limit,
***************
*** 94,99 ****
--- 95,101 ----
T_GroupState,
T_AggState,
T_UniqueState,
+ T_HashDistinctState,
T_HashState,
T_SetOpState,
T_LimitState,
diff -cr --new-file pgsql/src/include/nodes/plannodes.h pgsql-hashdistinct/src/include/nodes/plannodes.h
*** pgsql/src/include/nodes/plannodes.h 2006-03-05 10:58:57.000000000 -0500
--- pgsql-hashdistinct/src/include/nodes/plannodes.h 2006-06-22 14:24:21.000000000 -0400
***************
*** 430,435 ****
--- 430,447 ----
} Unique;
/* ----------------
+ * hashdistinct node
+ * ----------------
+ */
+ typedef struct HashDistinct
+ {
+ Plan plan;
+ int numCols; /* number of columns to check for uniqueness */
+ int numDistinct; /* estimated number of distinct tuples */
+ AttrNumber *uniqColIdx; /* indexes into the target list */
+ } HashDistinct;
+
+ /* ----------------
* hash build node
* ----------------
*/
diff -cr --new-file pgsql/src/include/optimizer/planmain.h pgsql-hashdistinct/src/include/optimizer/planmain.h
*** pgsql/src/include/optimizer/planmain.h 2006-03-05 10:58:57.000000000 -0500
--- pgsql-hashdistinct/src/include/optimizer/planmain.h 2006-08-18 16:52:20.000000000 -0400
***************
*** 55,60 ****
--- 55,61 ----
extern Material *make_material(Plan *lefttree);
extern Plan *materialize_finished_plan(Plan *subplan);
extern Unique *make_unique(Plan *lefttree, List *distinctList);
+ extern HashDistinct *make_hash_distinct(Plan *lefttree, List *distinctList, long numDistinct);
extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
int offset_est, int count_est);
extern SetOp *make_setop(SetOpCmd cmd, Plan *lefttree,
diff -cr --new-file pgsql/src/interfaces/libpq/libpq.rc pgsql-hashdistinct/src/interfaces/libpq/libpq.rc
*** pgsql/src/interfaces/libpq/libpq.rc 2006-06-04 05:32:13.000000000 -0400
--- pgsql-hashdistinct/src/interfaces/libpq/libpq.rc 2006-06-22 15:01:34.000000000 -0400
***************
*** 1,8 ****
#include
VS_VERSION_INFO VERSIONINFO
! FILEVERSION 8,2,0,6155
! PRODUCTVERSION 8,2,0,6155
FILEFLAGSMASK 0x3fL
FILEFLAGS 0
FILEOS VOS__WINDOWS32
--- 1,8 ----
#include
VS_VERSION_INFO VERSIONINFO
! FILEVERSION 8,2,0,6173
! PRODUCTVERSION 8,2,0,6173
FILEFLAGSMASK 0x3fL
FILEFLAGS 0
FILEOS VOS__WINDOWS32
diff -cr --new-file pgsql/src/.project pgsql-hashdistinct/src/.project
*** pgsql/src/.project 1969-12-31 19:00:00.000000000 -0500
--- pgsql-hashdistinct/src/.project 2006-06-12 11:23:45.000000000 -0400
***************
*** 0 ****
--- 1,84 ----
+
+
+ PostgreSQL
+
+
+
+
+
+ org.eclipse.cdt.make.core.makeBuilder
+
+
+ org.eclipse.cdt.make.core.append_environment
+ true
+
+
+ org.eclipse.cdt.make.core.enableCleanBuild
+ true
+
+
+ org.eclipse.cdt.make.core.build.command
+ gmake
+
+
+ org.eclipse.cdt.make.core.useDefaultBuildCmd
+ false
+
+
+ org.eclipse.cdt.make.core.build.target.auto
+ all
+
+
+ org.eclipse.cdt.make.core.stopOnError
+ false
+
+
+ org.eclipse.cdt.make.core.build.target.full
+ clean all
+
+
+ org.eclipse.cdt.make.core.build.target.inc
+ all
+
+
+ org.eclipse.cdt.make.core.build.arguments
+
+
+
+ org.eclipse.cdt.core.errorOutputParser
+ org.eclipse.cdt.core.MakeErrorParser;org.eclipse.cdt.core.GCCErrorParser;org.eclipse.cdt.core.GASErrorParser;org.eclipse.cdt.core.GLDErrorParser;org.eclipse.cdt.core.VCErrorParser;
+
+
+ org.eclipse.cdt.make.core.enableAutoBuild
+ true
+
+
+ org.eclipse.cdt.make.core.environment
+
+
+
+ org.eclipse.cdt.make.core.enabledIncrementalBuild
+ true
+
+
+ org.eclipse.cdt.make.core.build.target.clean
+ clean
+
+
+ org.eclipse.cdt.make.core.enableFullBuild
+ true
+
+
+
+
+ org.eclipse.cdt.make.core.ScannerConfigBuilder
+
+
+
+
+
+ org.eclipse.cdt.core.cnature
+ org.eclipse.cdt.make.core.makeNature
+ org.eclipse.cdt.make.core.ScannerConfigNature
+
+