arch-dev.sgml
上传用户:blenddy
上传日期:2007-01-07
资源大小:6495k
文件大小:148k
- for attributes that are not contained in the {it targetlist} of the {tt
- AGG} node yet and add them to the previously made copy.
- %
- <step> The extended {it targetlist} is used to create the subplan attached to
- the {tt lefttree} field of the {tt AGG} node. That means that the
- {it targetlists} of the {tt GRP} node, of the {tt SORT} node and of the
- {tt SeqScan} node will now contain an entry for the attribute {tt
- pno}. The {it targetlist} of the {tt AGG} node itself will not be changed
- because we do not want to include the attribute {tt pno} into the
- result returned by the whole query.
- %
- end{itemize}
- Care has to be taken that the {tt varattno} fields of the {tt VAR}
- nodes used in the {it targetlists} contain the position of the
- corresponding attribute in the {it targetlist} of the subplan (i.e the
- subplan delivering the tuples for further processing by the actual
- node). \
- \
- The following part deals with the source code of the new and
- changed functions involved in the planner/optimizer stage. The files
- affected are: \
- \
- indent {tt $ldots$/src/backend/optimizer/plan/setrefs.c} \
- indent {tt $ldots$/src/backend/optimizer/plan/planner.c} \
- \
- Since all of the functions presented here are very long and would need
- very much space if presented as a whole, we just list the most
- important parts. \
- \
- The following two functions are new and have been
- introduced for the {it having logic}. They are contained in the file
- {tt $ldots$/src/backend/optimizer/plan/setrefs.c}:
- %
- begin{itemize}
- %
- <step> {tt check_having_qual_for_aggs()} \
- This function takes the representation of a {it having clause} given
- by the parameter {tt clause}, a {it targetlist} given by the
- parameter {tt subplanTargetList} and a {it group clause} given by
- the parameter {tt groupClause} as arguments and scans the
- representation of the {it having clause} recursively for {it
- aggregate functions}. If an {it aggregate function} is found it is
- attached to a list (internally called {tt agg_list}) and finally
- returned by the function.
- Additionally the {tt varno} field of every {tt VAR} node found is
- set to the position of the corresponding attribute in the {it
- targetlist} given by {tt subplanTargetList}.
- If the {it having clause} contains a subquery the function also makes
- sure, that every attribute from the {it main query} that is used
- within the subquery also appears in the {it group clause} given by
- {tt groupClause}. If the attribute cannot be found in the {it group
- clause} an error message is printed to the screen and the query
- processing is aborted.
- %
- begin{verbatim}
- List *
- check_having_qual_for_aggs(Node *clause,
- List *subplanTargetList,
- List *groupClause)
- {
- List *t, *l1;
- List *agg_list = NIL;
- int contained_in_group_clause = 0;
-
- if (IsA(clause, Var))
- {
- TargetEntry *subplanVar;
- subplanVar = match_varid((Var *) clause,
- subplanTargetList);
- /* Change the varattno fields of the
- * var node to point to the resdom->resnofields
- * of the subplan (lefttree)
- */
- ((Var *) clause)->varattno =
- subplanVar->resdom->resno;
- return NIL;
- }
- else
- if (is_funcclause(clause) || not_clause(clause)
- || or_clause(clause) || and_clause(clause))
- {
- int new_length=0, old_length=0;
- /* This is a function. Recursively call this
- * routine for its arguments... (i.e. for AND,
- * OR, ... clauses!)
- */
- foreach(t, ((Expr *) clause)->args)
- {
- old_length=length((List *)agg_list);
- agg_list = nconc(agg_list,
- check_having_qual_for_aggs(lfirst(t),
- subplanTargetList,
- groupClause));
- if(((new_length=length((List *)agg_list)) ==
- old_length) || (new_length == 0))
- {
- elog(ERROR,"This could have been done
- in a where clause!!");
- return NIL;
- }
- }
- return agg_list;
- }
- else
- if (IsA(clause, Aggreg))
- {
- return lcons(clause,
- check_having_qual_for_aggs(
- ((Aggreg *)clause)->target,
- subplanTargetList,
- groupClause));
- }
- else
- .
- .
- .
- }
- end{verbatim}
- %
- <step> {tt check_having_qual_for_vars()} \
- This function takes the representation of a {it having clause} given
- by the parameter {tt clause} and the actual {it targetlist}
- given by the parameter {tt targetlist_so_far} as arguments and
- recursively scans the representation of the {it having clause} for
- attributes that are not included in the actual {it targetlist}
- yet. Whenever such an attribute is found it is added to the actual
- {it targetlist} which is finally returned by the function.
- Attributes contained in the {it having clause} but not in the {it
- targetlist} show up with queries like:
- %
- begin{verbatim}
- select sid
- from part
- group by sid
- having min(pid) > 1;
- end{verbatim}
- %
- In the above query the attribute {tt pid} is used in the {it having
- clause} but it does not appear in the {it targetlist} (i.e. the list
- of attributes after the keyword {tt select}). Unfortunately only
- those attributes are delivered by the subplan and can therefore be
- used within the {it having clause}. To become able to handle queries
- like that correctly, we have to extend the actual {it targetlist} by
- those attributes used in the {tt having clause} but not already
- appearing in the {it targetlist}.
- %
- begin{verbatim}
- List *
- check_having_qual_for_vars(Node *clause,
- List *targetlist_so_far)
- {
- List *t;
- if (IsA(clause, Var))
- {
- Rel tmp_rel;
-
- tmp_rel.targetlist = targetlist_so_far;
- /* Check if the VAR is already contained in the
- * targetlist
- */
- if (tlist_member((Var *)clause,
- (List *)targetlist_so_far) == NULL)
- {
- add_tl_element(&tmp_rel, (Var *)clause);
- }
- return tmp_rel.targetlist;
- }
- else
- if (is_funcclause(clause) || not_clause(clause)
- || or_clause(clause) || and_clause(clause))
- {
- /* This is a function. Recursively call this
- * routine for its arguments...
- */
- foreach(t, ((Expr *) clause)->args)
- {
- targetlist_so_far =
- check_having_qual_for_vars(lfirst(t),
- targetlist_so_far);
- }
- return targetlist_so_far;
- }
- else
- if (IsA(clause, Aggreg))
- {
- targetlist_so_far =
- check_having_qual_for_vars(
- ((Aggreg *)clause)->target,
- targetlist_so_far);
- return targetlist_so_far;
- }
- .
- .
- .
- }
- end{verbatim}
- %
- end{itemize}
- %
- The next function is found in {tt
- $ldots$/src/backend/optimizer/plan/planner.c}:
- %
- begin{itemize}
- %
- <step> {tt union_planner()} \
- This function creates a {it plan} from the {it parsetree} given to
- it by the parameter {tt parse} that can be executed by the {it
- executor}.
- If {it aggregate functions} are present (indicated by {tt
- parse->hasAggs} set to true) the first step is to extend the {it
- targetlist} by those attributes that are used within the {it having
- clause} (if any is present) but do not appear in the {it select list}
- (Refer to the description of {tt check_having_qual_for_vars()}
- above).
- The next step is to call the function {tt query_planner()} creating
- a {it plan} without taking the {it group clause}, the {it aggregate
- functions} and the {it having clause} into account for the moment.
- Next insert a {tt GRP} node at the top of the {it plan} according
- to the {it group clause} of the {it parsetree} if any is present.
- Add an {tt AGG} node to the top of the current {it plan} if {it
- aggregate functions} are present and if a {it having clause} is
- present additionally perform the following steps:
- %
- begin{itemize}
- <step> Perform various transformations to the representation of the
- {it having clause} (e.g. transform it to CNF, $ldots$).
- <step> Attach the transformed representation of the {it having clause} to the
- field {tt plan.qual} of the just created {tt AGG} node.
- <step> Examine the whole {it having clause} and search for {it
- aggregate functions}. This is done using the function {tt
- check_having_qual_for_aggs()} which appends every {it aggregate
- function} found to a list that is finally returned.
- <step> Append the list just created to the list already attached to the
- field {tt aggs} of the {tt AGG} node (this list contains the {it
- aggregate functions} found in the {it targetlist}).
- <step> Make sure that {it aggregate functions} do appear in the
- {it having clause}. This is done by comparing the length of the list
- attached to {tt aggs} before and after the call to {tt
- check_having_qual_for_aggs()}. If the length has not changed, we
- know that no {it aggregate function} has been detected and that this
- query could have been formulated using only a {it where clause}. In
- this case an error message is printed to the screen and the processing
- is aborted.
- end{itemize}
- %
- pagebreak
- %
- begin{verbatim}
- Plan *
- union_planner(Query *parse)
- {
- List *tlist = parse->targetList;
- + /* copy the original tlist, we will need the
- + * original one for the AGG node later on */
- + List *new_tlist = new_unsorted_tlist(tlist);
- .
- .
- .
- + if (parse->hasAggs)
- + {
- + /* extend targetlist by variables not
- + * contained already but used in the
- + * havingQual.
- + */
- + if (parse->havingQual != NULL)
- + {
- + new_tlist =
- + check_having_qual_for_vars(
- + parse->havingQual,
- + new_tlist);
- + }
- + }
- .
- .
- .
- /* Call the planner for everything
- * but groupclauses and aggregate funcs.
- */
- result_plan = query_planner(parse,
- parse->commandType,
- new_tlist,
- (List *) parse->qual);
- .
- .
- .
- /* If aggregate is present, insert the AGG node
- */
- if (parse->hasAggs)
- {
- int old_length=0, new_length=0;
- /* Create the AGG node but use 'tlist' not
- * 'new_tlist' as target list because we
- * don't want the additional attributes
- * (only used for the havingQual, see
- * above) to show up in the result.
- */
- result_plan = (Plan *) make_agg(tlist,
- result_plan);
- .
- .
- .
- + /* Check every clause of the havingQual for
- + * aggregates used and append them to
- + * the list in result_plan->aggs
- + */
- + foreach(clause,
- + ((Agg *) result_plan)->plan.qual)
- + {
- + /* Make sure there are aggregates in the
- + * havingQual if so, the list must be
- + * longer after check_having_qual_for_aggs
- + */
- + old_length =
- + length(((Agg *) result_plan)->aggs);
- +
- + ((Agg *) result_plan)->aggs =
- + nconc(((Agg *) result_plan)->aggs,
- + check_having_qual_for_aggs(
- + (Node *) lfirst(clause),
- + ((Agg *)result_plan)->
- + plan.lefttree->targetlist,
- + ((List *) parse->groupClause)));
- + /* Have a look at the length of the returned
- + * list. If there is no difference, no
- + * aggregates have been found and that means
- + * that the Qual belongs to the where clause
- + */
- + if (((new_length =
- + length(((Agg *) result_plan)->aggs))==
- + old_length) || (new_length == 0))
- + {
- + elog(ERROR,"This could have been done in a
- + where clause!!");
- + return (Plan *)NIL;
- + }
- + }
- .
- .
- .
- }
- end{verbatim}
- %
- end{itemize}
- subsubsection{Executor}
- The {it executor} takes the {it queryplan} produced by the {it
- planner/optimizer} in the way just described and processes all {it
- aggregate functions} in the way described in section ref{aggregates}
- {it The Implementation of Aggregate Functions} but before the tuple
- derived is handed back the {it operator tree} attached to the field
- {tt qpqual} is evaluated by calling the function {tt
- ExecQual()}. This function recursively steps through the {it operator
- tree} (i.e. the {it having clause}) and evaluates the predicates
- appearing there. Thanks to our changes that have been made to the {it
- planner} the values of all operands needed to evaluate the predicates
- (e.g. the values of all {it aggregate functions}) are already present
- and can be accessed throughout the evaluation without any problems.
- If the evaluation of the {it having qualification} returns {tt true}
- the tuple is returned by the function {tt execAgg()} otherwise it is
- ignored and the next group is processed. \
- \
- The necessary changes and enhancements have been applied to the
- following function in the file {tt
- $ldots$/src/backend/executor/nodeAgg.c}:
- %
- begin{itemize}
- %
- <step> {tt execAgg()}
- Whenever the {it executor} gets to an {tt AGG} node this function is
- called. Before the {it having logic} had been implemented, all the
- {it tuples} of the current group were fetched from the {it
- subplan} and all {it aggregate functions} were applied to these
- tuples. After that, the results were handed back to the calling
- function.
- Since the {it having logic} has been implemented there is one
- additional step executed. Before the results of applying the {it
- aggregate functions} are handed back, the function {tt ExecQual()} is
- called with the representation of the {it having clause} as an
- argument. If {tt true} is returned, the results are handed back,
- otherwise they are ignored and we start from the beginning for the
- next group until a group meeting the restrictions given in the {it
- having clause} is found.
- %
- begin{verbatim}
- TupleTableSlot *
- ExecAgg(Agg *node)
- {
- .
- .
- .
- /* We loop retrieving groups until we find one
- * matching node->plan.qual
- */
- + do
- + {
- .
- .
- .
- /* Apply *all* aggregate function to the
- * tuples of the *current* group
- */
- .
- .
- .
- econtext->ecxt_scantuple =
- aggstate->csstate.css_ScanTupleSlot;
- resultSlot = ExecProject(projInfo, &isDone);
- + /* As long as the retrieved group does not
- + * match the qualifications it is ignored and
- + * the next group is fetched
- + */
- + if(node->plan.qual != NULL)
- + {
- + qual_result =
- + ExecQual(fix_opids(node->plan.qual),
- + econtext);
- + }
- + if (oneTuple) pfree(oneTuple);
- + }
- + while((node->plan.qual!=NULL) &&
- + (qual_result!=true));
- return resultSlot;
- }
- end{verbatim}
- %
- end{itemize}
- section{The Realization of Union, Intersect and Except}
- label{union_impl}
- SQL92 supports the well known set theoretic operations {it union},
- {it intersect} and {it set difference} (the {it set difference} is
- called {it except} in SQL92). The operators are used to connect two
- or more {tt select} statements. Every {tt select} statement returns
- a set of tuples and the operators between the {tt select} statements
- tell how to merge the returned sets of tuples into one result
- relation.
- %
- begin{example}
- label{simple_set_ops}
- Let the following tables be given:
- %
- begin{verbatim}
- XXX All these table examples have combinations of "-" and "+" which
- causes problems inside an SGML comment. Mess them up to keep this
- from happening for now. - thomas 1999-02-10
- A C1|C2|C3 B C1|C2|C3
- MMPMMPMM MMPMMPMM
- 1| a| b 1| a| b
- 2| a| b 5| a| b
- 3| c| d 3| c| d
- 4| e| f 8| e| f
- C C1|C2|C3
- MMPMMPMM
- 4| e| f
- 8| e| f
- end{verbatim}
- Now let's have a look at the results of the following queries:
- begin{verbatim}
- select * from A
- union
- select * from B;
- end{verbatim}
- derives the set theoretic {it union} of the two tables:
- %
- begin{verbatim}
- C1|C2|C3
- MMPMMPMM
- 1| a| b
- 2| a| b
- 3| c| d
- 4| e| f
- 5| a| b
- 8| e| f
- end{verbatim}
- %
- The {tt select} statements used may be more complex:
- %
- begin{verbatim}
- select C1, C3 from A
- where C2 = 'a'
- union
- select C1, C2 from B
- where C3 = 'b';
- end{verbatim}
- %
- noindent will return the following table:
- %
- begin{verbatim}
- C1|C3
- MMPMM
- 1| b
- 2| b
- 1| a
- 5| a
- end{verbatim}
- %
- Note that the selected columns do not need to have identical names,
- they only have to be of the same type. In the previous example we
- selected for {tt C1} and {tt C3} in the first {tt select} statement
- and for {tt C1} and {tt C2} in the second one. The names of the
- resulting columns are taken from the first {tt select} statement. \
- \
- Let's have a look at a query using {tt intersect}:
- %
- begin{verbatim}
- select * from A
- intersect
- select * from B;
- end{verbatim}
- %
- will return:
- begin{verbatim}
- C1|C2|C3
- MMPMMPMM
- 1| a| b
- 3| c| d
- end{verbatim}
- %
- Here is an example using {tt except}:
- begin{verbatim}
- select * from A
- except
- select * from B;
- end{verbatim}
- %
- will return:
- begin{verbatim}
- C1|C2|C3
- MMPMMPMM
- 2| a| b
- 4| e| f
- end{verbatim}
- %
- The last examples were rather simple because they only used one set
- operator at a time with only two operands. Now we look at some more
- complex queries involving more {it operators}:
- %
- begin{verbatim}
- select * from A
- union
- select * from B
- intersect
- select * from C;
- end{verbatim}
- %
- will return:
- %
- begin{verbatim}
- C1|C2|C3
- MMPMMPMM
- 4| e| f
- 8| e| f
- end{verbatim}
- %
- The above query performs the set theoretic computation $(A cup B)
- cap C$. When no parentheses are used, the operations are considered to
- be left associative, i.e. $A cup B cup C cup D$ will be treated as
- $((A cup B) cup C) cup D$. \
- \
- The same query using parenthesis can lead to a completely different
- result:
- %
- begin{verbatim}
- select * from A
- union
- (select * from B
- intersect
- select * from C);
- end{verbatim}
- %
- performs $A cup (B cap C)$ and will return:
- %
- begin{verbatim}
- C1|C2|C3
- MMPMMPMM
- 1| a| b
- 2| a| b
- 3| c| d
- 4| e| f
- 8| e| f
- end{verbatim}
- %
- end{example}
- %
- subsection{How Unions have been Realized Until Version 6.3.2}
- First we give a description of the implementation of {it union} and
- {it union all} until version 6.3.2 because we need it to understand
- the implementation of {it intersect} and {it except} described later. \
- \
- A {it union} query is passed through the usual stages:
- %
- begin{itemize}
- <step> parser
- <step> rewrite system
- <step> planner/optimizer
- <step> executor
- end{itemize}
- %
- and we will now describe what every single stage does to the query.
- For our explanation we assume to process a simple query (i.e. a query
- without {it subselects}, {it aggregates} and without involving {it views})
- subsubsection{The Parser Stage}
- As described earlier the {it parser stage} can be divided into two parts:
- %
- begin{itemize}
- <step> the {it parser} built up by the grammar rules given in {tt gram.y}
- and
- <step> the {it transformation routines} performing a lot of
- changes and analysis to the tree built up by the parser. Most of these
- routines reside in {tt analyze.c}.
- end{itemize}
- %
- %pagebreak
- %
- A {it union} statement consists of two or more {it select}
- statements connected by the keyword {tt union} as the following
- example shows:
- %
- begin{verbatim}
- select * from A
- where C1=1
- union
- select * from B
- where C2 = 'a'
- union
- select * from C
- where C3 = 'f'
- end{verbatim}
- %
- The above {it union} statement consists of three {it select}
- statements connected by the keyword {tt union}. We will refer to the
- first {it select} statement by A, to the second one by B and to the
- third one by C for our further explanation (in the new notation our
- query looks like this: mbox{{tt A union B union C}}).\
- \
- The {it parser} (given by {tt gram.y}) processes all three {tt select}
- statements, creates a {tt SelectStmt} node for every {tt select} and
- attaches the {tt where} qualifications, {it targetlists} etc. to the
- corresponding nodes. Then it creates a list of the second and the
- third {tt SelectStmt} node (of B and C) and attaches it to the field
- {tt unionClause} of the first node (of A). Finally it hands back the
- first node (node A) with the list of the remaining nodes attached as
- shown in figure ref{parser_union_back}.
- %
- begin{figure}
- begin{center}
- epsfig{figure=figures/parser_union_back.ps}
- caption{Data structure handed back by the {it parser}}
- label{parser_union_back}
- end{center}
- end{figure}
- \
- \
- The following {it transformation routines} process the data structure
- handed back by the {it parser}. First the top node (node A) is transformed
- from a {tt SelectStmt} node to a {tt Query} node. The {it
- targetlist}, the {tt where} qualification etc. attached to it are
- transformed as well. Next the list of the remaining nodes (attached to
- {tt unionClause} of node A) is transformed and in this step also a
- check is made if the types and lengths of the {it targetlists} of
- the involved nodes are equal. The new {tt Query} nodes are now handed
- back in the same way as the {tt SelectStmt} nodes were before
- (i.e. the {tt Query} nodes B and C are collected in a list which is
- attached to {tt unionClause} of {tt Query} node A).
- subsubsection{The Rewrite System}
- If any {it rewrite rules} are present for the {tt Query} nodes
- (i.e. one of the {it select} statements uses a {it view}) the
- necessary changes to the {tt Query} nodes are performed (see section
- ref{view_impl} {it Techniques To Implement Views}). Otherwise no
- changes are made to the nodes in this stage.
- subsubsection{Planner/Optimizer}
- This stage has to create a {it plan} out of the {it querytree}
- produced by the {it parser stage} that can be executed by the {it
- executor}. In most cases there are several ways (paths) with different
- cost to get to the same result. It's the {it planner/optimizer's}
- task to find out which path is the cheapest and to create a {it plan}
- using this path. The implementation of {it unions} in <productname>Postgres</productname> is
- based on the following idea: \
- \
- The set derived by evaluating $A cup B$ must contain every member of
- $A$ {bf and} every member of $B$. So if we append the members of $B$
- to the members of $A$ we are almost done. If there exist members
- common to $A$ and $B$ these members are now contained twice in our new
- set, so the only thing left to do is to remove these duplicates.
- pagebreak
- noindent In the case of our example the {it planner} would build up the {it
- tree} shown in figure ref{union_plan}. Every {tt Query} node is
- planned separately and results in a {tt SeqScan} node in our
- example. The three {tt SeqScan} nodes are put together into a list
- which is attached to {tt unionplans} of an {tt Append} node.
- %
- begin{figure}[ht]
- begin{center}
- epsfig{figure=figures/union_plan.ps}
- caption{{it Plan} for a union query}
- label{union_plan}
- end{center}
- end{figure}
- subsubsection{Executor}
- The {it executor} will process all the {tt SeqScan} nodes and append all
- the delivered tuples to a single {it result relation}. Now it is
- possible that duplicate tuples are contained in the {it result relation}
- which have to be removed. The removal is done by the {tt Unique} node
- and the sort is just performed to make its work easier.
- subsection{How Intersect, Except and Union Work Together}
- The last section showed that every stage ({it parser stage}, {it
- planner/optimizer}, {it executor}) of <productname>Postgres</productname> has to provide
- features in order to support {it union} statements. For the
- implementation of {it intersect} and {it except} statements (and
- statements involving all {it set operators}) we choose a different approach
- based on {it query rewriting}. \
- \
- The idea is based on the fact that {it intersect} and {it except}
- statements are redundant in SQL, i.e. for every {it intersect} or
- {it except} statement it is possible to formulate a semantically
- equivalent statement without using {it intersect} or {it except}.
- %
- begin{example}
- label{transform_intersect}
- This example shows how a query using {it intersect} can be
- transformed to a semantically equivalent query without an {it
- intersect}:
- %
- pagebreak
- %
- begin{verbatim}
- select C1, C3 from A
- where C1 = 1
- intersect
- select C1, C2 from B
- where C2 = 'c';
- end{verbatim}
- %
- is equivalent to:
- %
- begin{verbatim}
- select C1, C3
- from A
- where C1 = 1 and
- (C1, C3) in (select C1, C2
- from B
- where C2 = 'c');
- end{verbatim}
- %
- This example shows how an {it except} query can be transformed to an
- {it except}-less form:
- %
- begin{verbatim}
- select C1, C2 from A
- where C2 = 'c'
- except
- select C1, C3 from B
- where C3 = 'f';
- end{verbatim}
- %
- is equivalent to:
- %
- begin{verbatim}
- select C1, C2
- from A
- where C2 = 'c' and
- (C1, C2) not in (select C1, C3
- from B
- where C3 = 'f');
- end{verbatim}
- %
- end{example}
- %
- The transformations used in example ref{transform_intersect} are
- always valid because they just implement the set theoretic definition
- of {it intersect} and {it except}:
- %
- begin{definition}
- label{def_intersect}
- ~ \
- The {it intersection} of two sets $A$ and $B$ is
- defined as:
- begin{displaymath}
- (A cap B) := {x mid x in A wedge x in B }
- end{displaymath}
- %
- The {it intersection} of $n$ sets $A_{1}, ldots, A_{n}$ is defined as:
- %
- begin{displaymath}
- bigcap_{i=1}^{n} A_{i} := {x mid bigwedge_{i=1}^{n} x in A_{i} }
- end{displaymath}
- %
- end{definition}
- %
- begin{definition}
- label{def_except}
- ~ \
- The {it difference} of two sets $A$ and $B$ is
- defined as:
- begin{displaymath}
- (A backslash B) := {x mid x in A wedge x notin B }
- end{displaymath}
- %
- end{definition}
- %
- begin{definition}
- label{def_union}
- ~\
- The {it union} of two sets $A$ and $B$ is
- defined as:
- begin{displaymath}
- (A cup B) := {x mid x in A vee x in B }
- end{displaymath}
- %
- The {it union} of $n$ sets $A_{1}, ldots, A_{n}$ is defined as:
- %
- begin{displaymath}
- bigcup_{i=1}^{n} A_{i} := {x mid bigvee_{i=1}^{n} x in A_{i} }
- end{displaymath}
- %
- end{definition}
- begin{definition} Disjunctive Normal Form (DNF) \
- Let $F = C_{1} vee ldots vee C_{n}$ be given where every $C_{i}$ is
- of the form $(L_{i}^{1} wedge ldots wedge L_{i}^{k_{i}})$ and
- $L_{i}^{j}$ is a propositional variable or the negation of a
- propositional variable. Now we say $F$ is in DNF.
- end{definition}
- %
- begin{example} In the following example the $L_{i}^{j}$ are of the form
- $x in X$ or $neg (x in X)$: \
- indent $((x in A wedge neg (x in B) wedge x in C) vee (x in D
- wedge x in E))$ is a formula in DNF \
- indent $((x in A vee x in B) wedge (x in C vee neg (x in D)))$
- is not in DNF.
- end{example}
- %
- The transformation of any formula in propositional logic into DNF is done by
- successively applying the following rules: \
- \
- indent $(R1)~~neg (F_{1} vee F_{2}) Rightarrow (neg F_{1} wedge neg
- F_{2})$ \
- indent $(R2)~~neg (F_{1} wedge F_{2}) Rightarrow (neg F_{1} vee neg
- F_{2})$ \
- indent $(R3)~~F_{1} wedge (F_{2} vee F_{3}) Rightarrow (F_{1} wedge
- F_{2}) vee (F_{1} wedge F_{3})$ \
- indent $(R4)~~(F_{1} vee F_{2}) wedge F_{3} Rightarrow (F_{1} wedge
- F_{3}) vee (F_{2} wedge F_{3})$ \
- \
- It can be shown that the transformation using the rules (R1) to
- (R4) always terminates after a finite number of steps.
- subsubsection{Set Operations as Propositional Logic Formulas}
- Using the definitions from above we can treat formulas
- involving set theoretic operations as formulas of {it propositional
- logic}. As we will see later these formulas can easily be used in the
- {tt where-} and {tt having} qualifications of the {tt select}
- statements involved in the query.
- %
- begin{example}
- Here are some examples: \ \
- %
- indent $((A cup B) cap C) := {x mid (x in A vee x in B) wedge x in C
- }$ \
- indent $((A cup B) cap (C cup D)) := {x mid (x in A vee x in B)
- wedge (x in C vee x in D) }$ \
- indent $((A cap B) backslash C) := {x mid (x in A wedge x in
- B) wedge x notin C }$ \
- indent $(A backslash (B cup C)) := {x mid x in A wedge neg (x
- in B vee x in C) }$ \
- indent $(((A cap B) cup (C backslash D)) cap E) := {((x in A
- wedge x in B) vee (x in C wedge x notin D)) wedge x in E }$
- %
- end{example}
- %
- subsection{Implementing Intersect and Except Using the
- Union Capabilities}
- %
- We want to be able to use queries involving more than just one type of
- set operation (e.g. only {it union} or only {it intersect}) at a
- time, so we have to look for a solution that supports correct handling
- of queries like that. As described above there is a solution for pure
- {it union} statements implemented already, so we have to develop an
- approach that makes use of these {it union} capabilities. \
- \
- As figure ref{parser_union_back} illustrates, the operands of a {it
- union} operation are just {tt Query} nodes (the first operand is the
- top node and all further operands form a list which is attached to the
- field {tt unionClause} of the top node). So our goal will be to
- transform every query involving set operations into this form. (Note
- that the operands to the {it union} operation may be complex,
- i.e. {it subselects}, {it grouping}, {it aggregates} etc. are allowed.)
- The transformation of a query involving set operations in any order
- into a query that can be accepted by the {it union} logic is
- equivalent to transforming the {it membership formula} (see
- definitions ref{def_intersect}, ref{def_except} and ref{def_union})
- in propositional logic into {it disjunctive normal form} (DNF). The
- transformation of any formula in propositional logic into DNF is
- always possible in a finite number of steps.
- noindent The advantage of this {it transformation technique} is the little
- impact on the whole system and the implicit invocation of the {it
- planner/optimizer}. The only changes necessary are made to the {it
- parser stage} and the {it rewrite system}. \
- \
- Here are some changes that had to be applied to the source code before
- the {it parser stage} and the {it rewrite system} could be adapted:
- %
- begin{itemize}
- <step> Add the additional field {tt intersectClause} to the data
- structures {tt Query} and {tt InsertStmt} defined in the file {tt
- $ldots$/src/include/nodes/parsenodes.h}:
- %
- begin{verbatim}
- typedef struct Query
- {
- NodeTag type;
- CmdType commandType;
- .
- .
- .
- Node *havingQual;
- + List *intersectClause;
- List *unionClause;
- List *base_relation_list_;
- List *join_relation_list_;
- } Query;
- typedef struct InsertStmt
- {
- NodeTag type;
- .
- .
- .
- bool unionall;
- + List *intersectClause;
- } InsertStmt;
- end{verbatim}
- %
- <step> Add the new keywords {tt EXCEPT} and {tt INTERSECT} to the
- file {tt $ldots$/src/backend/parser/keywords.c}:
- %
- begin{verbatim}
- static ScanKeyword ScanKeywords[] = {
- {"abort", ABORT_TRANS},
- {"action", ACTION},
- .
- .
- .
- {"end", END_TRANS},
- + {"except", EXCEPT},
- .
- .
- .
- {"instead", INSTEAD},
- + {"intersect", INTERSECT},
- .
- .
- .
- };
- end{verbatim}
- %
- <step> <productname>Postgres</productname> contains functions to convert the internal
- representation of a {it parsetree} or {it plantree} into an ASCII
- representation (that can easily be printed to the screen (for
- debugging purposes) or be stored in a file) and vice versa. These
- functions have to be adapted to be able to deal with {it intersects}
- and {it excepts}. These functions can be found in the files {tt
- $ldots$/src/backend/nodes/outfuncs.c} and {tt
- $ldots$/src/backend/nodes/readfuncs.c}:
- %
- begin{verbatim}
- static void
- _outQuery(StringInfo str, Query *node)
- {
- .
- .
- .
- appendStringInfo(str, " :unionClause ");
- _outNode(str, node->unionClause);
- + appendStringInfo(str, " :intersectClause ");
- + _outNode(str, node->intersectClause);
- }
- static Query *
- _readQuery()
- {
- .
- .
- .
- token = lsptok(NULL, &length);
- local_node->unionClause = nodeRead(true);
- + token = lsptok(NULL, &length);
- + local_node->intersectClause = nodeRead(true);
- return (local_node);
- }
- end{verbatim}
- %
- <step> The function {tt ExecReScan()} is called whenever a new
- execution of a given {it plan} has to be started (i.e. whenever we
- have to start from the beginning with the first tuple again). The call
- to this function happens implicitly. For the special kind of
- subqueries we are using for the rewritten queries (see example
- ref{transform_intersect}) we have to take that also
- {tt Group} nodes are processed. The function can be found in the file
- {tt $ldots$/backend/executor/execAmi.c}.
- %
- begin{verbatim}
- void
- ExecReScan(Plan *node, ExprContext *exprCtxt,
- Plan *parent)
- {
- .
- .
- .
- switch (nodeTag(node))
- {
- .
- .
- .
- end{verbatim}
- pagebreak
- begin{verbatim}
- + case T_Group:
- + ExecReScanGroup((Group *) node,
- + exprCtxt, parent);
- + break;
- .
- .
- .
- }
- }
- end{verbatim}
- %
- <step> The function {tt ExecReScanGroup()} is called by {tt
- ExecReScan()} described above whenever a {tt Group} node is detected
- and can be found in the file {tt
- $ldots$/src/backend/executor/nodeGroup.c} . It has been created for
- the {it intersect} and {it except} logic although it is actually
- needed by the special kind of subselect (see above).
- %
- begin{verbatim}
- void
- ExecReScanGroup(Group *node, ExprContext *exprCtxt,
- Plan *parent)
- {
- GroupState *grpstate = node->grpstate;
-
- grpstate->grp_useFirstTuple = FALSE;
- grpstate->grp_done = FALSE;
- grpstate->grp_firstTuple = (HeapTupleData *)NIL;
-
- /*
- * if chgParam of subnode is not null then plan
- * will be re-scanned by first ExecProcNode.
- */
- if (((Plan *) node)->lefttree->chgParam == NULL)
- ExecReScan(((Plan *) node)->lefttree,
- exprCtxt, (Plan *) node);
- }
- end{verbatim}
- %
- end{itemize}
- subsubsection{Parser}
- The {it parser} defined in the file {tt
- $ldots$/src/backend/parser/gram.y} had to be modified in two ways:
- %
- begin{itemize}
- %
- <step> The grammar had to be adapted to support the usage of
- parenthesis (to be able to specify the order of execution of the set
- operators).
- %
- <step> The code building up the data structures handed back by the
- {it parser} had to be inserted.
- %
- end{itemize}
- %
- Here is a part of the grammar which is responsible for {tt select}
- statements having the code building up the data structures inserted:
- %
- pagebreak
- %
- begin{verbatim}
- SelectStmt : select_w_o_sort sort_clause
- {
- .
- .
- .
- /* $1 holds the tree built up by the
- * rule 'select_w_o_sort'
- */
- Node *op = (Node *) $1;
- if IsA($1, SelectStmt)
- {
- SelectStmt *n = (SelectStmt *)$1;
- n->sortClause = $2;
- $$ = (Node *)n;
- }
- else
- {
- /* Create a "flat list" of the operator
- * tree built up by 'select_w_o_sort' and
- * let select_list point to it
- */
- create_select_list((Node *)op,
- &select_list,
- &unionall_present);
- /* Replace all the A_Expr nodes in the
- * operator tree by Expr nodes.
- */
- op = A_Expr_to_Expr(op, &intersect_present);
- .
- .
- .
- /* Get the leftmost SelectStmt node (which
- * automatically represents the first Select
- * Statement of the query!) */
- first_select =
- (SelectStmt *)lfirst(select_list);
- /* Attach the list of all SelectStmt nodes
- * to unionClause
- */
- first_select->unionClause = select_list;
- /* Attach the whole operator tree to
- * intersectClause */
- first_select->intersectClause =
- (List *) op;
- /* finally attach the sort clause */
- first_select->sortClause = $2;
-
- /* Now hand it back! */
- $$ = (Node *)first_select;
- }
- }
- ;
- end{verbatim}
- pagebreak
- begin{verbatim}
- select_w_o_sort : '(' select_w_o_sort ')'
- {
- $$ = $2;
- }
- | SubSelect
- {
- $$ = $1;
- }
- | select_w_o_sort EXCEPT select_w_o_sort
- {
- $$ = (Node *)makeA_Expr(AND,NULL,$1,
- makeA_Expr(NOT,NULL,NULL,$3));
- }
- | select_w_o_sort UNION opt_union select_w_o_sort
- {
- if (IsA($4, SelectStmt))
- {
- SelectStmt *n = (SelectStmt *)$4;
- n->unionall = $3;
- }
- $$ = (Node *)makeA_Expr(OR,NULL,$1,$4);
- }
- | select_w_o_sort INTERSECT select_w_o_sort
- {
- $$ = (Node *)makeA_Expr(AND,NULL,$1,$3);
- }
- ;
- end{verbatim}
- % pagebreak
- begin{verbatim}
- SubSelect : SELECT opt_unique res_target_list2
- result from_clause where_clause
- group_clause having_clause
- {
- SelectStmt *n = makeNode(SelectStmt);
- n->unique = $2;
- .
- .
- .
- n->havingClause = $8;
- $$ = (Node *)n;
- }
- ;
- end{verbatim}
- %
- The keywords {tt SELECT}, {tt EXCEPT}, {tt UNION}, {tt INTERSECT}, {tt
- '('} and {tt ')'} are {it terminal symbols} and {tt SelectStmt},
- {tt select_w_o_sort}, {tt sort_clause}, {tt opt_union}, {tt
- SubSelect}, {tt opt_unique}, {tt res_target_list2}, {tt result},
- {tt from_clause}, {tt where_clause}, {tt group_clause}, {tt
- having_clause} are {it nonterminal symbols}. The symbols {tt
- EXCEPT}, {tt UNION} and {tt INTERSECT} are {it left
- associative} meaning that a statement like:
- %
- begin{verbatim}
- select * from A
- union
- select * from B
- union
- select * from C;
- end{verbatim}
- %
- pagebreak
- will be treated as:
- %
- begin{verbatim}
- ((select * from A
- union
- select * from B)
- union
- select * from C)
- end{verbatim}
- %
- The {tt select_w_o_sort} rule builds up an {it operator tree}
- using nodes of type {tt A_Expr}. For every {it union} an {tt OR}
- node is created, for every {it intersect} an {tt AND} node and for
- every {it except} and {tt AND NOT} node building up a representation
- of a formula in propositional logic. If the query parsed did not
- contain any set operations the rule hands back a {tt SelectStmt} node
- representing the query otherwise the top node of the {it operator
- tree} is returned. Figure
- ref{union_intersect_select_w_o_sort} shows a typical {it operator tree}
- returned by the {tt select_w_o_sort} rule.
- begin{figure}[ht]
- begin{flushright}
- epsfig{figure=figures/union_intersect_select_w_o_sort.ps}
- caption{{it Operator tree} for $(A cup B) backslash (C cap D)$}
- label{union_intersect_select_w_o_sort}
- end{flushright}
- end{figure}
- noindent The rule {tt SelectStmt} transforms the {it operator tree} built of
- {tt A_Expr} nodes into an {it operator tree} using {tt Expr} nodes
- by a call to the function {tt A_Expr_to_Expr()} which additionally
- replaces every {tt OR} node by an {tt AND} node and vice
- versa. This is performed in order to be able to use the function {tt
- cnfify()} later on. \
- \
- The {it transformations} following the {it parser} expect a {tt
- SelectStmt} node to be returned by the rule {tt SelectStmt} and not
- an {it operator tree}. So if the rule {tt select_w_o_sort} hands
- back such a node (meaning that the query did not contain any set
- operations) we just have to attach the data structure built up by the
- {tt sort_clause} rule and are finished, but when we get an {it
- operator tree} we have to perform the following steps:
- %
- begin{itemize}
- %
- <step> Create a flat list of all {tt SelectStmt} nodes of the {it
- operator tree} (by a call to the function {tt create_select_list()})
- and attach the list to the field
- mbox{{tt unionClause}} of the leftmost {tt SelectStmt} (see next step).
- %
- <step> Find the leftmost leaf ({tt SelectStmt} node) of the {it operator
- tree} (this is automatically the first member of the above created
- list because of the technique {tt create_select_list()} uses to
- create the list).
- %
- <step> Attach the whole {it operator tree} (including the leftmost node
- itself) to the field mbox{{tt intersectClause}} of the leftmost {tt
- SelectStmt} node.
- %
- <step> Attach the data structure built up by the {tt sort_clause}
- rule to the field {tt sortClause} of the leftmost {tt SelectStmt} node.
- %
- <step> Hand back the leftmost {tt SelectStmt} node (with
- the {it operator tree}, the list of all {tt SelectStmt} nodes and the {tt
- sortClause} attached to it).
- %
- end{itemize}
- %
- noindent Figure ref{union_intersect_select} shows the final data structure
- derived from the {it operator tree} shown in figure
- ref{union_intersect_select_w_o_sort} handed back by the {tt SelectStmt} rule:
- %
- begin{figure}[ht]
- begin{flushright}
- epsfig{figure=figures/union_intersect_select.ps}
- caption{Data structure handed back by {tt SelectStmt} rule}
- label{union_intersect_select}
- end{flushright}
- end{figure}
- pagebreak
- noindent Here is a description of the above used functions. They can
- be found in the file {tt $ldots$/src/backend/parser/anlayze.c}.
- %
- begin{itemize}
- %
- <step> {tt create_select_list()}: \
- This function steps through the {it tree} handed to it by the
- parameter {tt ptr} and creates a list of all {tt SelectStmt} nodes
- found. The list is handed back by the parameter {tt
- select_list}. The function uses a recursive {it depth first search}
- algorithm to examine the {it tree} leading to the fact that the
- leftmost {tt SelectStmt} node will appear first in the created list.
- %
- begin{verbatim}
- void
- create_select_list(Node *ptr, List **select_list,
- bool *unionall_present)
- {
- if(IsA(ptr, SelectStmt))
- {
- *select_list = lappend(*select_list, ptr);
- if(((SelectStmt *)ptr)->unionall == TRUE)
- *unionall_present = TRUE;
- return;
- }
-
- /* Recursively call for all arguments.
- * A NOT expr has no lexpr!
- */
- if (((A_Expr *)ptr)->lexpr != NULL)
- create_select_list(((A_Expr *)ptr)->lexpr,
- select_list, unionall_present);
- create_select_list(((A_Expr *)ptr)->rexpr,
- select_list, unionall_present);
- }
- end{verbatim}
- %
- <step> {tt A_Expr_to_Expr()}: \
- This function recursively steps through the {it operator tree} handed
- to it by the parameter {tt ptr} and replaces {tt A_Expr} nodes by
- {tt Expr} nodes. Additionally it exchanges {tt AND} nodes with {tt
- OR} nodes and vice versa. The reason for this exchange is easy to
- understand. We implement {it intersect} and {it except} clauses by
- rewriting these queries to semantically equivalent queries that use
- {tt IN} and {tt NOT IN} subselects. To be able to use all three
- operations ({it unions}, {it intersects} and {it excepts}) in one
- complex query, we have to translate the queries into {it Disjunctive
- Normal Form} (DNF). Unfortunately there is no function {tt dnfify()},
- but there is a function {tt cnfify()} which produces DNF when we
- exchange {tt AND} nodes with {tt OR} nodes and vice versa before
- calling {tt cnfify()} and exchange them again in the result.
- %
- begin{verbatim}
- Node *
- A_Expr_to_Expr(Node *ptr,
- bool *intersect_present)
- {
- Node *result;
-
- switch(nodeTag(ptr))
- {
- case T_A_Expr:
- {
- A_Expr *a = (A_Expr *)ptr;
-
- switch (a->oper)
- {
- case AND:
- {
- Expr *expr = makeNode(Expr);
- Node *lexpr =
- A_Expr_to_Expr(((A_Expr *)ptr)->lexpr,
- intersect_present);
- Node *rexpr =
- A_Expr_to_Expr(((A_Expr *)ptr)->rexpr,
- intersect_present);
- *intersect_present = TRUE;
-
- expr->typeOid = BOOLOID;
- expr->opType = OR_EXPR;
- expr->args = makeList(lexpr, rexpr, -1);
- result = (Node *) expr;
- break;
- }
- case OR:
- {
- Expr *expr = makeNode(Expr);
- Node *lexpr =
- A_Expr_to_Expr(((A_Expr *)ptr)->lexpr,
- intersect_present);
- Node *rexpr =
- A_Expr_to_Expr(((A_Expr *)ptr)->rexpr,
- intersect_present);
- expr->typeOid = BOOLOID;
- expr->opType = AND_EXPR;
- expr->args = makeList(lexpr, rexpr, -1);
- result = (Node *) expr;
- break;
- }
- case NOT:
- {
- Expr *expr = makeNode(Expr);
- Node *rexpr =
- A_Expr_to_Expr(((A_Expr *)ptr)->rexpr,
- intersect_present);
- expr->typeOid = BOOLOID;
- expr->opType = NOT_EXPR;
- expr->args = makeList(rexpr, -1);
- result = (Node *) expr;
- break;
- }
- }
- break;
- }
- default:
- {
- result = ptr;
- }
- }
- }
- return result;
- }
- end{verbatim}
- %
- end{itemize}
- noindent Note that the {tt stmtmulti} and {tt OptStmtMulti} rules had to be
- changed in order to avoid {it shift/reduce} conflicts. The old rules
- allowed an invalid syntax (e.g. the concatenation of two statements
- without a ';' inbetween) which will be prevented now. The new rules
- have the second line commented out as shown below:
- %
- begin{verbatim}
- stmtmulti : stmtmulti stmt ';'
- /* | stmtmulti stmt */
- | stmt ';'
- ;
- OptStmtMulti : OptStmtMulti OptimizableStmt ';'
- /* | OptStmtMulti OptimizableStmt */
- | OptimizableStmt ';'
- ;
- end{verbatim}
- %
- subsubsection{Transformations}
- This step normally transforms every {tt SelectStmt} node found into a
- {tt Query} node and does a lot of transformations to the {it targetlist},
- the {tt where} qualification etc. As mentioned above this stage
- expects a {tt SelectStmt} node and cannot handle an {tt A_Expr}
- node. That's why we did the changes to the {it operator tree} shown in
- figure ref{union_intersect_select}. \
- \
- In this stage only very few changes have been necessary:
- %
- begin{itemize}
- <step> The transformation of the list attached to {tt unionClause} is
- prevented. The raw list is now passed through instead and the necessary
- transformations are performed at a later point in time.
- <step> The additionally introduced field {tt intersectClause} is also
- passed untouched through this stage.
- end{itemize}
- noindent The changes described in the above paragraph have been
- applied to the functions {tt transformInsertStmt()} and {tt
- transformSelectStmt()} which are contained in the file {tt
- $ldots$/src/backend/parser/analyze.c}:
- %
- begin{itemize}
- %
- <step> {tt transformInsertStmt()}:
- %
- begin{verbatim}
- static Query *
- transformInsertStmt(ParseState *pstate,
- InsertStmt *stmt)
- {
- .
- .
- .
- end{verbatim}
- pagebreak
- begin{verbatim}
- /* Just pass through the unionClause and
- * intersectClause. We will process it in
- * the function Except_Intersect_Rewrite()
- */
- qry->unionClause = stmt->unionClause;
- qry->intersectClause = stmt->intersectClause;
- .
- .
- .
- return (Query *) qry;
- }
- end{verbatim}
- %
- <step> {tt transformSelectStmt()}:
- %
- begin{verbatim}
- static Query *
- transformSelectStmt(ParseState *pstate,
- SelectStmt *stmt)
- {
- .
- .
- .
- /* Just pass through the unionClause and
- * intersectClause. We will process it in
- * the function Except_Intersect_Rewrite()
- */
- qry->unionClause = stmt->unionClause;
- qry->intersectClause = stmt->intersectClause;
- .
- .
- .
- return (Query *) qry;
- }
- end{verbatim}
- %
- end{itemize}
- subsubsection{The Rewrite System}
- In this stage the information contained in the {it operator tree} attached
- to the topmost {tt SelectStmt} node is used to form a tree of {tt
- Query} nodes representing the rewritten query (i.e. the semantically
- equivalent query that contains only {it union} but no {it intersect}
- or {it except} operations). \
- \
- The following steps have to be performed:
- %
- begin{itemize}
- <step> Save the {tt sortClause}, {tt uniqueFlag}, {tt targetList}
- fields etc. of the topmost {tt Query} node because the topmost node
- may change during the rewrite process (remember (only) the topmost
- {tt SelectStmt} node has already been transformed to a {tt Query}
- node).
- %
- <step> Recursively step through the {it operator tree} and transform
- every {tt SelectStmt} node to a {tt Query} node using the function
- {tt intersect_tree_analyze()} described below. The one node already
- transformed (the topmost node) is still contained in the {it operator
- tree} and must not be transformed again because this would cause
- troubles in the {it transforming logic}.
- %
- begin{figure}[ht]
- begin{center}
- epsfig{figure=figures/union_intersect_dnf.ps}
- caption{Data structure of $(A cup B) backslash C$ after transformation to DNF}
- label{union_intersect_dnf}
- end{center}
- end{figure}
- %
- <step> Transform the new {it operator tree} into DNF (disjunctive normal
- form). <productname>Postgres</productname> does not provide any function for the transformation
- into DNF but it provides a function {tt cnfify()} that performs a
- transformation into CNF (conjunctive normal form). So we can easily
- make use of this function when we exchange every {tt OR} with an {tt
- AND} and vice versa before calling {tt cnfify()} as we did already in
- the {it parser} (compare figure ref{union_intersect_select_w_o_sort}
- to figure ref{union_intersect_select}). When {tt cnfify()} is called
- with a special flag, the {tt removeAndFlag} set to {tt true} it
- returns a list where the entries can be thought of being connected
- together by {tt ANDs}, so the explicit {tt AND} nodes are removed.
- After {tt cnfify()} has been called we normally would have to
- exchange {tt OR} and {tt AND} nodes again. We skip this step by
- simply treating every {tt OR} node as an {tt AND} node throughout
- the following steps (remember, that there are no {tt AND} nodes left
- that have to be treated as {tt OR} nodes because of the {tt
- removeAndFlag}).
- noindent Figure ref{union_intersect_dnf} shows what the data
- structure looks like after the transformation to DNF has taken place
- for the following query:
- %
- begin{verbatim}
- (select * from A
- union
- select * from B)
- except
- select * from C;
- end{verbatim}
- %
- <step> For every entry of the list returned by {tt cnfify()} (i.e. for every
- {it operator tree} which may only contain {tt OR} and {tt NOT}
- operator nodes and {tt Query} nodes as leaves) contained in the {tt
- union_list} perform the following steps:
- %
- begin{itemize}
- %
- <step> Check if the {it targetlists} of all {tt Query} nodes appearing are
- compatible (i.e. all {it targetlists} have the same number of attributes
- and the corresponding attributes are of the same type)
- %
- <step> There must be at least one positive {tt
- OR} node (i.e. at least one {tt OR} node which is not preceded by a
- {tt NOT} node). Create a list of all {tt Query} nodes (or of {tt
- Query} nodes preceded by {tt NOT} nodes if {tt OR NOT} is found)
- with the non negated node first using the function {tt
- create_list()} described below.
- %
- <step> The first (non negated) node of the list will be the topmost
- {tt Query} node of the current {it union} operand. For all other
- nodes found in the list add an {tt IN} subselect ({tt NOT IN}
- subselect if the {tt Query} node is preceded by a {tt NOT}) to the
- {tt where} qualification of the topmost node. Adding a
- subselect to the {tt where} qualification is done by logically
- {tt AND}ing it to the original qualification.
- %
- <step> Append the {tt Query} node setup in the last steps to a list which
- is hold by the pointer {tt union_list}.
- end{itemize}
- %
- <step> Take the first node of {tt union_list} as the new topmost node
- of the whole query and attach the rest of the list to the
- field {tt unionClause} of this topmost node. Since the new topmost
- node might differ from the original one (i.e. from the node which was
- topmost when we entered the {it rewrite stage}) we have to attach the
- fields saved in the first step to the new topmost node (i.e. the {tt
- sortClause}, {tt targetList}, {tt unionFlag}, etc.).
- %
- <step> Hand the new topmost {tt Query} node back. Now the normal
- {it query rewriting} takes place (in order to handle views if
- present) and then the {it planner/optimizer} and {it executor}
- functions are called to get a result. There have no changes been made
- to the code of these stages.
- %
- end{itemize}
- Figure ref{union_operand} shows the rewritten data structure of the
- query:
- begin{verbatim}
- select C1, C2 from A
- intersect
- select C1, C3 from C;
- end{verbatim}
- %
- % clearpage
- %
- noindent against the tables defined in example ref{simple_set_ops}.
- The rewritten data structure represents the query:
- begin{verbatim}
- select C1, C2 form A
- where (C1, C2) in
- (select C1,C3 from C);
- end{verbatim}
- %
- noindent The field {tt lefttree} of the {tt Sublink} node points to a list
- where every entry points to a {tt VAR} node of the {it targetlist} of the
- topmost node (node A). The field {tt oper} of the {tt Sublink} node
- points to a list holding a pointer to an {tt Expr} node for every
- attribute of the topmost {it targetlist}. Every {tt Expr} node is used to
- compare a {tt VAR} node of the topmost {it targetlist} with the
- corresponding {tt VAR} node of the subselect's {it targetlist}. So the
- first argument of every {tt Expr} node points to a {tt VAR} node of
- the topmost {it targetlist} and the second argument points to the
- corresponding {tt VAR} node of the subselect's {it targetlist}.
- %
- begin{figure}[ht]
- begin{flushright}
- epsfig{figure=figures/union_operand.ps}
- caption{Data structure of $A cap C$ after query rewriting}
- label{union_operand}
- end{flushright}
- end{figure}
- %
- clearpage
- %
- noindent If the user's query involves {it union} as well as {it
- intersect} or {it except} there will be more {tt Query} nodes of the
- form shown in figure ref{union_operand}. One will be the topmost node
- (as described above) and the others will be collected in a list which
- is attached to the field {tt unionClause} of the topmost node. (The
- {it intersectClause} fields of all {tt Query} nodes will be set to
- {tt NULL} because they are no longer needed.) \
- \
- %
- The function {tt pg_parse_and_plan()} is responsible for invoking the
- rewrite procedure. It can be found in the file {tt
- $ldots$/src/backend/tcop/postgres.c}.
- %
- begin{verbatim}
- List *
- pg_parse_and_plan(char *query_string, Oid *typev,
- int nargs,
- List **queryListP,
- CommandDest dest)
- {
- .
- .
- .
- /* Rewrite Union, Intersect and Except Queries
- * to normal Union Queries using IN and NOT
- * IN subselects */
- if(querytree->intersectClause != NIL)
- {
- querytree = Except_Intersect_Rewrite(querytree);
- }
- .
- .
- .
- }
- end{verbatim}
- %
- Here are the functions that have been added to perform the
- functionality described above. They can be found in the file {tt
- $ldots$/src/backend/rewrite/rewriteHandler.c}.
- %
- begin{itemize}
- <step> {tt Except_Intersect_Rewrite()} \
- Rewrites queries involving {it union clauses}, {it intersect
- clauses} and {it except clauses} to semantiacally equivalent queries
- that use {tt IN} and {tt NOT IN} subselects instead.
- The {it operator tree} is attached to {tt intersectClause} (see rule
- {tt SelectStmt} above) of the {it parsetree} given as an
- argument. First we save some clauses (the {tt sortClause}, the {tt
- unique flag} etc.). Then we translate the {it operator tree} to DNF
- ({it Disjunctive Normal Form}) by {tt cnfify()}. Note that {tt
- cnfify()} produces CNF but as we exchanged {tt AND} nodes with {tt
- OR} nodes within function {tt A_Expr_to_Expr()} earlier we get DNF
- when we exchange {tt AND} nodes and {tt OR} nodes again in the
- result. Now we create a new (rewritten) query by examining the new
- {it operator tree} which is in DNF now. For every {tt AND} node we
- create an entry in the {it union list} and for every {tt OR} node we
- create an {tt IN} subselect. ({tt NOT IN} subselects are created for
- {tt OR NOT} nodes). The first entry of the {it union list} is handed
- back but before that the saved clauses ({tt sortClause} etc.) are
- restored to the new top node. Note that the new top node can differ
- from the one of the {it parsetree} given as argument because of the
- translation into DNF. That's why we had to save the {tt sortClause}
- etc.
- %
- pagebreak
- %
- begin{verbatim}
- Query *
- Except_Intersect_Rewrite (Query *parsetree)
- {
- .
- .
- .
- /* Save some fields, to be able to restore them
- * to the resulting top node at the end of the
- * function
- */
- sortClause = parsetree->sortClause;
- uniqueFlag = parsetree->uniqueFlag;
- into = parsetree->into;
- isBinary = parsetree->isBinary;
- isPortal = parsetree->isPortal;
-
- /* Transform the SelectStmt nodes into Query nodes
- * as usually done by transformSelectStmt() earlier.
- * /
- intersectClause =
- (List *)intersect_tree_analyze(
- (Node *)parsetree->intersectClause,
- (Node *)lfirst(parsetree->unionClause),
- (Node *)parsetree);
- .
- .
- .
- /* Transform the operator tree to DNF */
- intersectClause =
- cnfify((Expr *)intersectClause, true);
- /* For every entry of the intersectClause list we
- * generate one entry in the union_list
- */
- foreach(intersect, intersectClause)
- {
- /* For every OR we create an IN subselect and
- * for every OR NOT we create a NOT IN subselect,
- */
- intersect_list = NIL;
- create_list((Node *)lfirst(intersect),
- &intersect_list);
- /* The first node will become the Select Query
- * node, all other nodes are transformed into
- * subselects under this node!
- */
- intersect_node = (Query *)lfirst(intersect_list);
- intersect_list = lnext(intersect_list);
- .
- .
- .
- /* Transform all remaining nodes into subselects
- * and add them to the qualifications of the
- * Select Query node
- */
- while(intersect_list != NIL)
- {
- n = makeNode(SubLink);
-
- /* Here we got an OR so transform it to an
- * IN subselect
- */
- if(IsA(lfirst(intersect_list), Query))
- {
- .
- .
- .
- n->subselect = lfirst(intersect_list);
- op = "=";
- n->subLinkType = ANY_SUBLINK;
- n->useor = false;
- }
- /* Here we got an OR NOT node so transform
- * it to a NOT IN subselect
- */
- else
- {
- .
- .
- .
- n->subselect =
- (Node *)lfirst(((Expr *)
- lfirst(intersect_list))->args);
- op = "<>";
- n->subLinkType = ALL_SUBLINK;
- n->useor = true;
- }
- /* Prepare the lefthand side of the Sublinks:
- * All the entries of the targetlist must be
- * (IN) or must not be (NOT IN) the subselect
- */
- foreach(elist, intersect_node->targetList)
- {
- Node *expr = lfirst(elist);
- TargetEntry *tent = (TargetEntry *)expr;
-
- n->lefthand =
- lappend(n->lefthand, tent->expr);
- }
- /* The first arguments of oper also have to be
- * created for the sublink (they are the same
- * as the lefthand side!)
- */
- left_expr = n->lefthand;
- right_expr =
- ((Query *)(n->subselect))->targetList;
- foreach(elist, left_expr)
- {
- Node *lexpr = lfirst(elist);
- Node *rexpr = lfirst(right_expr);
- TargetEntry *tent = (TargetEntry *) rexpr;
- Expr *op_expr;
-
- op_expr = make_op(op, lexpr, tent->expr);
- n->oper = lappend(n->oper, op_expr);
- right_expr = lnext(right_expr);
- }
- /* If the Select Query node has aggregates
- * in use add all the subselects to the
- * HAVING qual else to the WHERE qual
- */
- if(intersect_node->hasAggs == false)
- {
- AddQual(intersect_node, (Node *)n);
- }
- else
- {
- AddHavingQual(intersect_node, (Node *)n);
- }
- /* Now we got sublinks */
- intersect_node->hasSubLinks = true;
- intersect_list = lnext(intersect_list);
- }
- intersect_node->intersectClause = NIL;
- union_list = lappend(union_list, intersect_node);
- }
- /* The first entry to union_list is our
- * new top node
- */
- result = (Query *)lfirst(union_list);
-
- /* attach the rest to unionClause */
- result->unionClause = lnext(union_list);
-
- /* Attach all the items saved in the
- * beginning of the function */
- result->sortClause = sortClause;
- result->uniqueFlag = uniqueFlag;
- result->into = into;
- result->isPortal = isPortal;
- result->isBinary = isBinary;
- .
- .
- .
- return result;
- }
- end{verbatim}
- %
- <step> {tt create_list()} \
- Create a list of nodes that are either {tt Query} nodes or {tt NOT}
- nodes followed by a {tt Query} node. The {it tree} given in {tt
- ptr} contains at least one {it non negated} {tt Query} node. This
- node is put to the beginning of the list.
- %
- begin{verbatim}
- void create_list(Node *ptr,
- List **intersect_list)
- {
- List *arg;
- if(IsA(ptr,Query))
- {
- /* The non negated node is attached at the
- * beginning (lcons) */
- *intersect_list = lcons(ptr, *intersect_list);
- return;
- }
- if(IsA(ptr,Expr))
- {
- if(((Expr *)ptr)->opType == NOT_EXPR)
- {
- /* negated nodes are appended to the
- * end (lappend)
- */
- *intersect_list =
- lappend(*intersect_list, ptr);
- return;
- }
- else
- {
- foreach(arg, ((Expr *)ptr)->args)
- {
- create_list(lfirst(arg), intersect_list);
- }
- return;
- }
- return;
- }
- }
- end{verbatim}
- %
- <step> {tt intersect_tree_analyze()} \
- %
- The nodes given in {tt tree} are not transformed yet so process them
- using {tt parse_analyze()}. The node given in {tt first_select}
- has already been processed, so don't transform it again but return a
- pointer to the already processed version given in {tt parsetree}
- instead.
- %
- begin{verbatim}
- Node *intersect_tree_analyze(Node *tree,
- Node *first_select, Node *parsetree)
- {
- Node *result;
- List *arg;
- if(IsA(tree, SelectStmt))
- {
- List *qtrees;
- /* If we get to the tree given in first_select
- * return parsetree instead of performing
- * parse_analyze() */
- if(tree == first_select)
- {
- result = parsetree;
- }
- else
- {
- /* transform the unprocessed Query nodes */
- qtrees = parse_analyze(lcons(tree, NIL), NULL);
- result = (Node *) lfirst(qtrees);
- }
- }
- if(IsA(tree,Expr))
- {
- /* Call recursively for every argument */
- foreach(arg, ((Expr *)tree)->args)
- {
- lfirst(arg) =
- intersect_tree_analyze(lfirst(arg),
- first_select, parsetree);
- }
- result = tree;
- }
- return result;
- }
- end{verbatim}
- %
- end{itemize}
- **********************************************************
- **********************************************************
- **********************************************************
- **********************************************************
- **********************************************************
- -->
- </chapter>
- <!-- Keep this comment at the end of the file
- Local variables:
- mode: sgml
- sgml-omittag:nil
- sgml-shorttag:t
- sgml-minimize-attributes:nil
- sgml-always-quote-attributes:t
- sgml-indent-step:1
- sgml-indent-data:t
- sgml-parent-document:nil
- sgml-default-dtd-file:"./reference.ced"
- sgml-exposed-tags:nil
- sgml-local-catalogs:"/usr/lib/sgml/catalog"
- sgml-local-ecat-files:nil
- End:
- -->