|||
(置顶:请到这里下载本文的后4张图片)
根据http://momjian.us/main/writings/pgsql/internalpics.pdf Page 11,在postgresql的执行流程中Parse Statement 阶段输出结果为parsetree_list(定义为List *parsetree_list;),这个parsetree_list在内存中到底是什么样子的呢?这就牵涉到postgresql的parse阶段,我们来分析分析。
在exec_simple_query() 中,我们知道 parsetree_list = pg_parse_query(query_string); 而在pg_parse_query()中,List *raw_parsetree_list; raw_parsetree_list = raw_parser(query_string);而在 raw_parser()中,进而调用base_yyparse();完成。
整个调用堆栈为:
9 raw_parser() /home/postgres/develop/postgresql-snapshot/src/backend/parser/parser.c:51 0x0813c1a7
8 pg_parse_query() /home/postgres/develop/postgresql-snapshot/src/backend/tcop/postgres.c:559
7 exec_simple_query() /home/postgres/develop/postgresql-snapshot/src/backend/tcop/postgres.c:827
6 PostgresMain() /home/postgres/develop/postgresql-snapshot/src/backend/tcop/postgres.c:3616
5 BackendRun() /home/postgres/develop/postgresql-snapshot/src/backend/postmaster/postmaster.c:3449
4 BackendStartup() /.../src/backend/postmaster/postmaster.c:3063
3 ServerLoop() /home/postgres/develop/postgresql-snapshot/src/backend/postmaster/postmaster.c:1387
2 PostmasterMain() /.../src/backend/postmaster/postmaster.c:1040
1 main() /home/postgres/develop/postgresql-snapshot/src/backend/main/main.c:188
既然用到了base_yyparse(),我们就不能不看看lex&yacc(对应为flex&Bison)了。关于如何快速泡上美丽的lex&yacc妹妹,请参考我的另一篇文章:一天之内不再畏惧lex&yacc之必备参考资料 http://www.sciencenet.cn/m/user_content.aspx?id=309595。好,不怕了lex&yacc之后,我们开工。
假定我们数据库中已经有如下表,并填充了数据:
CREATE TABLE pois
(
uid integer not null,
catcode VARCHAR(32) not null,
);
现在我们用psql发送请求:select catcode from pois;
那么接下来我们分析分析,这个parstree到底是什么?
让我们来分析分析gram.y中相应的代码(以下红色字体,皆为gram.y中代码)
extern List *parsetree; /* final parse result is delivered here */
既然是在外部定义,那么这个还记得在哪定义的吗?其实就是在调用函数raw_parser() 所在的文件parse.c中定义的:List *parsetree; /* result of parsing is left here */
接下来:
stmtblock: stmtmulti { parsetree = $1; }
stmtmulti: stmtmulti ';' stmt
{ if ($3 != NULL)
$$ = lappend($1, $3);
else
$$ = $1;
}
| stmt
{ if ($1 != NULL)
$$ = list_make1($1);
else
$$ = NIL;
}
;
好,上面的意思是调用$$ = list_make1($1);来生成parsetree 。那我们看看 list_make1()。
在pg_list.h中:#define list_make1(x1) lcons(x1, NIL)
进而在list.c中:
List * lcons(void *datum, List *list)
{
Assert(IsPointerList(list));
if (list == NIL)
list = new_list(T_List);
else
new_head_cell(list);
lfirst(list->head) = datum;
check_list_invariants(list);
return list;
}
进而:
static List * new_list(NodeTag type)
{
List *new_list;
ListCell *new_head;
new_head = (ListCell *) palloc(sizeof(*new_head));
new_head->next = NULL;
/* new_head->data is left undefined! */
new_list = (List *) palloc(sizeof(*new_list));
new_list->type = type;
new_list->length = 1;
new_list->head = new_head;
new_list->tail = new_head;
return new_list;
}
很简单,对吧。注意palloc()函数表明我们的parstree建立在heap上,没有建在shared memory里。
stmt :
AlterDatabaseStmt
| AlterDatabaseSetStmt
| AlterDomainStmt
| AlterFdwStmt
| AlterForeignServerStmt
| AlterFunctionStmt
.......
| SelectStmt
.......
| /*EMPTY*/
{ $$ = NULL; }
;
我们期待已久的SelectStmt在里面了。接下来:
SelectStmt: select_no_parens %prec UMINUS
| select_with_parens %prec UMINUS
;
select_with_parens:
'(' select_no_parens ')' { $$ = $2; }
| '(' select_with_parens ')' { $$ = $2; }
;
我们当然是属于不带括号的了,那么朝下看:
select_no_parens:
simple_select { $$ = $1; }
| select_clause sort_clause
{
insertSelectOptions((SelectStmt *) $1, $2, NIL,
NULL, NULL, NULL);
$$ = $1;
}
.....
;
接着:
simple_select:
SELECT opt_distinct target_list
into_clause from_clause where_clause
group_clause having_clause window_clause
{
SelectStmt *n = makeNode(SelectStmt);
n->distinctClause = $2;
n->targetList = $3;
n->intoClause = $4;
n->fromClause = $5;
n->whereClause = $6;
n->groupClause = $7;
n->havingClause = $8;
n->windowClause = $9;
$$ = (Node *)n;
}
| ....
;
那么至此,大概框架就有了,示意图如下。
内存结构图如下:
革命成功了十分之一,继续:
target_list:
target_el { $$ = list_make1($1); }
| target_list ',' target_el { $$ = lappend($1, $3); }
;
target_el: a_expr AS ColLabel
{
$$ = makeNode(ResTarget);
$$->name = $3;
$$->indirection = NIL;
$$->val = (Node *)$1;
$$->location = @1;
}
/*
...
| a_expr
{
$$ = makeNode(ResTarget);
$$->name = NULL;
$$->indirection = NIL;
$$->val = (Node *)$1;
$$->location = @1;
}
| '*'
{
ColumnRef *n = makeNode(ColumnRef);
n->fields = list_make1(makeNode(A_Star));
n->location = @1;
$$ = makeNode(ResTarget);
$$->name = NULL;
$$->indirection = NIL;
$$->val = (Node *)n;
$$->location = @1;
}
;
好, | a_expr 即为我们要找的。继续追踪a_expr :
a_expr: c_expr { $$ = $1; }
| a_expr TYPECAST Typename
{ $$ = makeTypeCast($1, $3, @2); }
...
;
c_expr: columnref { $$ = $1; }
| AexprConst { $$ = $1; }
...
;
接下来:
columnref: relation_name
{
$$ = makeColumnRef($1, NIL, @1);
}
| relation_name indirection
{
$$ = makeColumnRef($1, $2, @1);
}
;
然后:
relation_name:
SpecialRuleRelation { $$ = $1; }
| ColId { $$ = $1; }
;
ColId: IDENT { $$ = $1; }
| unreserved_keyword { $$ = pstrdup($1); }
| col_name_keyword { $$ = pstrdup($1); }
;
^_^,终于找到了IDENT了,因为我们稍作分析就知道“select catcode from pois”中的catcode可通过scan.I中的如下规则识别为IDENT:
{identifier} {
const ScanKeyword *keyword;
char *ident;
SET_YYLLOC();
/* Is it a keyword? */
keyword = ScanKeywordLookup(yytext);
if (keyword != NULL)
{
yylval.keyword = keyword->name;
return keyword->value;
}
/*
* No. Convert the identifier to lower case, and truncate
* if necessary.
*/
ident = downcase_truncate_identifier(yytext, yyleng, true);
yylval.str = ident;
return IDENT;
}
看源码中一段注释,就知道IDENT是干嘛的了:IDENT is the lexeme returned by the lexer for identifiers that match no known keyword.
通过gram.y自带的makeColumnRef()函数,我们可以知道该Node的内存存储结构。
static Node *makeColumnRef(char *colname, List *indirection, int location)
{
/*
* Generate a ColumnRef node, with an A_Indirection node added if there
* is any subscripting in the specified indirection list. However,
* any field selection at the start of the indirection list must be
* transposed into the "fields" part of the ColumnRef node.
*/
ColumnRef *c = makeNode(ColumnRef);
int nfields = 0;
ListCell *l;
c->location = location;
foreach(l, indirection)
{
if (IsA(lfirst(l), A_Indices))
{
A_Indirection *i = makeNode(A_Indirection);
if (nfields == 0)
{
/* easy case - all indirection goes to A_Indirection */
c->fields = list_make1(makeString(colname));
i->indirection = check_indirection(indirection);
}
else
{
/* got to split the list in two */
i->indirection = check_indirection(list_copy_tail(indirection,
nfields));
indirection = list_truncate(indirection, nfields);
c->fields = lcons(makeString(colname), indirection);
}
i->arg = (Node *) c;
return (Node *) i;
}
else if (IsA(lfirst(l), A_Star))
{
/* We only allow '*' at the end of a ColumnRef */
if (lnext(l) != NULL)
yyerror("improper use of \"*\"");
}
nfields++;
}
/* No subscripting, so all indirection gets added to field list */
c->fields = lcons(makeString(colname), indirection);
return (Node *) c;
}
至此,我们分析告一小段落,数据结构示意图如下(看不清图,请到这里下载本文的后4张图片):
此时,内存结构图如下(点击看大图):
接下来我们分析from 子句。
from_clause:
FROM from_list { $$ = $2; }
| /*EMPTY*/ { $$ = NIL; }
;
from_list:
table_ref { $$ = list_make1($1); }
| from_list ',' table_ref { $$ = lappend($1, $3); }
;
接下来:
table_ref: relation_expr
{
$$ = (Node *) $1;
}
...
;
relation_expr:
qualified_name
{
/* default inheritance */
$$ = $1;
$$->inhOpt = INH_DEFAULT;
$$->alias = NULL;
}
...
;
qualified_name:
relation_name
{
$$ = makeNode(RangeVar);
$$->catalogname = NULL;
$$->schemaname = NULL;
$$->relname = $1;
$$->location = @1;
}
...
;
relation_name:
SpecialRuleRelation { $$ = $1; }
| ColId { $$ = $1; }
;
哈,又见“ColId”。还记得下面的规则吗:
ColId: IDENT { $$ = $1; }
| unreserved_keyword { $$ = pstrdup($1); }
| col_name_keyword { $$ = pstrdup($1); }
;
这样,我们得出最终的示意图:
最终的内存结构图为:
那上面的分析以及结果图是否正确呢?我们debug运行一下:
Normal07.8 磅02falsefalsefalseEN-USZH-CNX-NONEMicrosoftInternetExplorer4/* Style Definitions */table.MsoNormalTable{mso-style-name:普通表格;mso-tstyle-rowband-size:0;mso-tstyle-colband-size:0;mso-style-noshow:yes;mso-style-priority:99;mso-style-qformat:yes;mso-style-parent:"";mso-padding-alt:0cm 5.4pt 0cm 5.4pt;mso-para-margin:0cm;mso-para-margin-bottom:.0001pt;mso-pagination:widow-orphan;font-size:10.5pt;mso-bidi-font-size:11.0pt;font-family:"Calibri","sans-serif";mso-ascii-font-family:Calibri;mso-ascii-theme-font:minor-latin;mso-fareast-font-family:宋体;mso-fareast-theme-font:minor-fareast;mso-hansi-font-family:Calibri;mso-hansi-theme-font:minor-latin;mso-bidi-font-family:"Times New Roman";mso-bidi-theme-font:minor-bidi;mso-font-kerning:1.0pt;}parsetree_list 0x0a1b46c0
type T_List
length 1
head 0x0a1b46ac
data {...}
ptr_value 0x0a1b4620
type T_SelectStmt
distinctClause 0x00000000
intoClause 0x00000000
targetList 0x0a1b4594
type T_List
length 1
head 0x0a1b4580
data {...}
ptr_value 0x0a1b4554
type T_ResTarget
name 0x00000000
indirection 0x00000000
val 0x0a1b44f4
type T_ColumnRef
fields 0x0a1b4538
type T_List
length 1
head 0x0a1b4524
data {...}
ptr_value 0x0a1b4510
type T_String
val {...}
ival 169559264
str 0x0a1b44e0
int_value 169559312
oid_value 169559312
next 0x00000000
tail 0x0a1b4524
location 7
location 7
int_value 169559380
oid_value 169559380
next 0x00000000
tail 0x0a1b4580
fromClause 0x0a1b4604
type T_List
length 1
head 0x0a1b45f0
data {...}
ptr_value 0x0a1b45c4
type T_RangeVar
catalogname 0x00000000
schemaname 0x00000000
relname 0x0a1b45b0
inhOpt INH_DEFAULT
istemp false
alias 0x00000000
location 20
int_value 169559492
oid_value 169559492
next 0x00000000
tail 0x0a1b45f0
whereClause 0x00000000
groupClause 0x00000000
havingClause 0x00000000
windowClause 0x00000000
withClause 0x00000000
valuesLists 0x00000000
sortClause 0x00000000
limitOffset 0x00000000
limitCount 0x00000000
lockingClause 0x00000000
op SETOP_NONE
all false
larg 0x00000000
rarg 0x00000000
int_value 169559584
oid_value 169559584
next 0x00000000
tail 0x0a1b46ac
表明我们的分析完全正确。
至此,结束。
加我私人微信,交流技术。
Archiver|手机版|科学网 ( 京ICP备07017567号-12 )
GMT+8, 2024-12-18 17:54
Powered by ScienceNet.cn
Copyright © 2007- 中国科学报社