PostgreSQL 中 create_index_path 函数有什么作用

126次阅读

共计 7141 个字符，预计需要花费 18 分钟才能阅读完成。

本篇内容主要讲解“PostgreSQL 中 create_index_path 函数有什么作用”，感兴趣的朋友不妨来看看。本文介绍的方法操作简单快捷，实用性强。下面就让丸趣 TV 小编来带大家学习“PostgreSQL 中 create_index_path 函数有什么作用”吧!

函数 build_index_paths 中的子函数 create_index_path 实现了索引扫描成本的估算主逻辑。

一、数据结构

IndexOptInfo
回顾 IndexOptInfo 索引信息结构体

 /*
  * IndexOptInfo
  *		Per-index information record: identity, size statistics, column
  *		descriptors, expression/predicate lists, and flags copied from the
  *		index access method.
  */
 typedef struct IndexOptInfo
 {
 NodeTag type;
 
 Oid indexoid; /* OID of the index relation */
 Oid reltablespace; /* tablespace of index (not table) */
 RelOptInfo *rel; /* back-link to index's table */
 
 /* index-size statistics (from pg_class and elsewhere) */
 BlockNumber pages; /* number of disk pages in index */
 double tuples; /* number of index tuples in index */
 int tree_height; /* index tree height, or -1 if unknown */
 
 /* index descriptor information */
 int ncolumns; /* number of columns in index */
 int nkeycolumns; /* number of key columns in index */
 int *indexkeys; /* column numbers of index's attributes both
 * key and included columns, or 0 */
 Oid *indexcollations; /* OIDs of collations of index columns */
 Oid *opfamily; /* OIDs of operator families for columns */
 Oid *opcintype; /* OIDs of opclass declared input data types */
 Oid *sortopfamily; /* OIDs of btree opfamilies, if orderable */
 bool *reverse_sort; /* is sort order descending? */
 bool *nulls_first; /* do NULLs come first in the sort order? */
 bool *canreturn; /* which index cols can be returned in an
 * index-only scan? */
 Oid relam; /* OID of the access method (in pg_am) */
 
 List *indexprs; /* expressions for non-simple index columns
 * (e.g. a function/expression index) */
 List *indpred; /* predicate if a partial index, else NIL */
 
 List *indextlist; /* targetlist (list of TargetEntry) representing
 * index columns */
 
 List *indrestrictinfo; /* parent relation's baserestrictinfo
 * list, less any conditions implied by
 * the index's predicate (unless it's a
 * target rel, see comments in
 * check_index_predicates()) */
 
 bool predOK; /* true if index predicate matches query */
 bool unique; /* true if a unique index */
 bool immediate; /* is uniqueness enforced immediately? */
 bool hypothetical; /* true if index doesn't really exist */
 
 /* Remaining fields are copied from the index AM's API struct: */
 bool amcanorderbyop; /* does AM support order by operator result? */
 bool amoptionalkey; /* can query omit key for the first column? */
 bool amsearcharray; /* can AM handle ScalarArrayOpExpr quals? */
 bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */
 bool amhasgettuple; /* does AM have amgettuple interface? */
 bool amhasgetbitmap; /* does AM have amgetbitmap interface? */
 bool amcanparallel; /* does AM support parallel scan? */
 /* Rather than include amapi.h here, we declare amcostestimate like this */
 void (*amcostestimate) (); /* AM's cost estimator */
 } IndexOptInfo;

Cost 相关
注意: 实际使用的参数值通过系统配置文件定义, 而不是这里的常量定义!

 typedef double Cost; /* execution cost (in page-access units) */
 /* defaults for costsize.c's Cost parameters */
 /* NB: cost-estimation code should use the variables, not these constants! */
 /* Note: the values actually in effect come from the server configuration
  * file, not from the constants defined here. */
 /* If you change these, update backend/utils/misc/postgresql.sample.conf */
 #define DEFAULT_SEQ_PAGE_COST 1.0 // cost of a sequentially scanned page
 #define DEFAULT_RANDOM_PAGE_COST 4.0 // cost of a randomly scanned page
 #define DEFAULT_CPU_TUPLE_COST 0.01 // CPU cost of processing one tuple
 #define DEFAULT_CPU_INDEX_TUPLE_COST 0.005 // CPU cost of processing one index tuple
 #define DEFAULT_CPU_OPERATOR_COST 0.0025 // CPU cost of executing one operator or function
 #define DEFAULT_PARALLEL_TUPLE_COST 0.1 // parallel execution: cost of passing one tuple from one worker to another
 #define DEFAULT_PARALLEL_SETUP_COST 1000.0 // cost of setting up the parallel execution environment
 
 #define DEFAULT_EFFECTIVE_CACHE_SIZE 524288 /* measured in pages */
 double seq_page_cost = DEFAULT_SEQ_PAGE_COST;
 double random_page_cost = DEFAULT_RANDOM_PAGE_COST;
 double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
 double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
 double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
 double parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
 double parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
 
 int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
 Cost disable_cost = 1.0e10;// 1 followed by 10 zeros: a huge cost that makes the optimizer abandon the path
 
 int max_parallel_workers_per_gather = 2;// number of workers used per gather

二、源码解读

create_index_path
该函数创建索引扫描路径节点, 其中调用函数 cost_index 计算索引扫描成本.

//----------------------------------------------- create_index_path
/*
 * create_index_path
 *	  Creates a path node for an index scan and has cost_index() fill in
 *	  its cost estimate.
 *
 * 'root' is the planner information for the current query.
 * 'index' is a usable index.
 * 'indexclauses' is a list of RestrictInfo nodes representing clauses
 *			to be used as index qual conditions in the scan.
 * 'indexclausecols' is an integer list of index column numbers (zero based)
 *			the indexclauses can be used with; it parallels 'indexclauses'.
 * 'indexorderbys' is a list of bare expressions (no RestrictInfos)
 *			to be used as index ordering operators in the scan.
 * 'indexorderbycols' is an integer list of index column numbers (zero based)
 *			the ordering operators can be used with.
 * 'pathkeys' describes the ordering of the path.
 * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
 *			for an ordered index, or NoMovementScanDirection for
 *			an unordered index.
 * 'indexonly' is true if an index-only scan is wanted.
 * 'required_outer' is the set of outer relids for a parameterized path.
 * 'loop_count' is the number of repetitions of the indexscan to factor into
 *			estimates of caching behavior.
 * 'partial_path' is true if constructing a parallel index scan path.
 *
 * Returns the new path node.
 */
IndexPath *
create_index_path(PlannerInfo *root,
				  IndexOptInfo *index,
				  List *indexclauses,
				  List *indexclausecols,
				  List *indexorderbys,
				  List *indexorderbycols,
				  List *pathkeys,
				  ScanDirection indexscandir,
				  bool indexonly,
				  Relids required_outer,
				  double loop_count,
				  bool partial_path)
{
	IndexPath  *pathnode = makeNode(IndexPath);
	RelOptInfo *rel = index->rel;	/* the relation the index belongs to */
	List	   *indexquals,
			   *indexqualcols;

	/* Generic Path fields */
	pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = rel->reltarget;
	pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
														  required_outer);
	pathnode->path.parallel_aware = false;
	pathnode->path.parallel_safe = rel->consider_parallel;
	pathnode->path.parallel_workers = 0;
	pathnode->path.pathkeys = pathkeys;

	/*
	 * Convert clauses to indexquals the executor can handle.  The results
	 * come back through the last two arguments, so both locals are passed
	 * by address.
	 */
	expand_indexqual_conditions(index, indexclauses, indexclausecols,
								&indexquals, &indexqualcols);

	/* Fill in the pathnode */
	pathnode->indexinfo = index;
	pathnode->indexclauses = indexclauses;
	pathnode->indexquals = indexquals;
	pathnode->indexqualcols = indexqualcols;
	pathnode->indexorderbys = indexorderbys;
	pathnode->indexorderbycols = indexorderbycols;
	pathnode->indexscandir = indexscandir;

	/* Estimate the scan cost; must run after the fields above are set */
	cost_index(pathnode, root, loop_count, partial_path);

	return pathnode;
}

正文完