共计 7141 个字符,预计需要花费 18 分钟才能阅读完成。
本篇内容主要讲解“PostgreSQL 中 create_index_path 函数有什么作用”,感兴趣的朋友不妨来看看。本文介绍的方法操作简单快捷,实用性强。下面就让丸趣 TV 小编来带大家学习“PostgreSQL 中 create_index_path 函数有什么作用”吧!
函数 build_index_paths 中调用的子函数 create_index_path 负责创建索引扫描路径节点,并通过调用 cost_index 完成索引扫描成本的估算。
一、数据结构
IndexOptInfo
回顾 IndexOptInfo 索引信息结构体
/*
 * IndexOptInfo
 *		Index information structure: identity of the index, its size
 *		statistics, its column/descriptor information, predicate state, and
 *		capability flags copied from the index access method (AM).
 */
typedef struct IndexOptInfo
{
NodeTag type;
Oid indexoid; /* OID of the index relation */
Oid reltablespace; /* tablespace of index (not table) */
RelOptInfo *rel; /* back-link to index's table */
/* index-size statistics (from pg_class and elsewhere) */
BlockNumber pages; /* number of disk pages in index */
double tuples; /* number of index tuples in index */
int tree_height; /* index tree height, or -1 if unknown */
/* index descriptor information */
int ncolumns; /* number of columns in index */
int nkeycolumns; /* number of key columns in index */
int *indexkeys; /* column numbers of index's attributes both
 * key and included columns, or 0 */
Oid *indexcollations; /* OIDs of collations of index columns */
Oid *opfamily; /* OIDs of operator families for columns */
Oid *opcintype; /* OIDs of opclass declared input data types */
Oid *sortopfamily; /* OIDs of btree opfamilies, if orderable */
bool *reverse_sort; /* is sort order descending? */
bool *nulls_first; /* do NULLs come first in the sort order? */
bool *canreturn; /* which index cols can be returned in an
 * index-only scan? */
Oid relam; /* OID of the access method (in pg_am) */
List *indexprs; /* expressions for non-simple index columns
 * (e.g. a function index) */
List *indpred; /* predicate if a partial index, else NIL */
List *indextlist; /* targetlist (list of TargetEntry) representing
 * index columns */
List *indrestrictinfo; /* parent relation's baserestrictinfo
 * list, less any conditions implied by
 * the index's predicate (unless it's a
 * target rel, see comments in
 * check_index_predicates()) */
bool predOK; /* true if index predicate matches query */
bool unique; /* true if a unique index */
bool immediate; /* is uniqueness enforced immediately? */
bool hypothetical; /* true if index doesn't really exist */
/* Remaining fields are copied from the index AM's API struct: */
/* (access-method API information copied over from the index relation) */
bool amcanorderbyop; /* does AM support order by operator result? */
bool amoptionalkey; /* can query omit key for the first column? */
bool amsearcharray; /* can AM handle ScalarArrayOpExpr quals? */
bool amsearchnulls; /* can AM search for NULL/NOT NULL entries? */
bool amhasgettuple; /* does AM have amgettuple interface? */
bool amhasgetbitmap; /* does AM have amgetbitmap interface? */
bool amcanparallel; /* does AM support parallel scan? */
/* Rather than include amapi.h here, we declare amcostestimate like this */
void (*amcostestimate) (); /* AM's cost estimator */
} IndexOptInfo;
Cost 相关
注意: 实际使用的参数值通过系统配置文件定义, 而不是这里的常量定义!
typedef double Cost; /* execution cost (in page-access units) */
/* defaults for costsize.c's Cost parameters */
/* NB: cost-estimation code should use the variables, not these constants! */
/* Note: the values actually used come from the system configuration file, */
/* not from the constants defined here. */
/* If you change these, update backend/utils/misc/postgresql.sample.conf */
#define DEFAULT_SEQ_PAGE_COST 1.0 /* cost of a sequentially scanned page */
#define DEFAULT_RANDOM_PAGE_COST 4.0 /* cost of a randomly accessed page */
#define DEFAULT_CPU_TUPLE_COST 0.01 /* CPU cost of processing one tuple */
#define DEFAULT_CPU_INDEX_TUPLE_COST 0.005 /* CPU cost of processing one index tuple */
#define DEFAULT_CPU_OPERATOR_COST 0.0025 /* CPU cost of executing one operator or function */
#define DEFAULT_PARALLEL_TUPLE_COST 0.1 /* parallel execution: cost of passing one tuple from one worker to another */
#define DEFAULT_PARALLEL_SETUP_COST 1000.0 /* cost of setting up the parallel execution environment */
#define DEFAULT_EFFECTIVE_CACHE_SIZE 524288 /* measured in pages */
/* Cost variables, each initialized to the matching default above. */
double seq_page_cost = DEFAULT_SEQ_PAGE_COST;
double random_page_cost = DEFAULT_RANDOM_PAGE_COST;
double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
double parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
double parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
/* 1 followed by 10 zeros: a huge cost that makes the optimizer give up the path by itself */
Cost disable_cost = 1.0e10;
/* number of workers used per gather */
int max_parallel_workers_per_gather = 2;
二、源码解读
create_index_path
该函数创建索引扫描路径节点,其中调用函数 cost_index 计算索引扫描成本。
//----------------------------------------------- create_index_path
/*
 * create_index_path
 *	  Creates a path node for an index scan.
 *
 * 'index' is a usable index.
 * 'indexclauses' is a list of RestrictInfo nodes representing clauses
 *			to be used as index qual conditions in the scan.
 * 'indexclausecols' is an integer list of index column numbers (zero based)
 *			the indexclauses can be used with; it parallels 'indexclauses'.
 * 'indexorderbys' is a list of bare expressions (no RestrictInfos)
 *			to be used as index ordering operators in the scan.
 * 'indexorderbycols' is an integer list of index column numbers (zero based)
 *			the ordering operators can be used with.
 * 'pathkeys' describes the ordering of the path.
 * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
 *			for an ordered index, or NoMovementScanDirection for
 *			an unordered index.
 * 'indexonly' is true if an index-only scan is wanted.
 * 'required_outer' is the set of outer relids for a parameterized path.
 * 'loop_count' is the number of repetitions of the indexscan to factor into
 *		estimates of caching behavior.
 * 'partial_path' is true if constructing a parallel index scan path.
 *
 * Returns the new path node.  Its cost fields are filled in by cost_index().
 */
IndexPath *
create_index_path(PlannerInfo *root,
				  IndexOptInfo *index,
				  List *indexclauses,
				  List *indexclausecols,
				  List *indexorderbys,
				  List *indexorderbycols,
				  List *pathkeys,
				  ScanDirection indexscandir,
				  bool indexonly,
				  Relids required_outer,
				  double loop_count,
				  bool partial_path)
{
	IndexPath  *pathnode = makeNode(IndexPath);
	RelOptInfo *rel = index->rel;	/* relation the index belongs to */
	List	   *indexquals,
			   *indexqualcols;

	/* Generic Path fields */
	pathnode->path.pathtype = indexonly ? T_IndexOnlyScan : T_IndexScan;
	pathnode->path.parent = rel;
	pathnode->path.pathtarget = rel->reltarget;
	pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
														  required_outer);
	pathnode->path.parallel_aware = false;
	pathnode->path.parallel_safe = rel->consider_parallel;
	pathnode->path.parallel_workers = 0;
	pathnode->path.pathkeys = pathkeys;

	/*
	 * Convert clauses to indexquals the executor can handle.  indexquals and
	 * indexqualcols are output arguments, so they must be passed by address;
	 * passing them by value would leave both locals uninitialized when they
	 * are stored into the path node below.
	 */
	expand_indexqual_conditions(index, indexclauses, indexclausecols,
								&indexquals, &indexqualcols);

	/* Fill in the index-specific fields of the pathnode */
	pathnode->indexinfo = index;
	pathnode->indexclauses = indexclauses;
	pathnode->indexquals = indexquals;
	pathnode->indexqualcols = indexqualcols;
	pathnode->indexorderbys = indexorderbys;
	pathnode->indexorderbycols = indexorderbycols;
	pathnode->indexscandir = indexscandir;

	/* Estimate the cost of the scan now that the node is populated */
	cost_index(pathnode, root, loop_count, partial_path);

	return pathnode;
}
正文完