共计 10454 个字符,预计需要花费 27 分钟才能阅读完成。
本篇内容主要讲解“PostgreSQL 中 heap_insert 依赖的函数有哪些”,感兴趣的朋友不妨来看看。本文介绍的方法操作简单快捷,实用性强。下面就让丸趣 TV 小编来带大家学习“PostgreSQL 中 heap_insert 依赖的函数有哪些”吧!
一、数据结构
静态变量
进程中全局共享
/*
 * An array of XLogRecData structs, to hold registered data.
 */
static XLogRecData *rdatas;
// number of rdatas entries currently in use
static int num_rdatas; /* entries currently used */
// number of rdatas entries allocated
static int max_rdatas; /* allocated size */
// set once XLogBeginInsert has been called; checked by the XLogRegister* functions
static bool begininsert_called = false;
registered_buffer
对于每个通过 XLogRegisterBuffer 注册的数据块引用, 都会填充一个 registered_buffer 结构体
/*
* For each block reference registered with XLogRegisterBuffer, we fill in
* a registered_buffer struct.
* 对于每一个使用 XLogRegisterBuffer 注册的每个数据块,
* 填充到 registered_buffer 结构体中
*/
typedef struct
//slot 是否在使用?
bool in_use; /* is this slot in use? */
//REGBUF_* 相关标记
uint8 flags; /* REGBUF_* flags */
// 定义关系和数据库的标识符
RelFileNode rnode; /* identifies the relation and block */
//fork 进程编号
ForkNumber forkno;
// 块编号
BlockNumber block;
// 页内容
Page page; /* page content */
//rdata 链中的数据总大小
uint32 rdata_len; /* total length of data in rdata chain */
// 使用该数据块注册的数据链头
XLogRecData *rdata_head; /* head of the chain of data registered with
* this block */
// 使用该数据块注册的数据链尾
XLogRecData *rdata_tail; /* last entry in the chain, or rdata_head if
* empty */
// 临时 rdatas 数据引用, 用于存储 XLogRecordAssemble()中使用的备份块数据
XLogRecData bkp_rdatas[2]; /* temporary rdatas used to hold references to
* backup block data in XLogRecordAssemble() */
/* buffer to store a compressed version of backup block image */
// 用于存储压缩版本的备份块镜像的缓存
char compressed_page[PGLZ_MAX_BLCKSZ];
} registered_buffer;
// pointer to the array of registered_buffer slots (indexed by block_id)
static registered_buffer *registered_buffers;
// number of slots allocated in registered_buffers
static int max_registered_buffers; /* allocated size */
// highest block_id currently registered, plus one
static int max_registered_block_id = 0; /* highest block_id + 1 currently
* registered */
XLogCtlInsert
WAL 插入记录时使用的共享数据结构
/*
* Shared state data for WAL insertion.
* WAL 插入记录时使用的共享数据结构
*/
typedef struct XLogCtlInsert
// 包含 CurrBytePos 和 PrevBytePos 的 lock
slock_t insertpos_lck; /* protects CurrBytePos and PrevBytePos */
/*
* CurrBytePos is the end of reserved WAL. The next record will be
* inserted at that position. PrevBytePos is the start position of the
* previously inserted (or rather, reserved) record - it is copied to the
* prev-link of the next record. These are stored as usable byte
* positions rather than XLogRecPtrs (see XLogBytePosToRecPtr()).
* CurrBytePos 是保留 WAL 的结束位置。 * 下一条记录将插入到那个位置。 * PrevBytePos 是先前插入 (或者保留) 记录的起始位置——它被复制到下一条记录的 prev-link 中。 * 这些存储为“可用字节位置”,而不是 XLogRecPtrs(参见 XLogBytePosToRecPtr())。 */
uint64 CurrBytePos;
uint64 PrevBytePos;
/*
* Make sure the above heavily-contended spinlock and byte positions are
* on their own cache line. In particular, the RedoRecPtr and full page
* write variables below should be on a different cache line. They are
* read on every WAL insertion, but updated rarely, and we don t want
* those reads to steal the cache line containing Curr/PrevBytePos.
* 确保以上激烈竞争的自旋锁和字节位置在它们自己的缓存 line 上。 * 特别是,RedoRecPtr 和下面的全页写变量应该位于不同的缓存 line 上。 * 它们在每次插入 WAL 时都被读取,但很少更新, * 我们不希望这些读取窃取包含 Curr/PrevBytePos 的缓存 line。 */
char pad[PG_CACHE_LINE_SIZE];
/*
* fullPageWrites is the master copy used by all backends to determine
* whether to write full-page to WAL, instead of using process-local one.
* This is required because, when full_page_writes is changed by SIGHUP,
* we must WAL-log it before it actually affects WAL-logging by backends.
* Checkpointer sets at startup or after SIGHUP.
* fullpagewrite 是所有后台进程使用的主副本, * 用于确定是否将整个页面写入 WAL,而不是使用 process-local 副本。 * 这是必需的,因为当 SIGHUP 更改 full_page_write 时, * 我们必须在它通过后台进程实际影响 WAL-logging 之前对其进行 WAL-log 记录。 * Checkpointer 检查点设置在启动或 SIGHUP 之后。 *
* To read these fields, you must hold an insertion lock. To modify them,
* you must hold ALL the locks.
* 为了读取这些域, 必须持有 insertion lock.
* 如需更新, 则需要持有所有这些 lock.
*/
// 插入时的当前 redo point
XLogRecPtr RedoRecPtr; /* current redo point for insertions */
// 为 PITR 强制执行 full-page 写?
bool forcePageWrites; /* forcing full-page writes for PITR? */
// 是否全页写?
bool fullPageWrites;
/*
* exclusiveBackupState indicates the state of an exclusive backup (see
* comments of ExclusiveBackupState for more details). nonExclusiveBackups
* is a counter indicating the number of streaming base backups currently
* in progress. forcePageWrites is set to true when either of these is
* non-zero. lastBackupStart is the latest checkpoint redo location used
* as a starting point for an online backup.
* exclusive sivebackupstate 表示排他备份的状态
* (有关详细信息,请参阅 exclusive sivebackupstate 的注释)。 * 非排他性备份是一个计数器,指示当前正在进行的流基础备份的数量。 * forcePageWrites 在这两个值都不为零时被设置为 true。 * lastBackupStart 用作在线备份起点的最新检查点的重做位置。 */
ExclusiveBackupState exclusiveBackupState;
int nonExclusiveBackups;
XLogRecPtr lastBackupStart;
/*
* WAL insertion locks.
* WAL 写入锁
*/
WALInsertLockPadded *WALInsertLocks;
} XLogCtlInsert;
XLogRecData
xloginsert.c 中的函数会构造一个 XLogRecData 结构体链, 用于表示最终的 WAL 记录
/*
 * The functions in xloginsert.c construct a chain of XLogRecData structs
 * to represent the final WAL record.
 */
typedef struct XLogRecData
{
    struct XLogRecData *next;   /* next struct in chain, or NULL */
    char       *data;           /* start of rmgr data to include */
    uint32      len;            /* length of rmgr data to include */
} XLogRecData;
registered_buffer/registered_buffers
对于每个通过 XLogRegisterBuffer 注册的数据块引用, 都会填充一个 registered_buffer 结构体
/*
* For each block reference registered with XLogRegisterBuffer, we fill in
* a registered_buffer struct.
* 对于每一个使用 XLogRegisterBuffer 注册的每个数据块,
* 填充到 registered_buffer 结构体中
*/
typedef struct
//slot 是否在使用?
bool in_use; /* is this slot in use? */
//REGBUF_* 相关标记
uint8 flags; /* REGBUF_* flags */
// 定义关系和数据库的标识符
RelFileNode rnode; /* identifies the relation and block */
//fork 进程编号
ForkNumber forkno;
// 块编号
BlockNumber block;
// 页内容
Page page; /* page content */
//rdata 链中的数据总大小
uint32 rdata_len; /* total length of data in rdata chain */
// 使用该数据块注册的数据链头
XLogRecData *rdata_head; /* head of the chain of data registered with
* this block */
// 使用该数据块注册的数据链尾
XLogRecData *rdata_tail; /* last entry in the chain, or rdata_head if
* empty */
// 临时 rdatas 数据引用, 用于存储 XLogRecordAssemble()中使用的备份块数据
XLogRecData bkp_rdatas[2]; /* temporary rdatas used to hold references to
* backup block data in XLogRecordAssemble() */
/* buffer to store a compressed version of backup block image */
// 用于存储压缩版本的备份块镜像的缓存
char compressed_page[PGLZ_MAX_BLCKSZ];
} registered_buffer;
// pointer to the array of registered_buffer slots (file-scope; indexed by block_id)
static registered_buffer *registered_buffers;
// number of slots allocated in registered_buffers
static int max_registered_buffers; /* allocated size */
// highest block_id currently registered, plus one
static int max_registered_block_id = 0; /* highest block_id + 1 currently
* registered */
二、源码解读
heap_insert
主要实现逻辑是插入元组到堆中, 其中存在对 WAL(XLog)进行处理的部分.
参见 PostgreSQL 源码解读(104)- WAL#1(Insert WAL-heap_insert 函数 #1)
XLogBeginInsert
开始构造 WAL 记录.
必须在调用 XLogRegister* 和 XLogInsert()函数前调用.
/*
* Begin constructing a WAL record. This must be called before the
* XLogRegister* functions and XLogInsert().
* 开始构造 WAL 记录.
* 必须在调用 XLogRegister* 和 XLogInsert()函数前调用.
*/
XLogBeginInsert(void)
// 验证逻辑
Assert(max_registered_block_id == 0);
Assert(mainrdata_last == (XLogRecData *) mainrdata_head);
Assert(mainrdata_len == 0);
/* cross-check on whether we should be here or not */
// 交叉校验是否应该在这里还是不应该在这里出现
if (!XLogInsertAllowed())
elog(ERROR, cannot make new WAL entries during recovery
if (begininsert_called)
elog(ERROR, XLogBeginInsert was already called
// 变量赋值
begininsert_called = true;
* Is this process allowed to insert new WAL records?
* 判断该进程是否允许插入新的 WAL 记录
*
* Ordinarily this is essentially equivalent to !RecoveryInProgress().
* But we also have provisions for forcing the result true or false
* within specific processes regardless of the global state.
* 通常,这本质上等同于! recoverinprogress()。 * 但我们也有规定,无论全局状况如何,都要在特定进程中强制实现“正确”或“错误”的结果。 */
XLogInsertAllowed(void)
/*
* If value is unconditionally true or unconditionally false , just
* return it. This provides the normal fast path once recovery is known
* done.
* 如果值为“无条件为真”或“无条件为假”,则返回。 * 这提供正常的快速判断路径。 */
if (LocalXLogInsertAllowed = 0)
return (bool) LocalXLogInsertAllowed;
/*
* Else, must check to see if we re still in recovery.
* 否则, 必须检查是否处于恢复状态
*/
if (RecoveryInProgress())
return false;
/*
* On exit from recovery, reset to unconditionally true , since there is
* no need to keep checking.
* 从恢复中退出, 由于不需要继续检查, 重置为 无条件为真
*/
LocalXLogInsertAllowed = 1;
return true;
}
XLogRegisterData
添加数据到正在构造的 WAL 记录中
/*
* Add data to the WAL record that s being constructed.
* 添加数据到正在构造的 WAL 记录中
*
* The data is appended to the main chunk , available at replay with
* XLogRecGetData().
* 数据追加到 main chunk 中, 用于 XLogRecGetData()函数回放
*/
XLogRegisterData(char *data, int len)
XLogRecData *rdata;// 数据
// 验证是否已调用 begin
Assert(begininsert_called);
// 验证大小
if (num_rdatas = max_rdatas)
elog(ERROR, too much WAL data
rdata = rdatas[num_rdatas++];
rdata- data = data;
rdata- len = len;
/*
* we use the mainrdata_last pointer to track the end of the chain, so no
* need to clear next here.
* 使用 mainrdata_last 指针跟踪链条的结束点, 在这里不需要清除 next 变量
*/
mainrdata_last- next = rdata;
mainrdata_last = rdata;
mainrdata_len += len;
}
XLogRegisterBuffer
为正在构造的 WAL 记录注册一个缓冲区引用; WAL-logged 操作所修改的每一个 page 都必须调用此函数
/*
* Register a reference to a buffer with the WAL record being constructed.
* This must be called for every page that the WAL-logged operation modifies.
* 为正在构造的 WAL 记录注册一个缓冲区引用
* WAL-logged 操作所修改的每一个 page 都必须调用此函数
*/
XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags)
registered_buffer *regbuf;// 缓冲
/* NO_IMAGE doesn t make sense with FORCE_IMAGE */
//NO_IMAGE 不能与 FORCE_IMAGE 同时使用
Assert(!((flags REGBUF_FORCE_IMAGE) (flags (REGBUF_NO_IMAGE))));
Assert(begininsert_called);
// 块 ID 大于等于当前最大已注册块号时, 更新最大已注册块号; 若块 ID 超出已分配的缓冲区数量, 则报错
if (block_id = max_registered_block_id)
{
if (block_id = max_registered_buffers)
elog(ERROR, too many registered buffers
max_registered_block_id = block_id + 1;
}
// 赋值
regbuf = registered_buffers[block_id];
// 获取 Tag
BufferGetTag(buffer, regbuf- rnode, regbuf- forkno, regbuf- block);
regbuf- page = BufferGetPage(buffer);
regbuf- flags = flags;
regbuf- rdata_tail = (XLogRecData *) regbuf- rdata_head;
regbuf- rdata_len = 0;
/*
* Check that this page hasn t already been registered with some other
* block_id.
* 检查该 page 是否已被其他 block_id 注册
*/
#ifdef USE_ASSERT_CHECKING
{
int i;
for (i = 0; i max_registered_block_id; i++)// 循环检查
{
registered_buffer *regbuf_old = registered_buffers[i];
if (i == block_id || !regbuf_old- in_use)
continue;
Assert(!RelFileNodeEquals(regbuf_old- rnode, regbuf- rnode) ||
regbuf_old- forkno != regbuf- forkno ||
regbuf_old- block != regbuf- block);
}
}
#endif
regbuf- in_use = true;// 标记为使用