Skip to content

Resolve issues #5 and #1: reduce number of collisions in the ptrack map #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
.deps
*.so
*.o
ptrack--2.0.sql
Dockerfile

18 changes: 3 additions & 15 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@

MODULE_big = ptrack
OBJS = ptrack.o datapagemap.o engine.o $(WIN32RES)
EXTENSION = ptrack
EXTVERSION = 2.1
DATA = ptrack.sql ptrack--2.0--2.1.sql
DATA_built = $(EXTENSION)--$(EXTVERSION).sql
PGFILEDESC = "ptrack - block-level incremental backup engine"

EXTRA_CLEAN = $(EXTENSION)--$(EXTVERSION).sql
EXTENSION = ptrack
EXTVERSION = 2.2
DATA = ptrack--2.1.sql ptrack--2.0--2.1.sql ptrack--2.1--2.2.sql

TAP_TESTS = 1

Expand All @@ -22,13 +20,3 @@ top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif

$(EXTENSION)--$(EXTVERSION).sql: ptrack.sql
cat $^ > $@

# temp-install: EXTRA_INSTALL=contrib/ptrack

# check-tap: temp-install
# $(prove_check)

# check: check-tap
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ To disable `ptrack` and clean up all remaining service files set `ptrack.map_siz

* ptrack_version() — returns ptrack version string.
* ptrack_init_lsn() — returns LSN of the last ptrack map initialization.
* ptrack_get_pagemapset('LSN') — returns a set of changed data files with bitmaps of changed blocks since specified LSN.
* ptrack_get_pagemapset(start_lsn pg_lsn) — returns a set of changed data files with bitmaps of changed blocks since the specified `start_lsn`.
* ptrack_get_change_stat(start_lsn pg_lsn) — returns statistics of changes (number of files, pages and size in MB) since the specified `start_lsn`.

Usage example:

Expand Down Expand Up @@ -102,6 +103,10 @@ Usually, you have to only install new version of `ptrack` and do `ALTER EXTENSIO
* Do `ALTER EXTENSION ptrack UPDATE;`.
* Restart your server.

#### Upgrading from 2.1.* to 2.2.*:

Since version 2.2 we use a different algorithm for tracking changed pages. Thus, data recorded in `ptrack.map` by pre-2.2 versions of `ptrack` is incompatible with newer versions. After the extension upgrade and a server restart, the old `ptrack.map` will be discarded with a `WARNING` and initialized from scratch.

## Limitations

1. You can only use `ptrack` safely with `wal_level >= 'replica'`. Otherwise, you can lose tracking of some changes if crash-recovery occurs, since [certain commands are designed not to write WAL at all if wal_level is minimal](https://www.postgresql.org/docs/12/populate.html#POPULATE-PITR), but we only durably flush `ptrack` map at checkpoint time.
Expand Down
102 changes: 61 additions & 41 deletions engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ ptrackMapInit(void)
sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH);
sprintf(ptrack_mmap_path, "%s/%s", DataDir, PTRACK_MMAP_PATH);

ptrack_map_reinit:

/* Remove old PTRACK_MMAP_PATH file, if exists */
if (ptrack_file_exists(ptrack_mmap_path))
durable_unlink(ptrack_mmap_path, LOG);
Expand All @@ -175,18 +177,15 @@ ptrackMapInit(void)
if (stat(ptrack_path, &stat_buf) == 0)
{
copy_file(ptrack_path, ptrack_mmap_path);
is_new_map = false; /* flag to check checksum */
is_new_map = false; /* flag to check map file format and checksum */
ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | PG_BINARY);
if (ptrack_fd < 0)
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);
}
else
{
/* Create new file for PTRACK_MMAP_PATH */
ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | O_CREAT | PG_BINARY);
if (ptrack_fd < 0)
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);
}

if (ptrack_fd < 0)
elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path);

#ifdef WIN32
{
Expand Down Expand Up @@ -227,7 +226,20 @@ ptrackMapInit(void)
elog(ERROR, "ptrack init: wrong map format of file \"%s\"", ptrack_path);

/* Check ptrack version inside old ptrack map */
/* No-op for now, but may be used for future compatibility checks */
if (ptrack_map->version_num != PTRACK_VERSION_NUM)
{
ereport(WARNING,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("ptrack init: map format version %d in the file \"%s\" is incompatible with loaded version %d",
ptrack_map->version_num, ptrack_path, PTRACK_VERSION_NUM),
errdetail("Deleting file \"%s\" and reinitializing ptrack map.", ptrack_path)));

/* Clean up everything and try again */
ptrackCleanFilesAndMap();

is_new_map = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Не могу найти, где делается unmap в этом случае?
При этом сразу после метки ptrack_map_reinit делается durable_unlink(ptrack_mmap_path).
В итоге, этот файл повисает невидимкой в файловой системе, и в адресном пространстве процесса повисает его mmap.

Наверное есть смысл позвать здесь ptrackCleanFilesAndMap ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Да, похоже на то. Я сомневался в этом месте, но потом забыл и не разобрался до конца

goto ptrack_map_reinit;
}

/* Check CRC */
INIT_CRC32C(crc);
Expand Down Expand Up @@ -378,7 +390,7 @@ ptrackCheckpoint(void)
/*
* We are writing ptrack map values to file, but we want to simply map it
* into the memory with mmap after a crash/restart. That way, we have to
* write values taking into account all paddings/allignments.
* write values taking into account all paddings/alignments.
*
* Write both magic and version_num at once.
*/
Expand Down Expand Up @@ -435,7 +447,7 @@ ptrackCheckpoint(void)
* going to overflow. */

/*
* We should not have any allignment issues here, since sizeof()
* We should not have any alignment issues here, since sizeof()
* takes into account all paddings for us.
*/
ptrack_write_chunk(ptrack_tmp_fd, &crc, (char *) buf, writesz);
Expand All @@ -446,7 +458,7 @@ ptrackCheckpoint(void)
}
}

/* Write if anythig left */
/* Write if anything left */
if ((i + 1) % PTRACK_BUF_SIZE != 0)
{
size_t writesz = sizeof(pg_atomic_uint64) * j;
Expand Down Expand Up @@ -641,48 +653,56 @@ void
ptrack_mark_block(RelFileNodeBackend smgr_rnode,
ForkNumber forknum, BlockNumber blocknum)
{
PtBlockId bid;
size_t hash;
size_t slot1;
size_t slot2;
XLogRecPtr new_lsn;
PtBlockId bid;
/*
* We use pg_atomic_uint64 here only for alignment purposes, because
* pg_atomic_uint64 is forcely aligned on 8 bytes during the MSVC build.
* pg_atomic_uint64 is forcedly aligned on 8 bytes during the MSVC build.
*/
pg_atomic_uint64 old_lsn;
pg_atomic_uint64 old_init_lsn;

if (ptrack_map_size != 0 && (ptrack_map != NULL) &&
smgr_rnode.backend == InvalidBackendId) /* do not track temporary
* relations */
{
bid.relnode = smgr_rnode.node;
bid.forknum = forknum;
bid.blocknum = blocknum;
hash = BID_HASH_FUNC(bid);

if (RecoveryInProgress())
new_lsn = GetXLogReplayRecPtr(NULL);
else
new_lsn = GetXLogInsertRecPtr();

old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[hash]);
if (ptrack_map_size == 0
|| ptrack_map == NULL
|| smgr_rnode.backend != InvalidBackendId) /* do not track temporary
* relations */
return;

/* Atomically assign new init LSN value */
old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn);
bid.relnode = smgr_rnode.node;
bid.forknum = forknum;
bid.blocknum = blocknum;

if (old_init_lsn.value == InvalidXLogRecPtr)
{
elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn);
hash = BID_HASH_FUNC(bid);
slot1 = hash % PtrackContentNblocks;
slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks;

while (old_init_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn));
}
if (RecoveryInProgress())
new_lsn = GetXLogReplayRecPtr(NULL);
else
new_lsn = GetXLogInsertRecPtr();

elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, hash, old_lsn.value, new_lsn);
/* Atomically assign new init LSN value */
old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn);
if (old_init_lsn.value == InvalidXLogRecPtr)
{
elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn);

/* Atomically assign new LSN value */
while (old_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[hash], (uint64 *) &old_lsn.value, new_lsn));
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[hash]));
while (old_init_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn));
}

/* Atomically assign new LSN value to the first slot */
old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot1]);
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, slot1, old_lsn.value, new_lsn);
while (old_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot1], (uint64 *) &old_lsn.value, new_lsn));
elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[slot1]));

/* And to the second */
old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot2]);
while (old_lsn.value < new_lsn &&
!pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot2], (uint64 *) &old_lsn.value, new_lsn));
}
8 changes: 4 additions & 4 deletions engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ typedef struct PtrackMapHdr
{
/*
* Three magic bytes (+ \0) to be sure, that we are reading ptrack.map
* with a right PtrackMapHdr strucutre.
* with a right PtrackMapHdr structure.
*/
char magic[PTRACK_MAGIC_SIZE];

Expand All @@ -72,7 +72,6 @@ typedef struct PtrackMapHdr

typedef PtrackMapHdr * PtrackMap;

/* TODO: check MAXALIGN usage below */
/* Number of elements in ptrack map (LSN array) */
#define PtrackContentNblocks \
((ptrack_map_size - offsetof(PtrackMapHdr, entries) - sizeof(pg_crc32c)) / sizeof(pg_atomic_uint64))
Expand All @@ -84,9 +83,10 @@ typedef PtrackMapHdr * PtrackMap;
/* CRC32 value offset in order to directly access it in the mmap'ed memory chunk */
#define PtrackCrcOffset (PtrackActualSize - sizeof(pg_crc32c))

/* Map block address 'bid' to map slot */
/* Hash of block address 'bid'. To get the slot position in the map, take
* the result modulo PtrackContentNblocks ('% PtrackContentNblocks'). */
#define BID_HASH_FUNC(bid) \
(size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)) % PtrackContentNblocks)
(size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)))

/*
* Per process pointer to shared ptrack_map
Expand Down
35 changes: 35 additions & 0 deletions ptrack--2.1--2.2.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/* ptrack/ptrack--2.1--2.2.sql */

-- Complain if script is sourced in psql, rather than via ALTER EXTENSION.
-- The trailing \quit must be its own psql meta-command (space before the
-- backslash, no space after it); otherwise psql does not stop here and goes
-- on to execute the rest of the script.
\echo Use "ALTER EXTENSION ptrack UPDATE;" to load this file. \quit

-- Re-register ptrack_get_pagemapset(start_lsn): the existing definition is
-- dropped and the C-language function is created again with the result
-- columns listed below.
-- NOTE(review): presumably DROP + CREATE is used instead of CREATE OR REPLACE
-- because the result row type differs from the 2.1 definition -- confirm
-- against ptrack--2.1.sql.
DROP FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn);

CREATE FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn)
RETURNS TABLE (
    path      text,
    pagecount bigint,
    pagemap   bytea
)
LANGUAGE C
VOLATILE
STRICT
AS 'MODULE_PATHNAME';

-- ptrack_get_change_stat(start_lsn)
--
-- Aggregates the rows returned by ptrack_get_pagemapset(start_lsn) into a
-- single summary row:
--   files      - number of rows with a non-NULL path
--   pages      - sum of pagecount over all rows
--   "size, MB" - pages multiplied by the server's block_size setting and
--                divided by 1024 * 1024
CREATE FUNCTION ptrack_get_change_stat(start_lsn pg_lsn)
RETURNS TABLE (
    files      bigint,
    pages      numeric,
    "size, MB" numeric
)
LANGUAGE plpgsql
AS $func$
DECLARE
    block_size bigint;
BEGIN
    -- Value of the 'block_size' setting as reported by pg_settings
    -- (text, coerced to bigint on assignment).
    block_size := (SELECT setting FROM pg_settings WHERE name = 'block_size');

    RETURN QUERY
    WITH totals AS (
        SELECT count(path)    AS changed_files,
               sum(pagecount) AS changed_pages
        FROM ptrack_get_pagemapset(start_lsn)
    )
    SELECT changed_files,
           changed_pages,
           block_size * changed_pages / (1024.0 * 1024)
    FROM totals;
END
$func$;
2 changes: 2 additions & 0 deletions ptrack.sql → ptrack--2.1.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
/* ptrack/ptrack--2.1.sql */

-- Complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION ptrack" to load this file. \quit

Expand Down
Loading