From dd6fdc06513698567e374c02bd2e631d2519807f Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Thu, 22 Apr 2021 23:08:30 +0300 Subject: [PATCH 1/7] Resolve issue#5: store update_lsn of each block into two independent slots. Previously we thought that 1 MB can track changes page-to-page in the 1 GB of data files. However, recently it became evident that our ptrack map or basic hash table behaves more like a Bloom filter with a number of hash functions k = 1. See more here: https://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives. Such a filter naturally has more collisions. By storing update_lsn of each block in the additional slot we perform as a Bloom filter with k = 2, which significantly reduces the collision rate. --- engine.c | 95 +++++++++++++++++++++++++++++++++----------------------- engine.h | 8 ++--- ptrack.c | 31 +++++++++++++----- ptrack.h | 2 +- 4 files changed, 85 insertions(+), 51 deletions(-) diff --git a/engine.c b/engine.c index 35cc14c..c8085a0 100644 --- a/engine.c +++ b/engine.c @@ -156,6 +156,8 @@ ptrackMapInit(void) sprintf(ptrack_path, "%s/%s", DataDir, PTRACK_PATH); sprintf(ptrack_mmap_path, "%s/%s", DataDir, PTRACK_MMAP_PATH); +ptrack_map_reinit: + /* Remove old PTRACK_MMAP_PATH file, if exists */ if (ptrack_file_exists(ptrack_mmap_path)) durable_unlink(ptrack_mmap_path, LOG); @@ -175,18 +177,15 @@ ptrackMapInit(void) if (stat(ptrack_path, &stat_buf) == 0) { copy_file(ptrack_path, ptrack_mmap_path); - is_new_map = false; /* flag to check checksum */ + is_new_map = false; /* flag to check map file format and checksum */ ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | PG_BINARY); - if (ptrack_fd < 0) - elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path); } else - { /* Create new file for PTRACK_MMAP_PATH */ ptrack_fd = BasicOpenFile(ptrack_mmap_path, O_RDWR | O_CREAT | PG_BINARY); - if (ptrack_fd < 0) - elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path); - } + + 
if (ptrack_fd < 0) + elog(ERROR, "ptrack init: failed to open map file \"%s\": %m", ptrack_mmap_path); #ifdef WIN32 { @@ -227,7 +226,19 @@ ptrackMapInit(void) elog(ERROR, "ptrack init: wrong map format of file \"%s\"", ptrack_path); /* Check ptrack version inside old ptrack map */ - /* No-op for now, but may be used for future compatibility checks */ + if (ptrack_map->version_num != PTRACK_VERSION_NUM) + { + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("ptrack init: map format version %d in the file \"%s\" is incompatible with loaded version %d", + ptrack_map->version_num, ptrack_path, PTRACK_VERSION_NUM), + errdetail("Deleting file \"%s\" and reinitializing ptrack map.", ptrack_path))); + + /* Delete and try again */ + durable_unlink(ptrack_path, LOG); + is_new_map = true; + goto ptrack_map_reinit; + } /* Check CRC */ INIT_CRC32C(crc); @@ -641,48 +652,56 @@ void ptrack_mark_block(RelFileNodeBackend smgr_rnode, ForkNumber forknum, BlockNumber blocknum) { + PtBlockId bid; size_t hash; + size_t slot1; + size_t slot2; XLogRecPtr new_lsn; - PtBlockId bid; /* * We use pg_atomic_uint64 here only for alignment purposes, because - * pg_atomic_uint64 is forcely aligned on 8 bytes during the MSVC build. + * pg_atomic_uint64 is forcedly aligned on 8 bytes during the MSVC build. 
*/ pg_atomic_uint64 old_lsn; pg_atomic_uint64 old_init_lsn; - if (ptrack_map_size != 0 && (ptrack_map != NULL) && - smgr_rnode.backend == InvalidBackendId) /* do not track temporary - * relations */ - { - bid.relnode = smgr_rnode.node; - bid.forknum = forknum; - bid.blocknum = blocknum; - hash = BID_HASH_FUNC(bid); - - if (RecoveryInProgress()) - new_lsn = GetXLogReplayRecPtr(NULL); - else - new_lsn = GetXLogInsertRecPtr(); + if (ptrack_map_size == 0 + || ptrack_map == NULL + || smgr_rnode.backend != InvalidBackendId) /* do not track temporary + * relations */ + return; - old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[hash]); + bid.relnode = smgr_rnode.node; + bid.forknum = forknum; + bid.blocknum = blocknum; - /* Atomically assign new init LSN value */ - old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn); + hash = BID_HASH_FUNC(bid); + slot1 = hash % PtrackContentNblocks; + slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks; - if (old_init_lsn.value == InvalidXLogRecPtr) - { - elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn); - - while (old_init_lsn.value < new_lsn && - !pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn)); - } + if (RecoveryInProgress()) + new_lsn = GetXLogReplayRecPtr(NULL); + else + new_lsn = GetXLogInsertRecPtr(); - elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, hash, old_lsn.value, new_lsn); + /* Atomically assign new init LSN value */ + old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn); + if (old_init_lsn.value == InvalidXLogRecPtr) + { + elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn); - /* Atomically assign new LSN value */ - while (old_lsn.value < new_lsn && - !pg_atomic_compare_exchange_u64(&ptrack_map->entries[hash], (uint64 *) &old_lsn.value, new_lsn)); - elog(DEBUG3, "ptrack_mark_block: map[%zu]=" 
UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[hash])); + while (old_init_lsn.value < new_lsn && + !pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn)); } + + /* Atomically assign new LSN value to the first slot */ + old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot1]); + elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, slot1, old_lsn.value, new_lsn); + while (old_lsn.value < new_lsn && + !pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot1], (uint64 *) &old_lsn.value, new_lsn)); + elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT, hash, pg_atomic_read_u64(&ptrack_map->entries[slot1])); + + /* And to the second */ + old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot2]); + while (old_lsn.value < new_lsn && + !pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot2], (uint64 *) &old_lsn.value, new_lsn)); } diff --git a/engine.h b/engine.h index 34cf15f..e46f803 100644 --- a/engine.h +++ b/engine.h @@ -50,7 +50,7 @@ typedef struct PtrackMapHdr { /* * Three magic bytes (+ \0) to be sure, that we are reading ptrack.map - * with a right PtrackMapHdr strucutre. + * with a right PtrackMapHdr structure. */ char magic[PTRACK_MAGIC_SIZE]; @@ -72,7 +72,6 @@ typedef struct PtrackMapHdr typedef PtrackMapHdr * PtrackMap; -/* TODO: check MAXALIGN usage below */ /* Number of elements in ptrack map (LSN array) */ #define PtrackContentNblocks \ ((ptrack_map_size - offsetof(PtrackMapHdr, entries) - sizeof(pg_crc32c)) / sizeof(pg_atomic_uint64)) @@ -84,9 +83,10 @@ typedef PtrackMapHdr * PtrackMap; /* CRC32 value offset in order to directly access it in the mmap'ed memory chunk */ #define PtrackCrcOffset (PtrackActualSize - sizeof(pg_crc32c)) -/* Map block address 'bid' to map slot */ +/* Block address 'bid' to hash. 
To get slot position in map should be divided + * with '% PtrackContentNblocks' */ #define BID_HASH_FUNC(bid) \ - (size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0)) % PtrackContentNblocks) + (size_t)(DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0))) /* * Per process pointer to shared ptrack_map diff --git a/ptrack.c b/ptrack.c index d897ecf..4992726 100644 --- a/ptrack.c +++ b/ptrack.c @@ -420,10 +420,9 @@ PG_FUNCTION_INFO_V1(ptrack_get_pagemapset); Datum ptrack_get_pagemapset(PG_FUNCTION_ARGS) { + PtScanCtx *ctx; FuncCallContext *funcctx; - PtScanCtx *ctx; MemoryContext oldcontext; - XLogRecPtr update_lsn; datapagemap_t pagemap; char gather_path[MAXPGPATH]; @@ -486,6 +485,12 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) while (true) { + size_t hash; + size_t slot1; + size_t slot2; + XLogRecPtr update_lsn1; + XLogRecPtr update_lsn2; + /* Stop traversal if there are no more segments */ if (ctx->bid.blocknum > ctx->relsize) { @@ -525,15 +530,25 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) } } - update_lsn = pg_atomic_read_u64(&ptrack_map->entries[BID_HASH_FUNC(ctx->bid)]); + hash = BID_HASH_FUNC(ctx->bid); + slot1 = hash % PtrackContentNblocks; + slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks; + + update_lsn1 = pg_atomic_read_u64(&ptrack_map->entries[slot1]); + update_lsn2 = pg_atomic_read_u64(&ptrack_map->entries[slot2]); + + if (update_lsn1 != InvalidXLogRecPtr) + elog(DEBUG3, "ptrack: update_lsn1 %X/%X of blckno %u of file %s", + (uint32) (update_lsn1 >> 32), (uint32) update_lsn1, + ctx->bid.blocknum, ctx->relpath); - if (update_lsn != InvalidXLogRecPtr) - elog(DEBUG3, "ptrack: update_lsn %X/%X of blckno %u of file %s", - (uint32) (update_lsn >> 32), (uint32) update_lsn, + if (update_lsn2 != InvalidXLogRecPtr) + elog(DEBUG3, "ptrack: update_lsn2 %X/%X of blckno %u of file %s", + (uint32) (update_lsn1 >> 32), (uint32) update_lsn2, ctx->bid.blocknum, ctx->relpath); - /* Block has been changed since 
specified LSN. Mark it in the bitmap */ - if (update_lsn >= ctx->lsn) + /* Block has been changed since specified LSN. Mark it in the bitmap */ + if (update_lsn1 >= ctx->lsn && update_lsn2 >= ctx->lsn) datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE)); ctx->bid.blocknum += 1; diff --git a/ptrack.h b/ptrack.h index 7e6b6e5..4375963 100644 --- a/ptrack.h +++ b/ptrack.h @@ -24,7 +24,7 @@ /* Ptrack version as a string */ #define PTRACK_VERSION "2.1" /* Ptrack version as a number */ -#define PTRACK_VERSION_NUM 210 +#define PTRACK_VERSION_NUM 220 /* * Structure identifying block on the disk. From 829f96cf53131a0d57a31ff929ea8fd46fbeb09b Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Fri, 23 Apr 2021 00:12:27 +0300 Subject: [PATCH 2/7] Resolve issue#1: add ptrack_get_change_stat(). Also bump extversion to 2.2 --- .gitignore | 2 -- Makefile | 18 +++--------------- ptrack--2.1--2.2.sql | 29 +++++++++++++++++++++++++++++ ptrack.sql => ptrack--2.1.sql | 2 ++ ptrack.control | 2 +- ptrack.h | 2 +- t/001_basic.pl | 6 +++++- 7 files changed, 41 insertions(+), 20 deletions(-) create mode 100644 ptrack--2.1--2.2.sql rename ptrack.sql => ptrack--2.1.sql (94%) diff --git a/.gitignore b/.gitignore index b46b4ef..4990aa6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,4 @@ .deps *.so *.o -ptrack--2.0.sql Dockerfile - diff --git a/Makefile b/Makefile index 8544f90..ba9ce1d 100644 --- a/Makefile +++ b/Makefile @@ -2,13 +2,11 @@ MODULE_big = ptrack OBJS = ptrack.o datapagemap.o engine.o $(WIN32RES) -EXTENSION = ptrack -EXTVERSION = 2.1 -DATA = ptrack.sql ptrack--2.0--2.1.sql -DATA_built = $(EXTENSION)--$(EXTVERSION).sql PGFILEDESC = "ptrack - block-level incremental backup engine" -EXTRA_CLEAN = $(EXTENSION)--$(EXTVERSION).sql +EXTENSION = ptrack +EXTVERSION = 2.2 +DATA = ptrack--2.1.sql ptrack--2.0--2.1.sql ptrack--2.1--2.2.sql TAP_TESTS = 1 @@ -22,13 +20,3 @@ top_builddir = ../.. 
include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif - -$(EXTENSION)--$(EXTVERSION).sql: ptrack.sql - cat $^ > $@ - -# temp-install: EXTRA_INSTALL=contrib/ptrack - -# check-tap: temp-install -# $(prove_check) - -# check: check-tap diff --git a/ptrack--2.1--2.2.sql b/ptrack--2.1--2.2.sql new file mode 100644 index 0000000..d666fc3 --- /dev/null +++ b/ptrack--2.1--2.2.sql @@ -0,0 +1,29 @@ +/* ptrack/ptrack--2.1--2.2.sql */ + +-- Complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION ptrack UPDATE;" to load this file.\ quit + +CREATE FUNCTION ptrack_get_change_stat(start_lsn pg_lsn) + RETURNS TABLE ( + files bigint, + pages bigint, + "size, MB" numeric + ) AS +$func$ +DECLARE +block_size bigint; +BEGIN + block_size := (SELECT setting FROM pg_settings WHERE name = 'block_size'); + + RETURN QUERY + SELECT changed_files, + changed_pages, + block_size*changed_pages/(1024.0*1024) + FROM + (SELECT count(path) AS changed_files, + sum( + length(replace(right((pagemap)::text, -1)::varbit::text, '0', '')) + ) AS changed_pages + FROM ptrack_get_pagemapset(start_lsn)) s; +END +$func$ LANGUAGE plpgsql; diff --git a/ptrack.sql b/ptrack--2.1.sql similarity index 94% rename from ptrack.sql rename to ptrack--2.1.sql index 80ae927..c963964 100644 --- a/ptrack.sql +++ b/ptrack--2.1.sql @@ -1,3 +1,5 @@ +/* ptrack/ptrack--2.1.sql */ + -- Complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION ptrack" to load this file. 
\quit diff --git a/ptrack.control b/ptrack.control index d2d8792..ec0af9d 100644 --- a/ptrack.control +++ b/ptrack.control @@ -1,5 +1,5 @@ # ptrack extension comment = 'block-level incremental backup engine' -default_version = '2.1' +default_version = '2.2' module_pathname = '$libdir/ptrack' relocatable = true diff --git a/ptrack.h b/ptrack.h index 4375963..d205115 100644 --- a/ptrack.h +++ b/ptrack.h @@ -22,7 +22,7 @@ #include "utils/relcache.h" /* Ptrack version as a string */ -#define PTRACK_VERSION "2.1" +#define PTRACK_VERSION "2.2" /* Ptrack version as a number */ #define PTRACK_VERSION_NUM 220 diff --git a/t/001_basic.pl b/t/001_basic.pl index 1abc788..bac81f2 100644 --- a/t/001_basic.pl +++ b/t/001_basic.pl @@ -10,7 +10,7 @@ use TestLib; use Test::More; -plan tests => 23; +plan tests => 24; my $node; my $res; @@ -115,6 +115,10 @@ qr/$rel_oid/, 'ptrack pagemapset should contain new relation oid'); +# Check change stats +$res_stdout = $node->safe_psql("postgres", "SELECT pages FROM ptrack_get_change_stat('$flush_lsn')"); +is($res_stdout > 0, 1, 'should be able to get aggregated stats of changes'); + # We should be able to change ptrack map size (but loose all changes) $node->append_conf( 'postgresql.conf', q{ From 3026be92c398eeeb7bc8edf65ca0deef25c82c17 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Wed, 12 May 2021 20:02:26 +0300 Subject: [PATCH 3/7] Add new function ptrack_get_change_file_stat(start_lsn pg_lsn) --- README.md | 8 +++++++- ptrack--2.1--2.2.sql | 25 +++++++++++++++++++++++++ t/001_basic.pl | 5 ++++- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 57a7c5c..06e4b18 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,9 @@ To disable `ptrack` and clean up all remaining service files set `ptrack.map_siz * ptrack_version() — returns ptrack version string. * ptrack_init_lsn() — returns LSN of the last ptrack map initialization. 
- * ptrack_get_pagemapset('LSN') — returns a set of changed data files with bitmaps of changed blocks since specified LSN. + * ptrack_get_pagemapset(start_lsn pg_lsn) — returns a set of changed data files with bitmaps of changed blocks since specified `start_lsn`. + * ptrack_get_change_stat(start_lsn pg_lsn) — returns statistic of changes (number of files, pages and size in MB) since specified `start_lsn`. + * ptrack_get_change_file_stat(start_lsn pg_lsn) — returns per file statistic of changes (number of pages and size in MB) since specified `start_lsn`. Usage example: @@ -102,6 +104,10 @@ Usually, you have to only install new version of `ptrack` and do `ALTER EXTENSIO * Do `ALTER EXTENSION 'ptrack' UPDATE;`. * Restart your server. +#### Upgrading from 2.1.* to 2.2.*: + +Since version 2.2 we use a different algorithm for tracking changed pages. Thus, data recorded in the `ptrack.map` using pre-2.2 versions of `ptrack` is incompatible with newer versions. After the extension upgrade and server restart, the old `ptrack.map` will be discarded with a `WARNING` and reinitialized from scratch. + ## Limitations 1. You can only use `ptrack` safely with `wal_level >= 'replica'`. Otherwise, you can lose tracking of some changes if crash-recovery occurs, since [certain commands are designed not to write WAL at all if wal_level is minimal](https://www.postgresql.org/docs/12/populate.html#POPULATE-PITR), but we only durably flush `ptrack` map at checkpoint time. 
diff --git a/ptrack--2.1--2.2.sql b/ptrack--2.1--2.2.sql index d666fc3..2a0d97f 100644 --- a/ptrack--2.1--2.2.sql +++ b/ptrack--2.1--2.2.sql @@ -27,3 +27,28 @@ BEGIN FROM ptrack_get_pagemapset(start_lsn)) s; END $func$ LANGUAGE plpgsql; + +CREATE FUNCTION ptrack_get_change_file_stat(start_lsn pg_lsn) + RETURNS TABLE ( + file_path text, + pages int, + "size, MB" numeric + ) AS +$func$ +DECLARE +block_size bigint; +BEGIN + block_size := (SELECT setting FROM pg_settings WHERE name = 'block_size'); + + RETURN QUERY + SELECT s.path, + changed_pages, + block_size*changed_pages/(1024.0*1024) + FROM + (SELECT path, + length(replace(right((pagemap)::text, -1)::varbit::text, '0', '')) + AS changed_pages + FROM ptrack_get_pagemapset(start_lsn)) s + ORDER BY (changed_pages, s.path) DESC; +END +$func$ LANGUAGE plpgsql; diff --git a/t/001_basic.pl b/t/001_basic.pl index bac81f2..37285d9 100644 --- a/t/001_basic.pl +++ b/t/001_basic.pl @@ -10,7 +10,7 @@ use TestLib; use Test::More; -plan tests => 24; +plan tests => 25; my $node; my $res; @@ -119,6 +119,9 @@ $res_stdout = $node->safe_psql("postgres", "SELECT pages FROM ptrack_get_change_stat('$flush_lsn')"); is($res_stdout > 0, 1, 'should be able to get aggregated stats of changes'); +$res_stdout = $node->safe_psql("postgres", "SELECT count(*) FROM ptrack_get_change_file_stat('$flush_lsn')"); +is($res_stdout > 0, 1, 'should be able to get per file stats of changes'); + # We should be able to change ptrack map size (but loose all changes) $node->append_conf( 'postgresql.conf', q{ From cf8e30962cf87afd9388f31428c895dd5b15278b Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Wed, 12 May 2021 20:33:42 +0300 Subject: [PATCH 4/7] Slightly optimize ptrack_get_pagemapset Probe the second slot only if the first one succeeded. 
--- ptrack.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/ptrack.c b/ptrack.c index 4992726..f2701af 100644 --- a/ptrack.c +++ b/ptrack.c @@ -532,24 +532,29 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) hash = BID_HASH_FUNC(ctx->bid); slot1 = hash % PtrackContentNblocks; - slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks; update_lsn1 = pg_atomic_read_u64(&ptrack_map->entries[slot1]); - update_lsn2 = pg_atomic_read_u64(&ptrack_map->entries[slot2]); if (update_lsn1 != InvalidXLogRecPtr) elog(DEBUG3, "ptrack: update_lsn1 %X/%X of blckno %u of file %s", (uint32) (update_lsn1 >> 32), (uint32) update_lsn1, ctx->bid.blocknum, ctx->relpath); - if (update_lsn2 != InvalidXLogRecPtr) - elog(DEBUG3, "ptrack: update_lsn2 %X/%X of blckno %u of file %s", - (uint32) (update_lsn1 >> 32), (uint32) update_lsn2, - ctx->bid.blocknum, ctx->relpath); + /* Only probe the second slot if the first one is marked */ + if (update_lsn1 >= ctx->lsn) + { + slot2 = ((hash << 32) | (hash >> 32)) % PtrackContentNblocks; + update_lsn2 = pg_atomic_read_u64(&ptrack_map->entries[slot2]); - /* Block has been changed since specified LSN. Mark it in the bitmap */ - if (update_lsn1 >= ctx->lsn && update_lsn2 >= ctx->lsn) - datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE)); + if (update_lsn2 != InvalidXLogRecPtr) + elog(DEBUG3, "ptrack: update_lsn2 %X/%X of blckno %u of file %s", + (uint32) (update_lsn1 >> 32), (uint32) update_lsn2, + ctx->bid.blocknum, ctx->relpath); + + /* Block has been changed since specified LSN. 
Mark it in the bitmap */ + if (update_lsn2 >= ctx->lsn) + datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE)); + } ctx->bid.blocknum += 1; } From fbfba8c73bce64e176dadb3b66cb0576a20ebe59 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Thu, 13 May 2021 18:56:41 +0300 Subject: [PATCH 5/7] Do a proper cleanup when ptrack.map version is incompatible --- engine.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/engine.c b/engine.c index c8085a0..86a1b60 100644 --- a/engine.c +++ b/engine.c @@ -234,8 +234,9 @@ ptrackMapInit(void) ptrack_map->version_num, ptrack_path, PTRACK_VERSION_NUM), errdetail("Deleting file \"%s\" and reinitializing ptrack map.", ptrack_path))); - /* Delete and try again */ - durable_unlink(ptrack_path, LOG); + /* Clean up everything and try again */ + ptrackCleanFilesAndMap(); + is_new_map = true; goto ptrack_map_reinit; } From ab17447196d46eaf5eef4b2edf07a155a9a8b11a Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Thu, 13 May 2021 20:19:34 +0300 Subject: [PATCH 6/7] Correct some typos --- engine.c | 6 +++--- ptrack.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/engine.c b/engine.c index 86a1b60..89217a9 100644 --- a/engine.c +++ b/engine.c @@ -390,7 +390,7 @@ ptrackCheckpoint(void) /* * We are writing ptrack map values to file, but we want to simply map it * into the memory with mmap after a crash/restart. That way, we have to - * write values taking into account all paddings/allignments. + * write values taking into account all paddings/alignments. * * Write both magic and varsion_num at once. */ @@ -447,7 +447,7 @@ ptrackCheckpoint(void) * going to overflow. */ /* - * We should not have any allignment issues here, since sizeof() + * We should not have any alignment issues here, since sizeof() * takes into account all paddings for us. 
*/ ptrack_write_chunk(ptrack_tmp_fd, &crc, (char *) buf, writesz); @@ -458,7 +458,7 @@ ptrackCheckpoint(void) } } - /* Write if anythig left */ + /* Write if anything left */ if ((i + 1) % PTRACK_BUF_SIZE != 0) { size_t writesz = sizeof(pg_atomic_uint64) * j; diff --git a/ptrack.c b/ptrack.c index f2701af..1928499 100644 --- a/ptrack.c +++ b/ptrack.c @@ -137,7 +137,7 @@ _PG_fini(void) /* * Ptrack follow up for copydir() routine. It parses database OID - * and tablespace OID from path string. We do not need to recoursively + * and tablespace OID from path string. We do not need to recursively * walk subdirs here, copydir() will do it for us if needed. */ static void From 9c132a3a2f319ed712d11e9158e6dbda27f085b3 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Fri, 14 May 2021 00:30:15 +0300 Subject: [PATCH 7/7] Refactor stats API and remove ptrack_get_change_file_stat --- README.md | 1 - ptrack--2.1--2.2.sql | 41 +++++++++++------------------------------ ptrack.c | 19 +++++++++++++------ t/001_basic.pl | 5 +---- 4 files changed, 25 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 06e4b18..39ea00b 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,6 @@ To disable `ptrack` and clean up all remaining service files set `ptrack.map_siz * ptrack_init_lsn() — returns LSN of the last ptrack map initialization. * ptrack_get_pagemapset(start_lsn pg_lsn) — returns a set of changed data files with bitmaps of changed blocks since specified `start_lsn`. * ptrack_get_change_stat(start_lsn pg_lsn) — returns statistic of changes (number of files, pages and size in MB) since specified `start_lsn`. - * ptrack_get_change_file_stat(start_lsn pg_lsn) — returns per file statistic of changes (number of pages and size in MB) since specified `start_lsn`. 
Usage example: diff --git a/ptrack--2.1--2.2.sql b/ptrack--2.1--2.2.sql index 2a0d97f..b09c15e 100644 --- a/ptrack--2.1--2.2.sql +++ b/ptrack--2.1--2.2.sql @@ -3,10 +3,18 @@ -- Complain if script is sourced in psql, rather than via ALTER EXTENSION \echo Use "ALTER EXTENSION ptrack UPDATE;" to load this file.\ quit +DROP FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn); +CREATE FUNCTION ptrack_get_pagemapset(start_lsn pg_lsn) +RETURNS TABLE (path text, + pagecount bigint, + pagemap bytea) +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT VOLATILE; + CREATE FUNCTION ptrack_get_change_stat(start_lsn pg_lsn) RETURNS TABLE ( files bigint, - pages bigint, + pages numeric, "size, MB" numeric ) AS $func$ @@ -18,37 +26,10 @@ BEGIN RETURN QUERY SELECT changed_files, changed_pages, - block_size*changed_pages/(1024.0*1024) + block_size * changed_pages / (1024.0 * 1024) FROM (SELECT count(path) AS changed_files, - sum( - length(replace(right((pagemap)::text, -1)::varbit::text, '0', '')) - ) AS changed_pages + sum(pagecount) AS changed_pages FROM ptrack_get_pagemapset(start_lsn)) s; END $func$ LANGUAGE plpgsql; - -CREATE FUNCTION ptrack_get_change_file_stat(start_lsn pg_lsn) - RETURNS TABLE ( - file_path text, - pages int, - "size, MB" numeric - ) AS -$func$ -DECLARE -block_size bigint; -BEGIN - block_size := (SELECT setting FROM pg_settings WHERE name = 'block_size'); - - RETURN QUERY - SELECT s.path, - changed_pages, - block_size*changed_pages/(1024.0*1024) - FROM - (SELECT path, - length(replace(right((pagemap)::text, -1)::varbit::text, '0', '')) - AS changed_pages - FROM ptrack_get_pagemapset(start_lsn)) s - ORDER BY (changed_pages, s.path) DESC; -END -$func$ LANGUAGE plpgsql; diff --git a/ptrack.c b/ptrack.c index 1928499..40630e7 100644 --- a/ptrack.c +++ b/ptrack.c @@ -424,6 +424,7 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) FuncCallContext *funcctx; MemoryContext oldcontext; datapagemap_t pagemap; + int64 pagecount = 0; char gather_path[MAXPGPATH]; /* Exit immediately if there is 
no map */ @@ -444,12 +445,13 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) /* Make tuple descriptor */ #if PG_VERSION_NUM >= 120000 - tupdesc = CreateTemplateTupleDesc(2); + tupdesc = CreateTemplateTupleDesc(3); #else - tupdesc = CreateTemplateTupleDesc(2, false); + tupdesc = CreateTemplateTupleDesc(3, false); #endif TupleDescInitEntry(tupdesc, (AttrNumber) 1, "path", TEXTOID, -1, 0); - TupleDescInitEntry(tupdesc, (AttrNumber) 2, "pagemap", BYTEAOID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "pagecount", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 3, "pagemap", BYTEAOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); funcctx->user_fctx = ctx; @@ -497,8 +499,8 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) /* We completed a segment and there is a bitmap to return */ if (pagemap.bitmap != NULL) { - Datum values[2]; - bool nulls[2] = {false}; + Datum values[3]; + bool nulls[3] = {false}; char pathname[MAXPGPATH]; bytea *result = NULL; Size result_sz = pagemap.bitmapsize + VARHDRSZ; @@ -512,11 +514,13 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) strcpy(pathname, ctx->relpath); values[0] = CStringGetTextDatum(pathname); - values[1] = PointerGetDatum(result); + values[1] = Int64GetDatum(pagecount); + values[2] = PointerGetDatum(result); pfree(pagemap.bitmap); pagemap.bitmap = NULL; pagemap.bitmapsize = 0; + pagecount = 0; htup = heap_form_tuple(funcctx->tuple_desc, values, nulls); if (htup) @@ -553,7 +557,10 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) /* Block has been changed since specified LSN. 
Mark it in the bitmap */ if (update_lsn2 >= ctx->lsn) + { + pagecount += 1; datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE)); + } } ctx->bid.blocknum += 1; diff --git a/t/001_basic.pl b/t/001_basic.pl index 37285d9..bac81f2 100644 --- a/t/001_basic.pl +++ b/t/001_basic.pl @@ -10,7 +10,7 @@ use TestLib; use Test::More; -plan tests => 25; +plan tests => 24; my $node; my $res; @@ -119,9 +119,6 @@ $res_stdout = $node->safe_psql("postgres", "SELECT pages FROM ptrack_get_change_stat('$flush_lsn')"); is($res_stdout > 0, 1, 'should be able to get aggregated stats of changes'); -$res_stdout = $node->safe_psql("postgres", "SELECT count(*) FROM ptrack_get_change_file_stat('$flush_lsn')"); -is($res_stdout > 0, 1, 'should be able to get per file stats of changes'); - # We should be able to change ptrack map size (but loose all changes) $node->append_conf( 'postgresql.conf', q{