Skip to content

Commit d86f0ef

Browse files
dschoGit for Windows Build Agent
authored and
Git for Windows Build Agent
committed
Merge branch 'ns/batched-fsync'
This merges the topic branch (specifically backported onto v2.33.1 to allow for integrating into Git for Windows' `main` branch) that strikes a better balance between safety and speed: rather than `fsync()`ing each and every loose object file, we now offer to do it in a batch. This will become the new default in Git for Windows. Signed-off-by: Johannes Schindelin <[email protected]>
2 parents cb95b7a + 5b296d9 commit d86f0ef

24 files changed

+423
-33
lines changed

Documentation/config/core.txt

+23-6
Original file line numberDiff line numberDiff line change
@@ -576,12 +576,29 @@ core.whitespace::
576576
errors. The default tab width is 8. Allowed values are 1 to 63.
577577

578578
core.fsyncObjectFiles::
579-
This boolean will enable 'fsync()' when writing object files.
580-
+
581-
This is a total waste of time and effort on a filesystem that orders
582-
data writes properly, but can be useful for filesystems that do not use
583-
journalling (traditional UNIX filesystems) or that only journal metadata
584-
and not file contents (OS X's HFS+, or Linux ext3 with "data=writeback").
579+
A value indicating the level of effort Git will expend in
580+
trying to make objects added to the repo durable in the event
581+
of an unclean system shutdown. This setting currently only
582+
controls loose objects in the object store, so updates to any
583+
refs or the index may not be equally durable.
584+
+
585+
* `false` allows data to remain in file system caches according to
586+
operating system policy, whence it may be lost if the system loses power
587+
or crashes.
588+
* `true` triggers a data integrity flush for each loose object added to the
589+
object store. This is the safest setting that is likely to ensure durability
590+
across all operating systems and file systems that honor the 'fsync' system
591+
call. However, this setting comes with a significant performance cost on
592+
common hardware. Git does not currently fsync parent directories for
593+
newly-added files, so some filesystems may still allow data to be lost on
594+
system crash.
595+
* `batch` enables an experimental mode that uses interfaces available in some
596+
operating systems to write loose object data with a minimal set of FLUSH
597+
CACHE (or equivalent) commands sent to the storage controller. If the
598+
operating system interfaces are not available, this mode behaves the same as
599+
`true`. This mode is expected to be as safe as `true` on macOS for repos
600+
stored on HFS+ or APFS filesystems and on Windows for repos stored on NTFS or
601+
ReFS.
585602

586603
core.preloadIndex::
587604
Enable parallel index preload for operations like 'git diff'

Makefile

+6
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,8 @@ all::
405405
#
406406
# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
407407
#
408+
# Define HAVE_SYNC_FILE_RANGE if your platform has sync_file_range.
409+
#
408410
# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
409411
# before 2.17) for clock_gettime and CLOCK_MONOTONIC.
410412
#
@@ -1907,6 +1909,10 @@ ifdef HAVE_CLOCK_MONOTONIC
19071909
BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
19081910
endif
19091911

1912+
ifdef HAVE_SYNC_FILE_RANGE
1913+
BASIC_CFLAGS += -DHAVE_SYNC_FILE_RANGE
1914+
endif
1915+
19101916
ifdef NEEDS_LIBRT
19111917
EXTLIBS += -lrt
19121918
endif

builtin/unpack-objects.c

+3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "builtin.h"
22
#include "cache.h"
3+
#include "bulk-checkin.h"
34
#include "config.h"
45
#include "object-store.h"
56
#include "object.h"
@@ -503,10 +504,12 @@ static void unpack_all(void)
503504
if (!quiet)
504505
progress = start_progress(_("Unpacking objects"), nr_objects);
505506
CALLOC_ARRAY(obj_list, nr_objects);
507+
plug_bulk_checkin();
506508
for (i = 0; i < nr_objects; i++) {
507509
unpack_one(i);
508510
display_progress(progress, i + 1);
509511
}
512+
unplug_bulk_checkin();
510513
stop_progress(&progress);
511514

512515
if (delta_list)

builtin/update-index.c

+6
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
*/
66
#define USE_THE_INDEX_COMPATIBILITY_MACROS
77
#include "cache.h"
8+
#include "bulk-checkin.h"
89
#include "config.h"
910
#include "lockfile.h"
1011
#include "quote.h"
@@ -1099,6 +1100,9 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
10991100

11001101
the_index.updated_skipworktree = 1;
11011102

1103+
/* we might be adding many objects to the object database */
1104+
plug_bulk_checkin();
1105+
11021106
/*
11031107
* Custom copy of parse_options() because we want to handle
11041108
* filename arguments as they come.
@@ -1179,6 +1183,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
11791183
strbuf_release(&buf);
11801184
}
11811185

1186+
/* by now we must have added all of the new objects */
1187+
unplug_bulk_checkin();
11821188
if (split_index > 0) {
11831189
if (git_config_get_split_index() == 0)
11841190
warning(_("core.splitIndex is set to false; "

bulk-checkin.c

+80-10
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,22 @@
33
*/
44
#include "cache.h"
55
#include "bulk-checkin.h"
6+
#include "lockfile.h"
67
#include "repository.h"
78
#include "csum-file.h"
89
#include "pack.h"
910
#include "strbuf.h"
11+
#include "string-list.h"
12+
#include "tmp-objdir.h"
1013
#include "packfile.h"
1114
#include "object-store.h"
1215

13-
static struct bulk_checkin_state {
14-
unsigned plugged:1;
16+
static int bulk_checkin_plugged;
17+
static int needs_batch_fsync;
18+
19+
static struct tmp_objdir *bulk_fsync_objdir;
1520

21+
static struct bulk_checkin_state {
1622
char *pack_tmp_name;
1723
struct hashfile *f;
1824
off_t offset;
@@ -21,7 +27,7 @@ static struct bulk_checkin_state {
2127
struct pack_idx_entry **written;
2228
uint32_t alloc_written;
2329
uint32_t nr_written;
24-
} state;
30+
} bulk_checkin_state;
2531

2632
static void finish_tmp_packfile(struct strbuf *basename,
2733
const char *pack_tmp_name,
@@ -79,6 +85,34 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
7985
reprepare_packed_git(the_repository);
8086
}
8187

88+
/*
89+
* Cleanup after batch-mode fsync_object_files.
90+
*/
91+
static void do_batch_fsync(void)
92+
{
93+
/*
94+
* Issue a full hardware flush against a temporary file to ensure
95+
* that all objects are durable before any renames occur. The code in
96+
* fsync_loose_object_bulk_checkin has already issued a writeout
97+
* request, but it has not flushed any writeback cache in the storage
98+
* hardware.
99+
*/
100+
101+
if (needs_batch_fsync) {
102+
struct strbuf temp_path = STRBUF_INIT;
103+
struct tempfile *temp;
104+
105+
strbuf_addf(&temp_path, "%s/bulk_fsync_XXXXXX", get_object_directory());
106+
temp = xmks_tempfile(temp_path.buf);
107+
fsync_or_die(get_tempfile_fd(temp), get_tempfile_path(temp));
108+
delete_tempfile(&temp);
109+
strbuf_release(&temp_path);
110+
}
111+
112+
if (bulk_fsync_objdir)
113+
tmp_objdir_migrate(bulk_fsync_objdir);
114+
}
115+
82116
static int already_written(struct bulk_checkin_state *state, struct object_id *oid)
83117
{
84118
int i;
@@ -273,25 +307,61 @@ static int deflate_to_pack(struct bulk_checkin_state *state,
273307
return 0;
274308
}
275309

310+
void fsync_loose_object_bulk_checkin(int fd)
311+
{
312+
assert(fsync_object_files == FSYNC_OBJECT_FILES_BATCH);
313+
314+
/*
315+
* If we have a plugged bulk checkin, we issue a call that
316+
* cleans the filesystem page cache but avoids a hardware flush
317+
* command. Later on we will issue a single hardware flush
318+
* before as part of do_batch_fsync.
319+
*/
320+
if (bulk_checkin_plugged &&
321+
git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0) {
322+
if (!needs_batch_fsync)
323+
needs_batch_fsync = 1;
324+
} else {
325+
fsync_or_die(fd, "loose object file");
326+
}
327+
}
328+
276329
int index_bulk_checkin(struct object_id *oid,
277330
int fd, size_t size, enum object_type type,
278331
const char *path, unsigned flags)
279332
{
280-
int status = deflate_to_pack(&state, oid, fd, size, type,
333+
int status = deflate_to_pack(&bulk_checkin_state, oid, fd, size, type,
281334
path, flags);
282-
if (!state.plugged)
283-
finish_bulk_checkin(&state);
335+
if (!bulk_checkin_plugged)
336+
finish_bulk_checkin(&bulk_checkin_state);
284337
return status;
285338
}
286339

287340
void plug_bulk_checkin(void)
288341
{
289-
state.plugged = 1;
342+
assert(!bulk_checkin_plugged);
343+
344+
/*
345+
* A temporary object directory is used to hold the files
346+
* while they are not fsynced.
347+
*/
348+
if (fsync_object_files == FSYNC_OBJECT_FILES_BATCH) {
349+
bulk_fsync_objdir = tmp_objdir_create("bulk-fsync");
350+
if (!bulk_fsync_objdir)
351+
die(_("Could not create temporary object directory for core.fsyncobjectfiles=batch"));
352+
353+
tmp_objdir_replace_primary_odb(bulk_fsync_objdir, 0);
354+
}
355+
356+
bulk_checkin_plugged = 1;
290357
}
291358

292359
void unplug_bulk_checkin(void)
293360
{
294-
state.plugged = 0;
295-
if (state.f)
296-
finish_bulk_checkin(&state);
361+
assert(bulk_checkin_plugged);
362+
bulk_checkin_plugged = 0;
363+
if (bulk_checkin_state.f)
364+
finish_bulk_checkin(&bulk_checkin_state);
365+
366+
do_batch_fsync();
297367
}

bulk-checkin.h

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
#include "cache.h"
88

9+
void fsync_loose_object_bulk_checkin(int fd);
10+
911
int index_bulk_checkin(struct object_id *oid,
1012
int fd, size_t size, enum object_type type,
1113
const char *path, unsigned flags);

cache.h

+7-1
Original file line numberDiff line numberDiff line change
@@ -994,7 +994,13 @@ void reset_shared_repository(void);
994994
extern int read_replace_refs;
995995
extern char *git_replace_ref_base;
996996

997-
extern int fsync_object_files;
997+
enum fsync_object_files_mode {
998+
FSYNC_OBJECT_FILES_OFF,
999+
FSYNC_OBJECT_FILES_ON,
1000+
FSYNC_OBJECT_FILES_BATCH
1001+
};
1002+
1003+
extern enum fsync_object_files_mode fsync_object_files;
9981004
extern int use_fsync;
9991005
extern int core_preload_index;
10001006
extern int precomposed_unicode;

compat/mingw.h

+3
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,9 @@ int mingw_getpagesize(void);
330330
#define getpagesize mingw_getpagesize
331331
#endif
332332

333+
int win32_fsync_no_flush(int fd);
334+
#define fsync_no_flush win32_fsync_no_flush
335+
333336
struct rlimit {
334337
unsigned int rlim_cur;
335338
};

compat/win32/flush.c

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#include "../../git-compat-util.h"
2+
#include <winternl.h>
3+
#include "lazyload.h"
4+
5+
int win32_fsync_no_flush(int fd)
6+
{
7+
IO_STATUS_BLOCK io_status;
8+
9+
#define FLUSH_FLAGS_FILE_DATA_ONLY 1
10+
11+
DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NTAPI, NtFlushBuffersFileEx,
12+
HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize,
13+
PIO_STATUS_BLOCK IoStatusBlock);
14+
15+
if (!INIT_PROC_ADDR(NtFlushBuffersFileEx)) {
16+
errno = ENOSYS;
17+
return -1;
18+
}
19+
20+
memset(&io_status, 0, sizeof(io_status));
21+
if (NtFlushBuffersFileEx((HANDLE)_get_osfhandle(fd), FLUSH_FLAGS_FILE_DATA_ONLY,
22+
NULL, 0, &io_status)) {
23+
errno = EINVAL;
24+
return -1;
25+
}
26+
27+
return 0;
28+
}

config.c

+6-1
Original file line numberDiff line numberDiff line change
@@ -1491,7 +1491,12 @@ int git_default_core_config(const char *var, const char *value, void *cb)
14911491
}
14921492

14931493
if (!strcmp(var, "core.fsyncobjectfiles")) {
1494-
fsync_object_files = git_config_bool(var, value);
1494+
if (value && !strcmp(value, "batch"))
1495+
fsync_object_files = FSYNC_OBJECT_FILES_BATCH;
1496+
else if (git_config_bool(var, value))
1497+
fsync_object_files = FSYNC_OBJECT_FILES_ON;
1498+
else
1499+
fsync_object_files = FSYNC_OBJECT_FILES_OFF;
14951500
return 0;
14961501
}
14971502

config.mak.uname

+3
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ ifeq ($(uname_S),Linux)
5757
HAVE_CLOCK_MONOTONIC = YesPlease
5858
# -lrt is needed for clock_gettime on glibc <= 2.16
5959
NEEDS_LIBRT = YesPlease
60+
HAVE_SYNC_FILE_RANGE = YesPlease
6061
HAVE_GETDELIM = YesPlease
6162
FREAD_READS_DIRECTORIES = UnfortunatelyYes
6263
BASIC_CFLAGS += -DHAVE_SYSINFO
@@ -480,6 +481,7 @@ endif
480481
CFLAGS =
481482
BASIC_CFLAGS = -nologo -I. -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE
482483
COMPAT_OBJS = compat/msvc.o compat/winansi.o \
484+
compat/win32/flush.o \
483485
compat/win32/path-utils.o \
484486
compat/win32/pthread.o compat/win32/syslog.o \
485487
compat/win32/trace2_win32_process_info.o \
@@ -668,6 +670,7 @@ ifeq ($(uname_S),MINGW)
668670
COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\"
669671
COMPAT_OBJS += compat/mingw.o compat/winansi.o \
670672
compat/win32/trace2_win32_process_info.o \
673+
compat/win32/flush.o \
671674
compat/win32/path-utils.o \
672675
compat/win32/pthread.o compat/win32/syslog.o \
673676
compat/win32/dirent.o compat/win32/fscache.o

configure.ac

+8
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,14 @@ AC_COMPILE_IFELSE([CLOCK_MONOTONIC_SRC],
10951095
[AC_MSG_RESULT([no])
10961096
HAVE_CLOCK_MONOTONIC=])
10971097
GIT_CONF_SUBST([HAVE_CLOCK_MONOTONIC])
1098+
1099+
#
1100+
# Define HAVE_SYNC_FILE_RANGE=YesPlease if sync_file_range is available.
1101+
GIT_CHECK_FUNC(sync_file_range,
1102+
[HAVE_SYNC_FILE_RANGE=YesPlease],
1103+
[HAVE_SYNC_FILE_RANGE])
1104+
GIT_CONF_SUBST([HAVE_SYNC_FILE_RANGE])
1105+
10981106
#
10991107
# Define NO_SETITIMER if you don't have setitimer.
11001108
GIT_CHECK_FUNC(setitimer,

contrib/buildsystems/CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
286286
NOGDI OBJECT_CREATION_MODE=1 __USE_MINGW_ANSI_STDIO=0
287287
USE_NED_ALLOCATOR OVERRIDE_STRDUP MMAP_PREVENTS_DELETE USE_WIN32_MMAP
288288
UNICODE _UNICODE HAVE_WPGMPTR ENSURE_MSYSTEM_IS_SET)
289-
list(APPEND compat_SOURCES compat/mingw.c compat/winansi.c compat/win32/path-utils.c
289+
list(APPEND compat_SOURCES compat/mingw.c compat/winansi.c
290+
compat/win32/flush.c compat/win32/path-utils.c
290291
compat/win32/pthread.c compat/win32mmap.c compat/win32/syslog.c
291292
compat/win32/trace2_win32_process_info.c compat/win32/dirent.c
292293
compat/nedmalloc/nedmalloc.c compat/strdup.c compat/win32/fscache.c)

environment.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ const char *git_attributes_file;
4242
const char *git_hooks_path;
4343
int zlib_compression_level = Z_BEST_SPEED;
4444
int pack_compression_level = Z_DEFAULT_COMPRESSION;
45-
int fsync_object_files;
45+
enum fsync_object_files_mode fsync_object_files;
4646
int use_fsync = -1;
4747
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
4848
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;

git-compat-util.h

+7
Original file line numberDiff line numberDiff line change
@@ -1275,6 +1275,13 @@ __attribute__((format (printf, 1, 2))) NORETURN
12751275
void BUG(const char *fmt, ...);
12761276
#endif
12771277

1278+
enum fsync_action {
1279+
FSYNC_WRITEOUT_ONLY,
1280+
FSYNC_HARDWARE_FLUSH
1281+
};
1282+
1283+
int git_fsync(int fd, enum fsync_action action);
1284+
12781285
/*
12791286
* Preserves errno, prints a message, but gives no warning for ENOENT.
12801287
* Returns 0 on success, which includes trying to unlink an object that does

0 commit comments

Comments
 (0)