Skip to content

Commit ac19fe2

Browse files
committed
linux: futex v2 API updates
* `futex2_waitv` always takes a 64-bit timespec. Perhaps the `kernel_timespec` should be renamed `timespec64`? It's used in io_uring, too. * Add `packed struct` for futex v2 flags and parameters. * Add very basic "tests" for the futex v2 syscalls (just to ensure the code compiles). * Update the stale or broken comments. (I could also just delete these, as they're not really documenting Zig-specific behavior.) Given that the futex2 APIs are not used by Zig's library (they're a bit too new), that these are very specialized syscalls, and that they currently provide no benefit over the existing v1 API, I wonder if, instead of fixing these up, we should just replace them with a stub that says 'use a 3rd party library'.
1 parent dc7a713 commit ac19fe2

File tree

2 files changed

+214
-63
lines changed

2 files changed

+214
-63
lines changed

Diff for: lib/std/os/linux.zig

+84-63
Original file line numberDiff line numberDiff line change
@@ -679,56 +679,55 @@ pub fn futex_4arg(uaddr: *const anyopaque, futex_op: FUTEX_OP, val: u32, timeout
679679

680680
/// Given an array of `futex_waitv`, wait on each uaddr.
681681
/// The thread wakes if a futex_wake() is performed at any uaddr.
682-
/// The syscall returns immediately if any waiter has *uaddr != val.
683-
/// timeout is an optional timeout value for the operation.
684-
/// Each waiter has individual flags.
685-
/// The `flags` argument for the syscall should be used solely for specifying
686-
/// the timeout as realtime, if needed.
687-
/// Flags for private futexes, sizes, etc. should be used on the
688-
/// individual flags of each waiter.
682+
/// The syscall returns immediately if any futex has *uaddr != val.
683+
/// timeout is an optional, absolute timeout value for the operation.
684+
/// The `flags` argument is for future use and currently should be `.{}`.
685+
/// Flags for private futexes, sizes, etc. should be set on the
686+
/// individual flags of each futex.
689687
///
690688
/// Returns the array index of one of the woken futexes.
691689
/// No further information is provided: any number of other futexes may also
692690
/// have been woken by the same event, and if more than one futex was woken,
693691
/// the returned index may refer to any one of them.
694692
/// (It is not necessarily the futex with the smallest index, nor the one
695693
/// most recently woken, nor...)
694+
///
695+
/// Requires at least kernel v5.16.
696696
pub fn futex2_waitv(
697-
/// List of futexes to wait on.
698-
waiters: [*]futex_waitv,
699-
/// Length of `waiters`.
697+
futexes: [*]const futex_waitv,
698+
/// Length of `futexes`. Max of FUTEX2_WAITV_MAX.
700699
nr_futexes: u32,
701-
/// Flag for timeout (monotonic/realtime).
702-
flags: u32,
703-
/// Optional absolute timeout.
704-
timeout: ?*const timespec,
700+
flags: FUTEX2_FLAGS_WAITV,
701+
/// Optional absolute timeout. Always 64-bit, even on 32-bit platforms.
702+
timeout: ?*const kernel_timespec,
705703
/// Clock to be used for the timeout, realtime or monotonic.
706704
clockid: clockid_t,
707705
) usize {
708706
return syscall5(
709707
.futex_waitv,
710-
@intFromPtr(waiters),
708+
@intFromPtr(futexes),
711709
nr_futexes,
712-
flags,
710+
@as(u32, @bitCast(flags)),
713711
@intFromPtr(timeout),
714-
@bitCast(@as(isize, @intFromEnum(clockid))),
712+
@intFromEnum(clockid),
715713
);
716714
}
717715

718-
/// Wait on a futex.
719-
/// Identical to the traditional `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the
720-
/// futex2 familiy of calls.
716+
/// Wait on a single futex.
717+
/// Identical to the futex v1 `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the
718+
/// futex2 family of calls.
719+
///
720+
/// Requires at least kernel v6.7.
721721
pub fn futex2_wait(
722722
/// Address of the futex to wait on.
723723
uaddr: *const anyopaque,
724724
/// Value of `uaddr`.
725725
val: usize,
726-
/// Bitmask.
726+
/// Bitmask to match against incoming wakeup masks. Must not be zero.
727727
mask: usize,
728-
/// `FUTEX2` flags.
729-
flags: u32,
730-
/// Optional absolute timeout.
731-
timeout: ?*const timespec,
728+
flags: FUTEX2_FLAGS,
729+
/// Optional absolute timeout. Always 64-bit, even on 32-bit platforms.
730+
timeout: ?*const kernel_timespec,
732731
/// Clock to be used for the timeout, realtime or monotonic.
733732
clockid: clockid_t,
734733
) usize {
@@ -737,52 +736,55 @@ pub fn futex2_wait(
737736
@intFromPtr(uaddr),
738737
val,
739738
mask,
740-
flags,
739+
@as(u32, @bitCast(flags)),
741740
@intFromPtr(timeout),
742-
@bitCast(@as(isize, @intFromEnum(clockid))),
741+
@intFromEnum(clockid),
743742
);
744743
}
745744

746-
/// Wake a number of futexes.
747-
/// Identical to the traditional `FUTEX.FUTEX_WAIT_BITSET` op, except it is part of the
745+
/// Wake (a subset of) the waiters on the given futex.
746+
/// Identical to the traditional `FUTEX.FUTEX_WAKE_BITSET` op, except it is part of the
748747
/// futex2 family of calls.
748+
///
749+
/// Requires at least kernel v6.7.
749750
pub fn futex2_wake(
750-
/// Address of the futex(es) to wake.
751+
/// Futex to wake
751752
uaddr: *const anyopaque,
752-
/// Bitmask
753+
/// Bitmask to match against waiters.
753754
mask: usize,
754-
/// Number of the futexes to wake.
755-
nr: i32,
756-
/// `FUTEX2` flags.
757-
flags: u32,
755+
/// Maximum number of waiters on the futex to wake.
756+
nr_wake: i32,
757+
flags: FUTEX2_FLAGS,
758758
) usize {
759759
return syscall4(
760760
.futex_wake,
761761
@intFromPtr(uaddr),
762762
mask,
763-
@bitCast(@as(isize, nr)),
764-
flags,
763+
@as(u32, @bitCast(nr_wake)),
764+
@as(u32, @bitCast(flags)),
765765
);
766766
}
767767

768-
/// Requeue a waiter from one futex to another.
768+
/// Wake and/or requeue waiter(s) from one futex to another.
769769
/// Identical to `FUTEX.CMP_REQUEUE`, except it is part of the futex2 family of calls.
770+
///
771+
/// Requires at least kernel v6.7.
770772
pub fn futex2_requeue(
771-
/// Array describing the source and destination futex.
772-
waiters: [*]futex_waitv,
773-
/// Unused.
774-
flags: u32,
775-
/// Number of futexes to wake.
773+
/// The source and destination futexes. Must be a 2-element array.
774+
waiters: [*]const futex_waitv,
775+
/// Currently unused.
776+
flags: FUTEX2_FLAGS_REQUEUE,
777+
/// Maximum number of waiters to wake on the source futex.
776778
nr_wake: i32,
777-
/// Number of futexes to requeue.
779+
/// Maximum number of waiters to transfer to the destination futex.
778780
nr_requeue: i32,
779781
) usize {
780782
return syscall4(
781783
.futex_requeue,
782784
@intFromPtr(waiters),
783-
flags,
784-
@bitCast(@as(isize, nr_wake)),
785-
@bitCast(@as(isize, nr_requeue)),
785+
@as(u32, @bitCast(flags)),
786+
@as(u32, @bitCast(nr_wake)),
787+
@as(u32, @bitCast(nr_requeue)),
786788
);
787789
}
788790

@@ -3275,16 +3277,6 @@ pub const FALLOC = struct {
32753277
pub const FL_UNSHARE_RANGE = 0x40;
32763278
};
32773279

3278-
pub const FUTEX2 = struct {
3279-
pub const SIZE_U8 = 0x00;
3280-
pub const SIZE_U16 = 0x01;
3281-
pub const SIZE_U32 = 0x02;
3282-
pub const SIZE_U64 = 0x03;
3283-
pub const NUMA = 0x04;
3284-
3285-
pub const PRIVATE = FUTEX.PRIVATE_FLAG;
3286-
};
3287-
32883280
// Futex v1 API commands. See futex man page for each command's
32893281
// interpretation of the futex arguments.
32903282
pub const FUTEX_COMMAND = enum(u7) {
@@ -3348,6 +3340,36 @@ pub const FUTEX_WAKE_OP_CMP = enum(u4) {
33483340
/// Max numbers of elements in a `futex_waitv` array.
33493341
pub const FUTEX2_WAITV_MAX = 128;
33503342

3343+
/// For the futex v2 API, the size of the futex at the uaddr. v1 futexes are
3344+
/// always implicitly U32. As of kernel v6.14, only U32 is implemented
3345+
/// for v2 futexes.
3346+
pub const FUTEX2_SIZE = enum(u2) {
3347+
U8 = 0,
3348+
U16 = 1,
3349+
U32 = 2,
3350+
U64 = 3,
3351+
};
3352+
3353+
/// As of kernel 6.14 there are no defined flags to futex2_waitv.
3354+
pub const FUTEX2_FLAGS_WAITV = packed struct(u32) {
3355+
_reserved: u32 = 0,
3356+
};
3357+
3358+
/// As of kernel 6.14 there are no defined flags to futex2_requeue.
3359+
pub const FUTEX2_FLAGS_REQUEUE = packed struct(u32) {
3360+
_reserved: u32 = 0,
3361+
};
3362+
3363+
/// Flags for futex v2 APIs: passed directly to futex2_wait and futex2_wake, and
3364+
/// set per futex in the `futex_waitv` struct (used by futex2_waitv and futex2_requeue).
3365+
pub const FUTEX2_FLAGS = packed struct(u32) {
3366+
size: FUTEX2_SIZE,
3367+
numa: bool = false,
3368+
_reserved: u4 = 0,
3369+
private: bool,
3370+
_undefined: u24 = 0,
3371+
};
3372+
33513373
pub const PROT = struct {
33523374
/// page can not be accessed
33533375
pub const NONE = 0x0;
@@ -8989,17 +9011,16 @@ pub const PTRACE = struct {
89899011
pub const GET_SYSCALL_INFO = 0x420e;
89909012
};
89919013

8992-
/// A waiter for vectorized wait.
9014+
/// Futex v2 API, for operating on multiple futexes in one call.
89939015
pub const futex_waitv = extern struct {
8994-
// Expected value at uaddr
9016+
/// Expected value at uaddr, should match size of futex.
89959017
val: u64,
8996-
/// User address to wait on.
9018+
/// User address to wait on. The upper 32 bits must be 0 on 32-bit platforms.
89979019
uaddr: u64,
89989020
/// Flags for this waiter.
8999-
flags: u32,
9021+
flags: FUTEX2_FLAGS,
90009022
/// Reserved member to preserve alignment.
9001-
/// Should be 0.
9002-
__reserved: u32,
9023+
__reserved: u32 = 0,
90039024
};
90049025

90059026
pub const cache_stat_range = extern struct {

Diff for: lib/std/os/linux/test.zig

+130
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,136 @@ test "futex v1" {
242242
}
243243
}
244244

245+
comptime {
246+
std.debug.assert(2 == @as(u32, @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, .private = false })));
247+
std.debug.assert(128 == @as(u32, @bitCast(linux.FUTEX2_FLAGS{ .size = @enumFromInt(0), .private = true })));
248+
}
249+
250+
test "futex2_waitv" {
251+
const locks = [_]std.atomic.Value(u32){
252+
std.atomic.Value(u32).init(1),
253+
std.atomic.Value(u32).init(1),
254+
std.atomic.Value(u32).init(1),
255+
};
256+
257+
const futexes = [_]linux.futex_waitv{
258+
.{
259+
.val = 1,
260+
.uaddr = @intFromPtr(&locks[0].raw),
261+
.flags = .{ .size = .U32, .private = true },
262+
},
263+
.{
264+
.val = 1,
265+
.uaddr = @intFromPtr(&locks[1].raw),
266+
.flags = .{ .size = .U32, .private = true },
267+
},
268+
.{
269+
.val = 1,
270+
.uaddr = @intFromPtr(&locks[2].raw),
271+
.flags = .{ .size = .U32, .private = true },
272+
},
273+
};
274+
275+
const timeout = linux.kernel_timespec{ .sec = 0, .nsec = 2 }; // absolute timeout, so this is 1970...
276+
const rc = linux.futex2_waitv(&futexes, futexes.len, .{}, &timeout, .MONOTONIC);
277+
switch (linux.E.init(rc)) {
278+
.NOSYS => return error.SkipZigTest, // futex2_waitv added in kernel v5.16
279+
else => |err| try expectEqual(.TIMEDOUT, err),
280+
}
281+
}
282+
283+
// Futex v2 API is only supported on recent kernels (v6.7), so skip tests if the syscalls
284+
// return ENOSYS.
285+
fn futex2_skip_if_unsupported() !void {
286+
const lock: u32 = 0;
287+
const rc = linux.futex2_wake(&lock, 0, 1, .{ .size = .U32, .private = true });
288+
if (linux.E.init(rc) == .NOSYS) {
289+
return error.SkipZigTest;
290+
}
291+
}
292+
293+
test "futex2_wait" {
294+
var lock: std.atomic.Value(u32) = std.atomic.Value(u32).init(1);
295+
var rc: usize = 0;
296+
const mask = 0x1;
297+
298+
try futex2_skip_if_unsupported();
299+
300+
// The API for 8,16,64 bit futexes is defined, but as of kernel v6.14
301+
// (at least) they're not implemented.
302+
if (false) {
303+
rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U8, .private = true }, null, .MONOTONIC);
304+
try expectEqual(.INVAL, linux.E.init(rc));
305+
306+
rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U16, .private = true }, null, .MONOTONIC);
307+
try expectEqual(.INVAL, linux.E.init(rc));
308+
309+
rc = linux.futex2_wait(&lock.raw, 1, mask, .{ .size = .U64, .private = true }, null, .MONOTONIC);
310+
try expectEqual(.INVAL, linux.E.init(rc));
311+
}
312+
313+
const flags = linux.FUTEX2_FLAGS{ .size = .U32, .private = true };
314+
// no-wait, lock state mismatch
315+
rc = linux.futex2_wait(&lock.raw, 2, mask, flags, null, .MONOTONIC);
316+
try expectEqual(.AGAIN, linux.E.init(rc));
317+
318+
// hit timeout on wait
319+
rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &.{ .sec = 0, .nsec = 2 }, .MONOTONIC);
320+
try expectEqual(.TIMEDOUT, linux.E.init(rc));
321+
322+
// timeout is absolute
323+
{
324+
var curr: linux.timespec = undefined;
325+
rc = linux.clock_gettime(.MONOTONIC, &curr); // gettime() uses platform timespec
326+
try expectEqual(0, rc);
327+
328+
// ... but futex2_wait always uses 64-bit timespec
329+
var timeout: linux.kernel_timespec = .{
330+
.sec = curr.sec,
331+
.nsec = curr.nsec + 2,
332+
};
333+
rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &timeout, .MONOTONIC);
334+
try expectEqual(.TIMEDOUT, linux.E.init(rc));
335+
}
336+
337+
rc = linux.futex2_wait(&lock.raw, 1, mask, flags, &.{ .sec = 0, .nsec = 2 }, .REALTIME);
338+
try expectEqual(.TIMEDOUT, linux.E.init(rc));
339+
}
340+
341+
test "futex2_wake" {
342+
var lock: std.atomic.Value(u32) = std.atomic.Value(u32).init(1);
343+
344+
try futex2_skip_if_unsupported();
345+
346+
const rc = linux.futex2_wake(&lock.raw, 0xFF, 1, .{ .size = .U32, .private = true });
347+
try expectEqual(0, rc);
348+
}
349+
350+
test "futex2_requeue" {
351+
try futex2_skip_if_unsupported();
352+
353+
const locks = [_]std.atomic.Value(u32){
354+
std.atomic.Value(u32).init(1),
355+
std.atomic.Value(u32).init(1),
356+
};
357+
358+
const futexes = [_]linux.futex_waitv{
359+
.{
360+
.val = 1,
361+
.uaddr = @intFromPtr(&locks[0].raw),
362+
.flags = .{ .size = .U32, .private = true },
363+
},
364+
.{
365+
.val = 1,
366+
.uaddr = @intFromPtr(&locks[1].raw),
367+
.flags = .{ .size = .U32, .private = true },
368+
},
369+
};
370+
371+
const rc = linux.futex2_requeue(&futexes, .{}, 2, 2);
372+
try expectEqual(0, rc);
373+
}
374+
245375
test {
246376
_ = linux.IoUring;
247377
}

0 commit comments

Comments
 (0)