Skip to content

Commit dd72ae8

Browse files
Al Viro, gregkh
Al Viro authored and gregkh committed
fix bitmap corruption on close_range() with CLOSE_RANGE_UNSHARE
commit 9a2fa14 upstream. copy_fd_bitmaps(new, old, count) is expected to copy the first count/BITS_PER_LONG bits from old->full_fds_bits[] and fill the rest with zeroes. What it does is copying enough words (BITS_TO_LONGS(count/BITS_PER_LONG)), then memsets the rest. That works fine, *if* all bits past the cutoff point are clear. Otherwise we are risking garbage from the last word we'd copied. For most of the callers that is true - expand_fdtable() has count equal to old->max_fds, so there's no open descriptors past count, let alone fully occupied words in ->open_fds[], which is what bits in ->full_fds_bits[] correspond to. The other caller (dup_fd()) passes sane_fdtable_size(old_fdt, max_fds), which is the smallest multiple of BITS_PER_LONG that covers all opened descriptors below max_fds. In the common case (copying on fork()) max_fds is ~0U, so all opened descriptors will be below it and we are fine, by the same reasons why the call in expand_fdtable() is safe. Unfortunately, there is a case where max_fds is less than that and where we might, indeed, end up with junk in ->full_fds_bits[] - close_range(from, to, CLOSE_RANGE_UNSHARE) with * descriptor table being currently shared * 'to' being above the current capacity of descriptor table * 'from' being just under some chunk of opened descriptors. In that case we end up with observably wrong behaviour - e.g. spawn a child with CLONE_FILES, get all descriptors in range 0..127 open, then close_range(64, ~0U, CLOSE_RANGE_UNSHARE) and watch dup(0) ending up with descriptor #128, despite #64 being observably not open. The minimally invasive fix would be to deal with that in dup_fd(). If this proves to add measurable overhead, we can go that way, but let's try to fix copy_fd_bitmaps() first. * new helper: bitmap_copy_and_extend(to, from, bits_to_copy, size). 
* make copy_fd_bitmaps() take the bitmap size in words, rather than bits; its 'count' argument is always a multiple of BITS_PER_LONG, so we are not losing any information, and that way we can use the same helper for all three bitmaps - compiler will see that count is a multiple of BITS_PER_LONG for the large ones, so it'll generate plain memcpy()+memset(). Reproducer added to tools/testing/selftests/core/close_range_test.c Cc: [email protected] Signed-off-by: Al Viro <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 97a532c commit dd72ae8

File tree

3 files changed

+60
-17
lines changed

3 files changed

+60
-17
lines changed

fs/file.c

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -46,27 +46,23 @@ static void free_fdtable_rcu(struct rcu_head *rcu)
4646
#define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr))
4747
#define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long))
4848

49+
#define fdt_words(fdt) ((fdt)->max_fds / BITS_PER_LONG) // words in ->open_fds
4950
/*
5051
* Copy the fd bits covering the first 'copy_words' words of ->open_fds
* (i.e. copy_words * BITS_PER_LONG descriptors; the removed version took this
* as a bit count, 'count') from the old table to the new table and clear the extra
5152
* space if any. This does not copy the file pointers. Called with the files
5253
* spinlock held for write.
5354
*/
54-
static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
55-
unsigned int count)
55+
static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt,
56+
unsigned int copy_words)
5657
{
57-
unsigned int cpy, set;
58-
59-
cpy = count / BITS_PER_BYTE;
60-
set = (nfdt->max_fds - count) / BITS_PER_BYTE;
61-
memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
62-
memset((char *)nfdt->open_fds + cpy, 0, set);
63-
memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
64-
memset((char *)nfdt->close_on_exec + cpy, 0, set);
65-
66-
cpy = BITBIT_SIZE(count);
67-
set = BITBIT_SIZE(nfdt->max_fds) - cpy;
68-
memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy);
69-
memset((char *)nfdt->full_fds_bits + cpy, 0, set);
58+
/* size of the new table's ->open_fds, in words (max_fds / BITS_PER_LONG) */
unsigned int nwords = fdt_words(nfdt);
59+
60+
/* ->open_fds and ->close_on_exec are passed in bits: words * BITS_PER_LONG */
bitmap_copy_and_extend(nfdt->open_fds, ofdt->open_fds,
61+
copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
62+
bitmap_copy_and_extend(nfdt->close_on_exec, ofdt->close_on_exec,
63+
copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG);
64+
/*
 * The word counts are passed directly as the bit counts for
 * ->full_fds_bits (presumably one bit per word of ->open_fds - confirm),
 * so copy_words need not be a multiple of BITS_PER_LONG here and the
 * helper has to mask the tail of the last copied word.
 */
bitmap_copy_and_extend(nfdt->full_fds_bits, ofdt->full_fds_bits,
65+
copy_words, nwords);
7066
}
7167

7268
/*
@@ -84,7 +80,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
8480
memcpy(nfdt->fd, ofdt->fd, cpy);
8581
memset((char *)nfdt->fd + cpy, 0, set);
8682

87-
copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds);
83+
copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt));
8884
}
8985

9086
/*
@@ -374,7 +370,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
374370
open_files = sane_fdtable_size(old_fdt, max_fds);
375371
}
376372

377-
copy_fd_bitmaps(new_fdt, old_fdt, open_files);
373+
copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG);
378374

379375
old_fds = old_fdt->fd;
380376
new_fds = new_fdt->fd;

include/linux/bitmap.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,18 @@ static inline void bitmap_copy_clear_tail(unsigned long *dst,
281281
dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits);
282282
}
283283

284+
/*
 * bitmap_copy_and_extend - copy the leading bits of a bitmap and zero the rest
 * @to:    destination bitmap; written for bitmap_size(size) bytes
 * @from:  source bitmap; read for BITS_TO_LONGS(count) words
 * @count: number of bits to take from @from
 * @size:  total size of @to, in bits
 *
 * Whole words covering @count bits are copied. If @count is not a multiple of
 * BITS_PER_LONG, the last copied word is ANDed with
 * BITMAP_LAST_WORD_MASK(count) (presumably clearing the bits at and above
 * @count) so that source bits past the cutoff do not leak into @to. Everything
 * after the copied words is then cleared with memset().
 *
 * NOTE(review): nothing here checks count <= size; the memset() length would
 * wrap otherwise - callers are assumed to guarantee it.
 */
static inline void bitmap_copy_and_extend(unsigned long *to,
285+
const unsigned long *from,
286+
unsigned int count, unsigned int size)
287+
{
288+
unsigned int copy = BITS_TO_LONGS(count);
289+
290+
memcpy(to, from, copy * sizeof(long));
291+
if (count % BITS_PER_LONG)
292+
to[copy - 1] &= BITMAP_LAST_WORD_MASK(count);
293+
memset(to + copy, 0, bitmap_size(size) - copy * sizeof(long));
294+
}
295+
284296
/*
285297
* On 32-bit systems bitmaps are represented as u32 arrays internally. On LE64
286298
* machines the order of hi and lo parts of numbers match the bitmap structure.

tools/testing/selftests/core/close_range_test.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -563,4 +563,39 @@ TEST(close_range_cloexec_unshare_syzbot)
563563
EXPECT_EQ(close(fd3), 0);
564564
}
565565

566+
/*
 * Reproducer for a stale "fully used" bitmap after
 * close_range(..., CLOSE_RANGE_UNSHARE): duplicate fd 0 onto 2..127, share the
 * descriptor table with a child via clone3(CLONE_FILES), have the child
 * unshare while closing everything from 64 up, then check that 64 is really
 * closed and that dup(0) returns 64. The parent only checks that the child
 * exited with status 0.
 */
TEST(close_range_bitmap_corruption)
567+
{
568+
pid_t pid;
569+
int status;
570+
struct __clone_args args = {
571+
.flags = CLONE_FILES,
572+
.exit_signal = SIGCHLD,
573+
};
574+
575+
/* get the first 128 descriptors open */
/* NOTE(review): loop starts at 2, so 0 and 1 are presumably already open */
576+
for (int i = 2; i < 128; i++)
577+
EXPECT_GE(dup2(0, i), 0);
578+
579+
/* get descriptor table shared */
580+
pid = sys_clone3(&args, sizeof(args));
581+
ASSERT_GE(pid, 0);
582+
583+
if (pid == 0) {
584+
/* unshare and truncate descriptor table down to 64 */
585+
if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE))
586+
exit(EXIT_FAILURE);
587+
588+
ASSERT_EQ(fcntl(64, F_GETFD), -1);
589+
/* ... and verify that the range 64..127 is not
590+
stuck "fully used" according to secondary bitmap */
/*
 * NOTE(review): no ';' after EXPECT_EQ() below - presumably the harness
 * runs the following statement only when the expectation fails; confirm
 * against the selftest harness header.
 */
591+
EXPECT_EQ(dup(0), 64)
592+
exit(EXIT_FAILURE);
593+
exit(EXIT_SUCCESS);
594+
}
595+
596+
EXPECT_EQ(waitpid(pid, &status, 0), pid);
597+
EXPECT_EQ(true, WIFEXITED(status));
598+
EXPECT_EQ(0, WEXITSTATUS(status));
599+
}
600+
566601
TEST_HARNESS_MAIN

0 commit comments

Comments
 (0)