Skip to content

Commit 58e55ef

Browse files
andrealmeid authored and brauner committed
tmpfs: Add casefold lookup support
Enable casefold lookup in tmpfs, based on the encoding defined by userspace. That means that instead of comparing a file name byte by byte, it compares it to a case-insensitive equivalent of the Unicode string.

* Dcache handling

There's a special need when dealing with case-insensitive dentries. First of all, we currently invalidate every negative casefold dentry. That happens because the VFS code currently has no proper support to deal with them, given that it could incorrectly reuse a previous filename for a new file that has a casefold match. For instance, this could happen:

$ mkdir DIR
$ rm -r DIR
$ mkdir dir
$ ls
DIR/

And it would be perceived as an inconsistency from the userspace point of view, because even though we match files in a case-insensitive manner, we still honor whatever the initial filename is.

Along with that, tmpfs stores only the first equivalent name dentry used in the dcache, preventing duplication of dentries in the dcache. The d_compare() version for casefold files uses a normalized string, so the filename under lookup will be compared to another normalized string for the existing file, achieving a casefolded lookup.

* Enabling casefold via mount options

Most filesystems have their data stored on disk, so the casefold option needs to be enabled when building a filesystem on a device (via mkfs). However, as tmpfs is a RAM-backed filesystem, there's no disk information and thus no mkfs to store information about casefold.

For tmpfs, create casefold options for mounting. Userspace can then enable casefold support for a mount point using:

$ mount -t tmpfs -o casefold=utf8-12.1.0 fs_name mount_dir/

Userspace must set which version of the Unicode standard it is aiming for. The available options depend on what the kernel Unicode subsystem supports.

And for strict encoding:

$ mount -t tmpfs -o casefold=utf8-12.1.0,strict_encoding fs_name mount_dir/

Strict encoding means that tmpfs will refuse to create invalid UTF-8 sequences.
When this option is not enabled, any invalid sequence will be treated as an opaque byte sequence, ignoring the encoding, and thus cannot be looked up in a case-insensitive way.

* Check for casefold dirs on simple_lookup()

On simple_lookup(), do not create dentries for casefold directories. Currently, VFS does not support case-insensitive negative dentries and can create inconsistencies in the filesystem. Prevent such dentries from being created in the first place.

Reviewed-by: Gabriel Krisman Bertazi <[email protected]>
Reviewed-by: Gabriel Krisman Bertazi <[email protected]>
Signed-off-by: André Almeida <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Christian Brauner <[email protected]>
1 parent 458532c commit 58e55ef

File tree

2 files changed

+127
-4
lines changed

2 files changed

+127
-4
lines changed

fs/libfs.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned
7777
return ERR_PTR(-ENAMETOOLONG);
7878
if (!dentry->d_sb->s_d_op)
7979
d_set_d_op(dentry, &simple_dentry_operations);
80+
81+
if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
82+
return NULL;
83+
8084
d_add(dentry, NULL);
8185
return NULL;
8286
}

mm/shmem.c

Lines changed: 123 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include <linux/fs_parser.h>
4141
#include <linux/swapfile.h>
4242
#include <linux/iversion.h>
43+
#include <linux/unicode.h>
4344
#include "swap.h"
4445

4546
static struct vfsmount *shm_mnt __ro_after_init;
@@ -123,6 +124,10 @@ struct shmem_options {
123124
bool noswap;
124125
unsigned short quota_types;
125126
struct shmem_quota_limits qlimits;
127+
#if IS_ENABLED(CONFIG_UNICODE)
128+
struct unicode_map *encoding;
129+
bool strict_encoding;
130+
#endif
126131
#define SHMEM_SEEN_BLOCKS 1
127132
#define SHMEM_SEEN_INODES 2
128133
#define SHMEM_SEEN_HUGE 4
@@ -3565,6 +3570,9 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
35653570
struct inode *inode;
35663571
int error;
35673572

3573+
if (!generic_ci_validate_strict_name(dir, &dentry->d_name))
3574+
return -EINVAL;
3575+
35683576
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE);
35693577
if (IS_ERR(inode))
35703578
return PTR_ERR(inode);
@@ -3584,7 +3592,12 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
35843592
dir->i_size += BOGO_DIRENT_SIZE;
35853593
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
35863594
inode_inc_iversion(dir);
3587-
d_instantiate(dentry, inode);
3595+
3596+
if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
3597+
d_add(dentry, inode);
3598+
else
3599+
d_instantiate(dentry, inode);
3600+
35883601
dget(dentry); /* Extra count - pin the dentry in core */
35893602
return error;
35903603

@@ -3675,7 +3688,10 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir,
36753688
inc_nlink(inode);
36763689
ihold(inode); /* New dentry reference */
36773690
dget(dentry); /* Extra pinning count for the created dentry */
3678-
d_instantiate(dentry, inode);
3691+
if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
3692+
d_add(dentry, inode);
3693+
else
3694+
d_instantiate(dentry, inode);
36793695
out:
36803696
return ret;
36813697
}
@@ -3695,6 +3711,14 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
36953711
inode_inc_iversion(dir);
36963712
drop_nlink(inode);
36973713
dput(dentry); /* Undo the count from "create" - does all the work */
3714+
3715+
/*
3716+
* For now, VFS can't deal with case-insensitive negative dentries, so
3717+
* we invalidate them
3718+
*/
3719+
if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
3720+
d_invalidate(dentry);
3721+
36983722
return 0;
36993723
}
37003724

@@ -3839,7 +3863,10 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
38393863
dir->i_size += BOGO_DIRENT_SIZE;
38403864
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
38413865
inode_inc_iversion(dir);
3842-
d_instantiate(dentry, inode);
3866+
if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
3867+
d_add(dentry, inode);
3868+
else
3869+
d_instantiate(dentry, inode);
38433870
dget(dentry);
38443871
return 0;
38453872

@@ -4192,6 +4219,9 @@ enum shmem_param {
41924219
Opt_usrquota_inode_hardlimit,
41934220
Opt_grpquota_block_hardlimit,
41944221
Opt_grpquota_inode_hardlimit,
4222+
Opt_casefold_version,
4223+
Opt_casefold,
4224+
Opt_strict_encoding,
41954225
};
41964226

41974227
static const struct constant_table shmem_param_enums_huge[] = {
@@ -4223,9 +4253,54 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
42234253
fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
42244254
fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
42254255
#endif
4256+
fsparam_string("casefold", Opt_casefold_version),
4257+
fsparam_flag ("casefold", Opt_casefold),
4258+
fsparam_flag ("strict_encoding", Opt_strict_encoding),
42264259
{}
42274260
};
42284261

4262+
#if IS_ENABLED(CONFIG_UNICODE)
4263+
static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
4264+
bool latest_version)
4265+
{
4266+
struct shmem_options *ctx = fc->fs_private;
4267+
unsigned int version = UTF8_LATEST;
4268+
struct unicode_map *encoding;
4269+
char *version_str = param->string + 5;
4270+
4271+
if (!latest_version) {
4272+
if (strncmp(param->string, "utf8-", 5))
4273+
return invalfc(fc, "Only UTF-8 encodings are supported "
4274+
"in the format: utf8-<version number>");
4275+
4276+
version = utf8_parse_version(version_str);
4277+
if (version < 0)
4278+
return invalfc(fc, "Invalid UTF-8 version: %s", version_str);
4279+
}
4280+
4281+
encoding = utf8_load(version);
4282+
4283+
if (IS_ERR(encoding)) {
4284+
return invalfc(fc, "Failed loading UTF-8 version: utf8-%u.%u.%u\n",
4285+
unicode_major(version), unicode_minor(version),
4286+
unicode_rev(version));
4287+
}
4288+
4289+
pr_info("tmpfs: Using encoding : utf8-%u.%u.%u\n",
4290+
unicode_major(version), unicode_minor(version), unicode_rev(version));
4291+
4292+
ctx->encoding = encoding;
4293+
4294+
return 0;
4295+
}
4296+
#else
4297+
/*
 * Variant of shmem_parse_opt_casefold() compiled when CONFIG_UNICODE is not
 * enabled: no encoding is loaded and the option is always rejected by
 * returning the value of invalfc(). @param and @latest_version are unused.
 */
static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
4298+
bool latest_version)
4299+
{
4300+
return invalfc(fc, "tmpfs: Kernel not built with CONFIG_UNICODE\n");
4301+
}
4302+
#endif
4303+
42294304
static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
42304305
{
42314306
struct shmem_options *ctx = fc->fs_private;
@@ -4384,6 +4459,17 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
43844459
"Group quota inode hardlimit too large.");
43854460
ctx->qlimits.grpquota_ihardlimit = size;
43864461
break;
4462+
case Opt_casefold_version:
4463+
return shmem_parse_opt_casefold(fc, param, false);
4464+
case Opt_casefold:
4465+
return shmem_parse_opt_casefold(fc, param, true);
4466+
case Opt_strict_encoding:
4467+
#if IS_ENABLED(CONFIG_UNICODE)
4468+
ctx->strict_encoding = true;
4469+
break;
4470+
#else
4471+
return invalfc(fc, "tmpfs: Kernel not built with CONFIG_UNICODE\n");
4472+
#endif
43874473
}
43884474
return 0;
43894475

@@ -4613,6 +4699,11 @@ static void shmem_put_super(struct super_block *sb)
46134699
{
46144700
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
46154701

4702+
#if IS_ENABLED(CONFIG_UNICODE)
4703+
if (sb->s_encoding)
4704+
utf8_unload(sb->s_encoding);
4705+
#endif
4706+
46164707
#ifdef CONFIG_TMPFS_QUOTA
46174708
shmem_disable_quotas(sb);
46184709
#endif
@@ -4623,6 +4714,14 @@ static void shmem_put_super(struct super_block *sb)
46234714
sb->s_fs_info = NULL;
46244715
}
46254716

4717+
#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_TMPFS)
4718+
/*
 * Dentry operations that shmem_fill_super() installs as sb->s_d_op when a
 * casefold encoding was selected at mount time. Name hashing and comparison
 * are delegated to the generic_ci_* helpers; d_delete is set to
 * always_delete_dentry (presumably so unused dentries are not kept cached --
 * confirm against the helper's definition).
 */
static const struct dentry_operations shmem_ci_dentry_ops = {
4719+
.d_hash = generic_ci_d_hash,
4720+
.d_compare = generic_ci_d_compare,
4721+
.d_delete = always_delete_dentry,
4722+
};
4723+
#endif
4724+
46264725
static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
46274726
{
46284727
struct shmem_options *ctx = fc->fs_private;
@@ -4657,9 +4756,25 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
46574756
}
46584757
sb->s_export_op = &shmem_export_ops;
46594758
sb->s_flags |= SB_NOSEC | SB_I_VERSION;
4759+
4760+
#if IS_ENABLED(CONFIG_UNICODE)
4761+
if (!ctx->encoding && ctx->strict_encoding) {
4762+
pr_err("tmpfs: strict_encoding option without encoding is forbidden\n");
4763+
error = -EINVAL;
4764+
goto failed;
4765+
}
4766+
4767+
if (ctx->encoding) {
4768+
sb->s_encoding = ctx->encoding;
4769+
sb->s_d_op = &shmem_ci_dentry_ops;
4770+
if (ctx->strict_encoding)
4771+
sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL;
4772+
}
4773+
#endif
4774+
46604775
#else
46614776
sb->s_flags |= SB_NOUSER;
4662-
#endif
4777+
#endif /* CONFIG_TMPFS */
46634778
sbinfo->max_blocks = ctx->blocks;
46644779
sbinfo->max_inodes = ctx->inodes;
46654780
sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE;
@@ -4933,6 +5048,10 @@ int shmem_init_fs_context(struct fs_context *fc)
49335048
ctx->uid = current_fsuid();
49345049
ctx->gid = current_fsgid();
49355050

5051+
#if IS_ENABLED(CONFIG_UNICODE)
5052+
ctx->encoding = NULL;
5053+
#endif
5054+
49365055
fc->fs_private = ctx;
49375056
fc->ops = &shmem_fs_context_ops;
49385057
return 0;

0 commit comments

Comments
 (0)