Skip to content

Commit c31bc0e

Browse files
authored
Merge pull request #44 from nexB/posix-safe-filename
Make safe filename safe to use on POSIX
2 parents d3eed9a + b88d65a commit c31bc0e

File tree

3 files changed

+77
-24
lines changed

3 files changed

+77
-24
lines changed

CHANGELOG.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
Release notes
22
=============
33

4-
Version (next)
4+
Version (next)
55
------------------------------
66

7-
TBD.
7+
- Add ``posix_only`` option to ``commoncode.paths.portable_filename`` and
8+
``commoncode.paths.safe_path``. When enabled, this option prevents
9+
``commoncode.paths.portable_filename`` and ``commoncode.paths.safe_path`` from
10+
replacing filenames and punctuation characters that are valid on POSIX
11+
operating systems but not on Windows.
812

913
Version 31.0.0 - (2022-05-16)
1014
------------------------------
@@ -50,7 +54,7 @@ This is a major version with API-breaking changes in the resource module.
5054
otherwise missing from files path list.
5155
In particular this behaviour changed when you create a VirtualCodebase from
5256
a previous Codebase created with a "full_root" argument. Previously, the
53-
missing paths of a "full_root" Codebase were kept unchanged.
57+
missing paths of a "full_root" Codebase were kept unchanged.
5458
Note that the VirtualCodebase has always ignored the "full_root" argument.
5559

5660
- The Codebase and VirtualCodebase are now iterable. Iterating on a codebase
@@ -80,7 +84,7 @@ Other changes:
8084

8185
- Remove Python upper version limit.
8286
- Merge latest skeleton
83-
- fileutils.parent_directory() now accepts a "with_trail" argument.
87+
- fileutils.parent_directory() now accepts a "with_trail" argument.
8488
The returned directory has a trailing path separator unless with_trail is False.
8589
The default is True and the default behaviour is unchanged.
8690

src/commoncode/paths.py

Lines changed: 43 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
# Build OS-portable and safer paths
2727

2828

29-
def safe_path(path, posix=False, preserve_spaces=False):
29+
def safe_path(path, posix=False, preserve_spaces=False, posix_only=False):
3030
"""
3131
Convert `path` to a safe and portable POSIX path usable on multiple OSes.
3232
The returned path is an ASCII-only byte string, resolved for relative
@@ -52,7 +52,13 @@ def safe_path(path, posix=False, preserve_spaces=False):
5252
_pathmod, path_sep = path_handlers(path, posix)
5353

5454
segments = [s.strip() for s in path.split(path_sep) if s.strip()]
55-
segments = [portable_filename(s, preserve_spaces=preserve_spaces) for s in segments]
55+
segments = [
56+
portable_filename(
57+
s,
58+
preserve_spaces=preserve_spaces,
59+
posix_only=posix_only
60+
) for s in segments
61+
]
5662

5763
if not segments:
5864
return '_'
@@ -134,17 +140,34 @@ def resolve(path, posix=True):
134140
return path
135141

136142

137-
legal_punctuation = r"!\#$%&\(\)\+,\-\.;\=@\[\]_\{\}\~"
138-
legal_spaces = r" "
139-
legal_chars = r'A-Za-z0-9' + legal_punctuation
143+
legal_punctuation = r'!\#$%&\(\)\+,\-\.;\=@\[\]_\{\}\~'
144+
legal_spaces = r' '
145+
legal_alphanumeric = r'A-Za-z0-9'
146+
legal_chars = legal_alphanumeric + legal_punctuation
140147
legal_chars_inc_spaces = legal_chars + legal_spaces
141148
illegal_chars_re = r'[^' + legal_chars + r']'
142149
illegal_chars_exc_spaces_re = r'[^' + legal_chars_inc_spaces + r']'
143150
replace_illegal_chars = re.compile(illegal_chars_re).sub
144151
replace_illegal_chars_exc_spaces = re.compile(illegal_chars_exc_spaces_re).sub
145152

146153

147-
def portable_filename(filename, preserve_spaces=False):
154+
posix_legal_punctuation = r'<:"/>\|\*\^\\\'`\?' + legal_punctuation
155+
posix_legal_chars = legal_alphanumeric + posix_legal_punctuation
156+
posix_legal_chars_inc_spaces = posix_legal_chars + legal_spaces
157+
posix_illegal_chars_re = r'[^' + posix_legal_chars + r']'
158+
posix_illegal_chars_exc_spaces_re = r'[^' + posix_legal_chars_inc_spaces + r']'
159+
replace_illegal_posix_chars = re.compile(posix_illegal_chars_re).sub
160+
replace_illegal_posix_chars_exc_spaces = re.compile(posix_illegal_chars_exc_spaces_re).sub
161+
162+
163+
ILLEGAL_WINDOWS_NAMES = set([
164+
'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
165+
'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
166+
'aux', 'con', 'nul', 'prn'
167+
])
168+
169+
170+
def portable_filename(filename, preserve_spaces=False, posix_only=False):
148171
"""
149172
Return a new name for `filename` that is portable across operating systems.
150173
@@ -170,22 +193,21 @@ def portable_filename(filename, preserve_spaces=False):
170193
if not filename:
171194
return '_'
172195

173-
if preserve_spaces:
174-
filename = replace_illegal_chars_exc_spaces('_', filename)
196+
if posix_only:
197+
if preserve_spaces:
198+
filename = replace_illegal_posix_chars_exc_spaces('_', filename)
199+
else:
200+
filename = replace_illegal_posix_chars('_', filename)
175201
else:
176-
filename = replace_illegal_chars('_', filename)
177-
178-
# these are illegal both upper and lowercase and with or without an extension
179-
# we insert an underscore after the base name.
180-
windows_illegal_names = set([
181-
'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
182-
'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
183-
'aux', 'con', 'nul', 'prn'
184-
])
202+
if preserve_spaces:
203+
filename = replace_illegal_chars_exc_spaces('_', filename)
204+
else:
205+
filename = replace_illegal_chars('_', filename)
185206

186-
basename, dot, extension = filename.partition('.')
187-
if basename.lower() in windows_illegal_names:
188-
filename = ''.join([basename, '_', dot, extension])
207+
if not posix_only:
208+
basename, dot, extension = filename.partition('.')
209+
if basename.lower() in ILLEGAL_WINDOWS_NAMES:
210+
filename = ''.join([basename, '_', dot, extension])
189211

190212
# no name made only of dots.
191213
if set(filename) == set(['.']):
@@ -198,6 +220,7 @@ def portable_filename(filename, preserve_spaces=False):
198220

199221
return filename
200222

223+
201224
#
202225
# paths comparisons, common prefix and suffix extraction
203226
#

tests/test_paths.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,14 @@ def test_safe_path_posix_style_many_dots(self):
9393
expected = 'dotdot/dotdot/dotdot/webform.components.inc'
9494
assert test == expected
9595

96+
def test_safe_path_posix_only(self):
97+
test_path = 'var/lib/dpkg/info/libgsm1:amd64.list'
98+
test = paths.safe_path(test_path)
99+
expected = 'var/lib/dpkg/info/libgsm1_amd64.list'
100+
assert test == expected
101+
test = paths.safe_path(test_path, posix_only=True)
102+
assert test == test_path
103+
96104
def test_resolve_mixed_slash(self):
97105
test = paths.resolve('C:\\..\\./drupal.js')
98106
expected = 'C/drupal.js'
@@ -140,6 +148,24 @@ def test_portable_filename(self):
140148
expected = 'This_contain_UMLAUT_umlauts.txt'
141149
assert paths.portable_filename(u'This contain UMLAUT \xfcml\xe4uts.txt') == expected
142150

151+
# Check to see if illegal Windows filenames are properly handled
152+
for illegal_window_name in paths.ILLEGAL_WINDOWS_NAMES:
153+
# Rename files with names that are illegal on Windows
154+
expected = f'{illegal_window_name}_'
155+
assert paths.portable_filename(illegal_window_name) == expected
156+
157+
# With posix_only=True, keep names that are illegal only on Windows
158+
assert paths.portable_filename(illegal_window_name, posix_only=True) == illegal_window_name
159+
160+
# Check that posix_only=True keeps punctuation characters that are valid on
161+
# POSIX but illegal in Windows filenames, and that the default replaces them
162+
for valid_posix_path_char in paths.posix_legal_punctuation:
163+
test_name = f'test{valid_posix_path_char}'
164+
assert paths.portable_filename(test_name, posix_only=True) == test_name
165+
if valid_posix_path_char not in paths.legal_punctuation:
166+
expected = f'test_'
167+
assert paths.portable_filename(test_name) == expected
168+
143169

144170
class TestCommonPath(TestCase):
145171

0 commit comments

Comments
 (0)