Skip to content

Commit fd58e6b

Browse files
Jayman2000 authored and adrienverge committed
decoder: Autodetect encoding for ignore-from-file
Before this change, yamllint would decode files on the ignore-from-file list using open()’s default encoding [1][2]. This can cause decoding to fail in some situations (see the previous commit message for details).

This change makes yamllint automatically detect the encoding for files on the ignore-from-file list. It uses the same algorithm that it uses for detecting the encoding of YAML files, so the same limitations apply: files must use UTF-8, UTF-16 or UTF-32 and they must begin with either a byte order mark or an ASCII character.

[1]: <https://docs.python.org/3.12/library/fileinput.html#fileinput.input>
[2]: <https://docs.python.org/3.12/library/fileinput.html#fileinput.FileInput>
1 parent 8e3a3b3 commit fd58e6b

File tree

4 files changed

+91
-7
lines changed

4 files changed

+91
-7
lines changed

docs/configuration.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,10 @@ or:
228228
229229
.. note:: However, this is mutually exclusive with the ``ignore`` key.
230230

231+
.. note:: Files on the ``ignore-from-file`` list should use either UTF-8,
232+
UTF-16 or UTF-32. See :doc:`Character Encoding <character_encoding>` for
233+
details and workarounds.
234+
231235
If you need to know the exact list of files that yamllint would process,
232236
without really linting them, you can use ``--list-files``:
233237

tests/test_config.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# Copyright (C) 2016 Adrien Vergé
2+
# Copyright (C) 2023–2025 Jason Yundt
23
#
34
# This program is free software: you can redistribute it and/or modify
45
# it under the terms of the GNU General Public License as published by
@@ -13,14 +14,20 @@
1314
# You should have received a copy of the GNU General Public License
1415
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1516

17+
import itertools
1618
import os
1719
import shutil
1820
import sys
1921
import tempfile
2022
import unittest
2123
from io import StringIO
2224

23-
from tests.common import build_temp_workspace, RunContext
25+
from tests.common import (
26+
build_temp_workspace,
27+
register_test_codecs,
28+
RunContext,
29+
unregister_test_codecs,
30+
)
2431

2532
from yamllint import cli, config
2633
from yamllint.config import YamlLintConfigError
@@ -820,3 +827,44 @@ def test_run_with_ignore_on_ignored_file(self):
820827
sys.stdout.getvalue().strip(),
821828
'file-at-root.yaml:4:17: [error] trailing spaces (trailing-spaces)'
822829
)
830+
831+
def create_ignore_file(self, text, codec):
832+
path = os.path.join(self.wd, f'{codec}.ignore')
833+
with open(path, 'wb') as f:
834+
f.write(text.encode(codec))
835+
self.addCleanup(lambda: os.remove(path))
836+
return path
837+
838+
def test_ignored_from_file_with_multiple_encodings(self):
839+
register_test_codecs()
840+
self.addCleanup(unregister_test_codecs)
841+
842+
ignore_files = itertools.starmap(
843+
self.create_ignore_file, (
844+
('bin/file.lint-me-anyway.yaml\n', 'utf_32_be'),
845+
('bin/file.yaml\n', 'utf_32_be_sig'),
846+
('file-at-root.yaml\n', 'utf_32_le'),
847+
('file.dont-lint-me.yaml\n', 'utf_32_le_sig'),
848+
849+
('ign-dup/file.yaml\n', 'utf_16_be'),
850+
('ign-dup/sub/dir/file.yaml\n', 'utf_16_be_sig'),
851+
('ign-trail/file.yaml\n', 'utf_16_le'),
852+
('include/ign-dup/sub/dir/file.yaml\n', 'utf_16_le_sig'),
853+
854+
('s/s/ign-trail/file.yaml\n', 'utf_8'),
855+
(
856+
's/s/ign-trail/s/s/file.yaml\n'
857+
's/s/ign-trail/s/s/file2.lint-me-anyway.yaml\n'
858+
'.yamllint\n',
859+
860+
'utf_8_sig'
861+
),
862+
)
863+
)
864+
conf = ('---\n'
865+
'extends: default\n'
866+
f'ignore-from-file: [{", ".join(ignore_files)}]\n')
867+
868+
with self.assertRaises(SystemExit) as cm:
869+
cli.run(('-d', conf, '.'))
870+
self.assertEqual(cm.exception.code, 0)

tests/test_decoder.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1515

1616
import codecs
17+
import itertools
1718
import os
1819
import unittest
1920

@@ -22,6 +23,8 @@
2223
encoding_detectable,
2324
is_test_codec,
2425
register_test_codecs,
26+
temp_workspace,
27+
temp_workspace_with_files_in_many_codecs,
2528
test_codec_built_in_equivalent,
2629
unregister_test_codecs,
2730
uses_bom,
@@ -452,3 +455,30 @@ def test_auto_decode_with_strings_encoded_at_runtime(self):
452455
msg=("None of the TEST_STRINGS_TO_ENCODE_AT_RUNTIME triggered a "
453456
"decoding error.")
454457
)
458+
459+
def perform_lines_in_file_test(self, strings):
460+
workspace = temp_workspace_with_files_in_many_codecs(
461+
'{}',
462+
'\n'.join(strings)
463+
)
464+
with temp_workspace(workspace):
465+
iterable = zip(
466+
itertools.cycle(strings),
467+
decoder.lines_in_files(workspace.keys())
468+
)
469+
for item in iterable:
470+
self.assertEqual(item[0], item[1])
471+
472+
def test_lines_in_file(self):
473+
self.perform_lines_in_file_test((
474+
"YAML",
475+
"ⓎⒶⓂⓁ",
476+
"🅨🅐🅜🅛",
477+
"YAML"
478+
))
479+
self.perform_lines_in_file_test((
480+
"𝐘𝐀𝐌𝐋",
481+
"𝖄𝕬𝕸𝕷",
482+
"𝒀𝑨𝑴𝑳",
483+
"𝓨𝓐𝓜𝓛"
484+
))

yamllint/config.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# You should have received a copy of the GNU General Public License
1414
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1515

16-
import fileinput
1716
import os.path
1817

1918
import pathspec
@@ -110,8 +109,10 @@ def parse(self, raw_content):
110109
raise YamlLintConfigError(
111110
'invalid config: ignore-from-file should contain '
112111
'filename(s), either as a list or string')
113-
with fileinput.input(conf['ignore-from-file']) as f:
114-
self.ignore = pathspec.PathSpec.from_lines('gitwildmatch', f)
112+
self.ignore = pathspec.PathSpec.from_lines(
113+
'gitwildmatch',
114+
decoder.lines_in_files(conf['ignore-from-file'])
115+
)
115116
elif 'ignore' in conf:
116117
if isinstance(conf['ignore'], str):
117118
self.ignore = pathspec.PathSpec.from_lines(
@@ -164,9 +165,10 @@ def validate_rule_conf(rule, conf):
164165
raise YamlLintConfigError(
165166
'invalid config: ignore-from-file should contain '
166167
'valid filename(s), either as a list or string')
167-
with fileinput.input(conf['ignore-from-file']) as f:
168-
conf['ignore'] = pathspec.PathSpec.from_lines(
169-
'gitwildmatch', f)
168+
conf['ignore'] = pathspec.PathSpec.from_lines(
169+
'gitwildmatch',
170+
decoder.lines_in_files(conf['ignore-from-file'])
171+
)
170172
elif ('ignore' in conf and not isinstance(
171173
conf['ignore'], pathspec.pathspec.PathSpec)):
172174
if isinstance(conf['ignore'], str):

0 commit comments

Comments
 (0)