Skip to content

Commit 9e43e46

Browse files
serhiy-storchaka authored and diegorusso committed
pythongh-115712: Support CSV dialects with delimiter=' ' and skipinitialspace=True (pythonGH-115721)
Restore support for this combination, which was disabled in pythongh-113796. csv.writer() now quotes empty fields if the delimiter is a space and skipinitialspace is true, and raises an exception if quoting is not possible.
1 parent de74b97 commit 9e43e46

File tree

3 files changed

+90
-17
lines changed

3 files changed

+90
-17
lines changed

Lib/test/test_csv.py

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ def _test_arg_valid(self, ctor, arg):
6464
ctor(arg, delimiter='\t', skipinitialspace=True)
6565
ctor(arg, escapechar='\t', skipinitialspace=True)
6666
ctor(arg, quotechar='\t', skipinitialspace=True)
67-
self.assertRaises(ValueError, ctor, arg,
68-
delimiter=' ', skipinitialspace=True)
67+
ctor(arg, delimiter=' ', skipinitialspace=True)
6968
self.assertRaises(ValueError, ctor, arg,
7069
escapechar=' ', skipinitialspace=True)
7170
self.assertRaises(ValueError, ctor, arg,
@@ -192,9 +191,6 @@ def _write_error_test(self, exc, fields, **kwargs):
192191

193192
def test_write_arg_valid(self):
194193
self._write_error_test(csv.Error, None)
195-
self._write_test((), '')
196-
self._write_test([None], '""')
197-
self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE)
198194
# Check that exceptions are passed up the chain
199195
self._write_error_test(OSError, BadIterable())
200196
class BadList:
@@ -208,7 +204,6 @@ class BadItem:
208204
def __str__(self):
209205
raise OSError
210206
self._write_error_test(OSError, [BadItem()])
211-
212207
def test_write_bigfield(self):
213208
# This exercises the buffer realloc functionality
214209
bigstring = 'X' * 50000
@@ -315,6 +310,49 @@ def test_writerows_with_none(self):
315310
fileobj.seek(0)
316311
self.assertEqual(fileobj.read(), 'a\r\n""\r\n')
317312

313+
314+
def test_write_empty_fields(self):
315+
self._write_test((), '')
316+
self._write_test([''], '""')
317+
self._write_error_test(csv.Error, [''], quoting=csv.QUOTE_NONE)
318+
self._write_test([''], '""', quoting=csv.QUOTE_STRINGS)
319+
self._write_test([''], '""', quoting=csv.QUOTE_NOTNULL)
320+
self._write_test([None], '""')
321+
self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NONE)
322+
self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_STRINGS)
323+
self._write_error_test(csv.Error, [None], quoting=csv.QUOTE_NOTNULL)
324+
self._write_test(['', ''], ',')
325+
self._write_test([None, None], ',')
326+
327+
def test_write_empty_fields_space_delimiter(self):
328+
self._write_test([''], '""', delimiter=' ', skipinitialspace=False)
329+
self._write_test([''], '""', delimiter=' ', skipinitialspace=True)
330+
self._write_test([None], '""', delimiter=' ', skipinitialspace=False)
331+
self._write_test([None], '""', delimiter=' ', skipinitialspace=True)
332+
333+
self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False)
334+
self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True)
335+
self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False)
336+
self._write_test([None, None], '"" ""', delimiter=' ', skipinitialspace=True)
337+
338+
self._write_test(['', ''], ' ', delimiter=' ', skipinitialspace=False,
339+
quoting=csv.QUOTE_NONE)
340+
self._write_error_test(csv.Error, ['', ''],
341+
delimiter=' ', skipinitialspace=True,
342+
quoting=csv.QUOTE_NONE)
343+
for quoting in csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL:
344+
self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=False,
345+
quoting=quoting)
346+
self._write_test(['', ''], '"" ""', delimiter=' ', skipinitialspace=True,
347+
quoting=quoting)
348+
349+
for quoting in csv.QUOTE_NONE, csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL:
350+
self._write_test([None, None], ' ', delimiter=' ', skipinitialspace=False,
351+
quoting=quoting)
352+
self._write_error_test(csv.Error, [None, None],
353+
delimiter=' ', skipinitialspace=True,
354+
quoting=quoting)
355+
318356
def test_writerows_errors(self):
319357
with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
320358
writer = csv.writer(fileobj)
@@ -429,6 +467,14 @@ def test_read_skipinitialspace(self):
429467
[[None, None, None]],
430468
skipinitialspace=True, quoting=csv.QUOTE_STRINGS)
431469

470+
def test_read_space_delimiter(self):
471+
self._read_test(['a b', ' a ', ' ', ''],
472+
[['a', '', '', 'b'], ['', '', 'a', '', ''], ['', '', ''], []],
473+
delimiter=' ', skipinitialspace=False)
474+
self._read_test(['a b', ' a ', ' ', ''],
475+
[['a', 'b'], ['a', ''], [''], []],
476+
delimiter=' ', skipinitialspace=True)
477+
432478
def test_read_bigfield(self):
433479
# This exercises the buffer realloc functionality and field size
434480
# limits.
@@ -555,10 +601,10 @@ class space(csv.excel):
555601
escapechar = "\\"
556602

557603
with TemporaryFile("w+", encoding="utf-8") as fileobj:
558-
fileobj.write("abc def\nc1ccccc1 benzene\n")
604+
fileobj.write("abc def\nc1ccccc1 benzene\n")
559605
fileobj.seek(0)
560606
reader = csv.reader(fileobj, dialect=space())
561-
self.assertEqual(next(reader), ["abc", "def"])
607+
self.assertEqual(next(reader), ["abc", "", "", "def"])
562608
self.assertEqual(next(reader), ["c1ccccc1", "benzene"])
563609

564610
def compare_dialect_123(self, expected, *writeargs, **kwwriteargs):
@@ -1164,8 +1210,9 @@ class mydialect(csv.Dialect):
11641210
self.assertRaises(csv.Error, create_invalid, field_name, 5)
11651211
self.assertRaises(ValueError, create_invalid, field_name, "\n")
11661212
self.assertRaises(ValueError, create_invalid, field_name, "\r")
1167-
self.assertRaises(ValueError, create_invalid, field_name, " ",
1168-
skipinitialspace=True)
1213+
if field_name != "delimiter":
1214+
self.assertRaises(ValueError, create_invalid, field_name, " ",
1215+
skipinitialspace=True)
11691216

11701217

11711218
class TestSniffer(unittest.TestCase):
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Restore support of space delimiter with ``skipinitialspace=True`` in
2+
:mod:`csv`. :func:`csv.writer()` now quotes empty fields if delimiter is a
3+
space and skipinitialspace is true, and raises an exception if quoting is not
4+
possible.

Modules/_csv.c

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -332,9 +332,9 @@ dialect_check_quoting(int quoting)
332332
}
333333

334334
static int
335-
dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect)
335+
dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect, bool allowspace)
336336
{
337-
if (c == '\r' || c == '\n' || (dialect->skipinitialspace && c == ' ')) {
337+
if (c == '\r' || c == '\n' || (c == ' ' && !allowspace)) {
338338
PyErr_Format(PyExc_ValueError, "bad %s value", name);
339339
return -1;
340340
}
@@ -535,9 +535,11 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
535535
PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
536536
goto err;
537537
}
538-
if (dialect_check_char("delimiter", self->delimiter, self) ||
539-
dialect_check_char("escapechar", self->escapechar, self) ||
540-
dialect_check_char("quotechar", self->quotechar, self) ||
538+
if (dialect_check_char("delimiter", self->delimiter, self, true) ||
539+
dialect_check_char("escapechar", self->escapechar, self,
540+
!self->skipinitialspace) ||
541+
dialect_check_char("quotechar", self->quotechar, self,
542+
!self->skipinitialspace) ||
541543
dialect_check_chars("delimiter", "escapechar",
542544
self->delimiter, self->escapechar) ||
543545
dialect_check_chars("delimiter", "quotechar",
@@ -1221,6 +1223,7 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
12211223
static int
12221224
join_append(WriterObj *self, PyObject *field, int quoted)
12231225
{
1226+
DialectObj *dialect = self->dialect;
12241227
int field_kind = -1;
12251228
const void *field_data = NULL;
12261229
Py_ssize_t field_len = 0;
@@ -1231,6 +1234,19 @@ join_append(WriterObj *self, PyObject *field, int quoted)
12311234
field_data = PyUnicode_DATA(field);
12321235
field_len = PyUnicode_GET_LENGTH(field);
12331236
}
1237+
if (!field_len && dialect->delimiter == ' ' && dialect->skipinitialspace) {
1238+
if (dialect->quoting == QUOTE_NONE ||
1239+
(field == NULL &&
1240+
(dialect->quoting == QUOTE_STRINGS ||
1241+
dialect->quoting == QUOTE_NOTNULL)))
1242+
{
1243+
PyErr_Format(self->error_obj,
1244+
"empty field must be quoted if delimiter is a space "
1245+
"and skipinitialspace is true");
1246+
return 0;
1247+
}
1248+
quoted = 1;
1249+
}
12341250
rec_len = join_append_data(self, field_kind, field_data, field_len,
12351251
&quoted, 0);
12361252
if (rec_len < 0)
@@ -1282,6 +1298,7 @@ csv_writerow(WriterObj *self, PyObject *seq)
12821298
{
12831299
DialectObj *dialect = self->dialect;
12841300
PyObject *iter, *field, *line, *result;
1301+
bool null_field = false;
12851302

12861303
iter = PyObject_GetIter(seq);
12871304
if (iter == NULL) {
@@ -1318,11 +1335,12 @@ csv_writerow(WriterObj *self, PyObject *seq)
13181335
break;
13191336
}
13201337

1338+
null_field = (field == Py_None);
13211339
if (PyUnicode_Check(field)) {
13221340
append_ok = join_append(self, field, quoted);
13231341
Py_DECREF(field);
13241342
}
1325-
else if (field == Py_None) {
1343+
else if (null_field) {
13261344
append_ok = join_append(self, NULL, quoted);
13271345
Py_DECREF(field);
13281346
}
@@ -1348,7 +1366,11 @@ csv_writerow(WriterObj *self, PyObject *seq)
13481366
return NULL;
13491367

13501368
if (self->num_fields > 0 && self->rec_len == 0) {
1351-
if (dialect->quoting == QUOTE_NONE) {
1369+
if (dialect->quoting == QUOTE_NONE ||
1370+
(null_field &&
1371+
(dialect->quoting == QUOTE_STRINGS ||
1372+
dialect->quoting == QUOTE_NOTNULL)))
1373+
{
13521374
PyErr_Format(self->error_obj,
13531375
"single empty field record must be quoted");
13541376
return NULL;

0 commit comments

Comments
 (0)