Skip to content

Commit f128ae9

Browse files
committed
Merge branch 'master' into fix-non-ascii-chars-in-status-lines
2 parents 07a8f73 + e9405ac commit f128ae9

File tree

4 files changed

+51
-18
lines changed

4 files changed

+51
-18
lines changed

doc/source/changes.rst

+3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@ Changelog
77

88
* Fix: remote output parser now correctly matches refs with non-ASCII
99
chars in them
10+
* API: Diffs now have `a_rawpath`, `b_rawpath`, `raw_rename_from`,
11+
`raw_rename_to` properties, which are the raw-bytes equivalents of their
12+
unicode path counterparts.
1013
* Fix: TypeError about passing keyword argument to string decode() on
1114
Python 2.6.
1215
* Feature: `setUrl API on Remotes <https://github.com/gitpython-developers/GitPython/pull/446#issuecomment-224670539>`_

git/compat.py

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def mviter(d):
3535
return d.values()
3636
range = xrange
3737
unicode = str
38+
binary_type = bytes
3839
else:
3940
FileType = file
4041
# usually, this is just ascii, which might not enough for our encoding needs
@@ -44,6 +45,7 @@ def mviter(d):
4445
byte_ord = ord
4546
bchr = chr
4647
unicode = unicode
48+
binary_type = str
4749
range = xrange
4850
def mviter(d):
4951
return d.itervalues()

git/diff.py

+41-17
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
from gitdb.util import hex_to_bin
99

10+
from .compat import binary_type
1011
from .objects.blob import Blob
1112
from .objects.util import mode_str_to_int
1213

@@ -245,18 +246,20 @@ class Diff(object):
245246
NULL_HEX_SHA = "0" * 40
246247
NULL_BIN_SHA = b"\0" * 20
247248

248-
__slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_path", "b_path",
249-
"new_file", "deleted_file", "rename_from", "rename_to", "diff")
249+
__slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "a_rawpath", "b_rawpath",
250+
"new_file", "deleted_file", "raw_rename_from", "raw_rename_to", "diff")
250251

251-
def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
252-
b_mode, new_file, deleted_file, rename_from,
253-
rename_to, diff):
252+
def __init__(self, repo, a_rawpath, b_rawpath, a_blob_id, b_blob_id, a_mode,
253+
b_mode, new_file, deleted_file, raw_rename_from,
254+
raw_rename_to, diff):
254255

255256
self.a_mode = a_mode
256257
self.b_mode = b_mode
257258

258-
self.a_path = a_path
259-
self.b_path = b_path
259+
assert a_rawpath is None or isinstance(a_rawpath, binary_type)
260+
assert b_rawpath is None or isinstance(b_rawpath, binary_type)
261+
self.a_rawpath = a_rawpath
262+
self.b_rawpath = b_rawpath
260263

261264
if self.a_mode:
262265
self.a_mode = mode_str_to_int(self.a_mode)
@@ -266,19 +269,21 @@ def __init__(self, repo, a_path, b_path, a_blob_id, b_blob_id, a_mode,
266269
if a_blob_id is None or a_blob_id == self.NULL_HEX_SHA:
267270
self.a_blob = None
268271
else:
269-
self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=a_path)
272+
self.a_blob = Blob(repo, hex_to_bin(a_blob_id), mode=self.a_mode, path=self.a_path)
270273

271274
if b_blob_id is None or b_blob_id == self.NULL_HEX_SHA:
272275
self.b_blob = None
273276
else:
274-
self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=b_path)
277+
self.b_blob = Blob(repo, hex_to_bin(b_blob_id), mode=self.b_mode, path=self.b_path)
275278

276279
self.new_file = new_file
277280
self.deleted_file = deleted_file
278281

279282
# be clear and use None instead of empty strings
280-
self.rename_from = rename_from or None
281-
self.rename_to = rename_to or None
283+
assert raw_rename_from is None or isinstance(raw_rename_from, binary_type)
284+
assert raw_rename_to is None or isinstance(raw_rename_to, binary_type)
285+
self.raw_rename_from = raw_rename_from or None
286+
self.raw_rename_to = raw_rename_to or None
282287

283288
self.diff = diff
284289

@@ -344,6 +349,22 @@ def __str__(self):
344349
# end
345350
return res
346351

352+
@property
353+
def a_path(self):
354+
return self.a_rawpath.decode(defenc, 'replace') if self.a_rawpath else None
355+
356+
@property
357+
def b_path(self):
358+
return self.b_rawpath.decode(defenc, 'replace') if self.b_rawpath else None
359+
360+
@property
361+
def rename_from(self):
362+
return self.raw_rename_from.decode(defenc, 'replace') if self.raw_rename_from else None
363+
364+
@property
365+
def rename_to(self):
366+
return self.raw_rename_to.decode(defenc, 'replace') if self.raw_rename_to else None
367+
347368
@property
348369
def renamed(self):
349370
""":returns: True if the blob of our diff has been renamed
@@ -388,6 +409,7 @@ def _index_from_patch_format(cls, repo, stream):
388409
new_file_mode, deleted_file_mode, \
389410
a_blob_id, b_blob_id, b_mode, \
390411
a_path, b_path = header.groups()
412+
391413
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
392414

393415
a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback)
@@ -404,15 +426,15 @@ def _index_from_patch_format(cls, repo, stream):
404426
a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode))
405427
b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode)
406428
index.append(Diff(repo,
407-
a_path and a_path.decode(defenc, 'replace'),
408-
b_path and b_path.decode(defenc, 'replace'),
429+
a_path,
430+
b_path,
409431
a_blob_id and a_blob_id.decode(defenc),
410432
b_blob_id and b_blob_id.decode(defenc),
411433
a_mode and a_mode.decode(defenc),
412434
b_mode and b_mode.decode(defenc),
413435
new_file, deleted_file,
414-
rename_from and rename_from.decode(defenc, 'replace'),
415-
rename_to and rename_to.decode(defenc, 'replace'),
436+
rename_from,
437+
rename_to,
416438
None))
417439

418440
previous_header = header
@@ -438,8 +460,8 @@ def _index_from_raw_format(cls, repo, stream):
438460
meta, _, path = line[1:].partition('\t')
439461
old_mode, new_mode, a_blob_id, b_blob_id, change_type = meta.split(None, 4)
440462
path = path.strip()
441-
a_path = path
442-
b_path = path
463+
a_path = path.encode(defenc)
464+
b_path = path.encode(defenc)
443465
deleted_file = False
444466
new_file = False
445467
rename_from = None
@@ -455,6 +477,8 @@ def _index_from_raw_format(cls, repo, stream):
455477
new_file = True
456478
elif change_type[0] == 'R': # parses RXXX, where XXX is a confidence value
457479
a_path, b_path = path.split('\t', 1)
480+
a_path = a_path.encode(defenc)
481+
b_path = b_path.encode(defenc)
458482
rename_from, rename_to = a_path, b_path
459483
# END add/remove handling
460484

git/test/test_diff.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ def test_diff_with_rename(self):
9090
assert_true(diff.renamed)
9191
assert_equal(diff.rename_from, u'Jérôme')
9292
assert_equal(diff.rename_to, u'müller')
93+
assert_equal(diff.raw_rename_from, b'J\xc3\xa9r\xc3\xb4me')
94+
assert_equal(diff.raw_rename_to, b'm\xc3\xbcller')
9395
assert isinstance(str(diff), str)
9496

9597
output = StringProcessAdapter(fixture('diff_rename_raw'))
@@ -129,7 +131,7 @@ def test_diff_index_raw_format(self):
129131
output = StringProcessAdapter(fixture('diff_index_raw'))
130132
res = Diff._index_from_raw_format(None, output.stdout)
131133
assert res[0].deleted_file
132-
assert res[0].b_path == ''
134+
assert res[0].b_path is None
133135

134136
def test_diff_initial_commit(self):
135137
initial_commit = self.rorepo.commit('33ebe7acec14b25c5f84f35a664803fcab2f7781')
@@ -162,7 +164,9 @@ def test_diff_unsafe_paths(self):
162164
self.assertEqual(res[7].b_path, u'path/with-question-mark?')
163165
self.assertEqual(res[8].b_path, u'path/¯\\_(ツ)_|¯')
164166
self.assertEqual(res[9].b_path, u'path/💩.txt')
167+
self.assertEqual(res[9].b_rawpath, b'path/\xf0\x9f\x92\xa9.txt')
165168
self.assertEqual(res[10].b_path, u'path/�-invalid-unicode-path.txt')
169+
self.assertEqual(res[10].b_rawpath, b'path/\x80-invalid-unicode-path.txt')
166170

167171
# The "Moves"
168172
# NOTE: The path prefixes a/ and b/ here are legit! We're actually

0 commit comments

Comments
 (0)