Skip to content

Commit 24b5d01

Browse files
committed
Fix name parsing
1 parent 626e846 commit 24b5d01

File tree

1 file changed

+61
-26
lines changed

1 file changed

+61
-26
lines changed

pepreader/pep0.py

+61-26
Original file line numberDiff line numberDiff line change
@@ -59,23 +59,29 @@ class Author(object):
5959

6060
def __init__(self, author_and_email_tuple):
6161
"""Parse the name and email address of an author."""
62+
self.first = self.last = ''
63+
6264
name, email = author_and_email_tuple
6365
self.first_last = name.strip()
6466
self.email = email.lower()
65-
last_name_fragment, suffix = self._last_name(name)
66-
name_sep = name.index(last_name_fragment)
67-
self.first = name[:name_sep].rstrip()
68-
self.last = last_name_fragment
69-
if self.last[1] == ".":
70-
# Add an escape to avoid docutils turning `v.` into `22.`.
71-
self.last = "\\" + self.last
72-
self.suffix = suffix
73-
if not self.first:
74-
self.last_first = self.last
67+
68+
name_dict = self._parse_name(name)
69+
self.suffix = name_dict.get("suffix")
70+
if name_dict.get("name"):
71+
self.last_first = name_dict["name"]
72+
self.nick = name_dict["name"]
7573
else:
74+
self.first = name_dict["forename"].rstrip()
75+
self.last = name_dict["surname"]
76+
if self.last[1] == ".":
77+
# Add an escape to avoid docutils turning `v.` into `22.`.
78+
self.last = "\\" + self.last
7679
self.last_first = ", ".join([self.last, self.first])
77-
if self.suffix:
78-
self.last_first += ", " + self.suffix
80+
self.nick = self.last
81+
82+
if self.suffix:
83+
self.last_first += ", " + self.suffix
84+
7985
if self.last == "van Rossum":
8086
# Special case for our beloved BDFL. :)
8187
if self.first == "Guido":
@@ -85,8 +91,6 @@ def __init__(self, author_and_email_tuple):
8591
else:
8692
raise ValueError(f"unknown van Rossum {self}!")
8793
self.last_first += f" ({self.nick})"
88-
else:
89-
self.nick = self.last
9094

9195
def __hash__(self):
9296
return hash(self.first_last)
@@ -107,7 +111,7 @@ def sort_by(self):
107111
return unicodedata.normalize("NFKD", base)
108112

109113
@staticmethod
110-
def _last_name(full_name):
114+
def _parse_name(full_name):
111115
"""Find the last name (or nickname) of a full name.
112116
113117
If no last name (e.g., 'Aahz') then return the full name. If there is
@@ -116,19 +120,50 @@ def _last_name(full_name):
116120
through a comma, then drop the suffix.
117121
118122
"""
119-
name_partition = full_name.partition(",")
120-
no_suffix = name_partition[0].strip()
121-
suffix = name_partition[2].strip()
122-
name_parts = no_suffix.split()
123-
part_count = len(name_parts)
124-
if part_count == 1 or part_count == 2:
125-
return name_parts[-1], suffix
126-
else:
127-
assert part_count > 2
123+
possible_suffixes = ["Jr", "Jr.", "II", "III"]
124+
special_cases = ["The Python core team and community"]
125+
126+
if full_name in special_cases:
127+
return {"name": full_name}
128+
129+
suffix_partition = full_name.partition(",")
130+
pre_suffix = suffix_partition[0].strip()
131+
suffix = suffix_partition[2].strip()
132+
133+
name_parts = pre_suffix.split(" ")
134+
num_parts = len(name_parts)
135+
name = {"suffix": suffix}
136+
137+
if num_parts == 0:
138+
raise ValueError("Name is empty!")
139+
elif num_parts == 1:
140+
name.update({"name": name_parts[0]})
141+
elif num_parts == 2:
142+
name.update({"forename": name_parts[0], "surname": name_parts[1]})
143+
elif num_parts > 2:
144+
# handles III etc.
145+
if name_parts[-1] in possible_suffixes:
146+
new_suffix = " ".join([*name_parts[-1:], suffix]).strip()
147+
name_parts.pop(-1)
148+
name.update(suffix=new_suffix)
149+
150+
# handles von, van, v. etc.
128151
if name_parts[-2].islower():
129-
return " ".join(name_parts[-2:]), suffix
152+
forename = " ".join(name_parts[:-2])
153+
surname = " ".join(name_parts[-2:])
154+
name.update({"forename": forename, "surname": surname})
155+
# handles double surnames after a middle initial (e.g. N. Vander Weele)
156+
elif any(s.endswith(".") for s in name_parts):
157+
split_position = [i for i, x in enumerate(name_parts) if x.endswith(".")][-1] + 1
158+
forename = " ".join(name_parts[:split_position])
159+
surname = " ".join(name_parts[split_position:])
160+
name.update({"forename": forename, "surname": surname})
130161
else:
131-
return name_parts[-1], suffix
162+
forename = " ".join(name_parts[:-1])
163+
surname = " ".join(name_parts[-1:])
164+
name.update({"forename": forename, "surname": surname})
165+
166+
return name
132167

133168

134169
class PEP(object):

0 commit comments

Comments
 (0)