From a0be5e54d3ac504d0e7943666377e2f5c0c18f86 Mon Sep 17 00:00:00 2001
From: AA Turner <9087854+AA-Turner@users.noreply.github.com>
Date: Tue, 20 Apr 2021 04:37:04 +0100
Subject: [PATCH 1/5] Add pep_rss_gen.py

---
 pep_rss_gen.py | 137 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 pep_rss_gen.py
diff --git a/pep_rss_gen.py b/pep_rss_gen.py
new file mode 100644
index 00000000000..9613c4699dc
--- /dev/null
+++ b/pep_rss_gen.py
@@ -0,0 +1,137 @@
+import datetime
+import email.utils
+from pathlib import Path
+import re
+
+from dateutil import parser
+import docutils.frontend
+import docutils.nodes
+import docutils.parsers.rst
+import docutils.utils
+from feedgen import entry
+from feedgen import feed
+
+
+# Monkeypatch for feedgen.util.formatRFC2822: usegmt=True asks for "GMT" instead of "+0000"
+def _format_rfc_2822(dt: datetime.datetime) -> str:
+    return email.utils.format_datetime(dt, usegmt=True)
+
+
+entry.formatRFC2822 = feed.formatRFC2822 = _format_rfc_2822  # rebind the name in both imported feedgen modules
+line_cache: dict[Path, dict[str, str]] = {}  # path -> {header prefix: stripped header value}
+
+
+def first_line_starting_with(full_path: Path, text: str) -> str:
+    """Return the stripped rest of the first line starting with *text* ("" if none)."""
+    # Try and retrieve from cache
+    if full_path in line_cache:
+        return line_cache[full_path].get(text, "")
+
+    # Else read source; only these three headers are looked for and cached
+    line_cache[full_path] = path_cache = {}
+    headers = ("Created:", "Title:", "Author:")
+    with full_path.open(encoding="utf-8") as source:  # ensure the file is closed
+        for line in source:
+            header = next((h for h in headers if line.startswith(h)), None)
+            # Keep only the first occurrence, so later body text cannot override it
+            if header is not None and header not in path_cache:
+                path_cache[header] = line.removeprefix(header).strip()
+                # Once all have been found, exit loop
+                if len(path_cache) == len(headers):
+                    break
+    return path_cache.get(text, "")
+
+
+def pep_creation(full_path: Path) -> datetime.datetime:
+    """Return the date on the PEP's "Created:" line, or 1 Jan 1900 if none can be parsed."""
+    # Sentinel for empty or unparseable dates: such PEPs are old, so we don't care about them.
+    # ("None" would make the most sense but datetime objects refuse to compare with that.)
+    fallback = datetime.datetime(1900, 1, 1)
+    raw_created = first_line_starting_with(full_path, "Created:")
+    # a regex is needed as some PEPs (only PEP 102 as of Aug 2020) editorialize on the Created line
+    date_match = re.search(r"(\d+[- ][\w\d]+[- ]\d{2,4})", raw_created)
+    if date_match is None:
+        return fallback
+    try:
+        return parser.parse(date_match.group(1), dayfirst=True)
+    except (ValueError, OverflowError):
+        return fallback
+
+
+def parse_rst(text: str) -> docutils.nodes.document:
+    """Parse the reStructuredText in *text* into a new docutils document and return it."""
+    option_parser = docutils.frontend.OptionParser(components=(docutils.parsers.rst.Parser,))
+    default_settings = option_parser.get_default_values()
+    doc_tree = docutils.utils.new_document('<rst-doc>', settings=default_settings)
+    docutils.parsers.rst.Parser().parse(text, doc_tree)
+    return doc_tree
+
+
+def pep_abstract(full_path: Path) -> str:
+    """Return the first paragraph of the PEP abstract on a single line ("" if not found)."""
+    document = parse_rst(full_path.read_text(encoding="utf-8"))
+    abstract_sections = (node for node in document if "<title>Abstract</title>" in str(node))
+    for section in abstract_sections:
+        for element in section:
+            if element.tagname == "paragraph":
+                return element.astext().strip().replace("\n", " ")
+    return ""
+
+
+def main():
+    # the PEP sources are looked for in the directory containing this script
+    pep_dir = Path(__file__).parent
+
+    # pair each pep source with its creation time (from its "Created:" line), sorted oldest first
+    peps_with_dt = sorted((pep_creation(path), path) for path in pep_dir.glob("pep-????.*"))
+
+    # generate rss items for the 10 most recently created peps
+    items = []
+    for dt, full_path in peps_with_dt[-10:]:
+        try:
+            pep_num = int(full_path.stem.split("-")[-1])
+        except ValueError:
+            continue  # file name does not end in a PEP number
+
+        title = first_line_starting_with(full_path, "Title:")
+        author = first_line_starting_with(full_path, "Author:")
+        parsed_authors = email.utils.getaddresses([author]) if "@" in author else [(author, "")]
+        url = f"https://www.python.org/dev/peps/pep-{pep_num:0>4}"
+
+        item = entry.FeedEntry()
+        item.title(f"PEP {pep_num}: {title}")
+        item.link(href=url)
+        item.description(pep_abstract(full_path))
+        item.guid(url, permalink=True)
+        item.published(dt.replace(tzinfo=datetime.timezone.utc))  # ensure datetime has a timezone
+        item.author([dict(name=parsed_author[0], email=parsed_author[1]) for parsed_author in parsed_authors])
+        items.append(item)
+
+    # The rss envelope
+    desc = """
+    Newest Python Enhancement Proposals (PEPs) - Information on new
+    language features, and some meta-information like release
+    procedure and schedules.
+    """.replace("\n    ", " ").strip()
+
+    # Setup feed generator
+    fg = feed.FeedGenerator()
+    fg.language("en")
+    fg.generator("")
+    fg.docs("https://cyber.harvard.edu/rss/rss.html")
+
+    # Add metadata
+    fg.title("Newest Python PEPs")
+    fg.link(href="https://www.python.org/dev/peps")
+    fg.description(desc)
+    fg.lastBuildDate(datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc))
+
+    # Add PEP information (ordered by newest first)
+    for item in items:
+        fg.add_entry(item)
+
+    pep_dir.joinpath("peps.rss").write_bytes(fg.rss_str(pretty=True))  # written next to the PEP sources
+
+
+if __name__ == "__main__":
+    main()

From add7f45838a19917999bd4ef2763cdaa3a3d7d64 Mon Sep 17 00:00:00 2001
From: AA Turner <9087854+AA-Turner@users.noreply.github.com>
Date: Fri, 7 May 2021 14:27:56 +0100
Subject: [PATCH 2/5] Add new RSS target

---
 Makefile | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index eebb2d613ef..0f201b0c04a 100644
--- a/Makefile
+++ b/Makefile
@@ -60,7 +60,11 @@ lint:
 SPHINX_JOBS=8
 SPHINX_BUILD=$(PYTHON) build.py -j $(SPHINX_JOBS)
 
-pages: rss
+# TODO replace `rss:` with this when merged & tested
+pep_rss:
+	$(PYTHON) pep_rss_gen.py
+
+pages: pep_rss
 	$(SPHINX_BUILD) --index-file
 
 sphinx:

From 0d5e6ec6b87bc75058f9b7f9c7b50c8e3fbf3747 Mon Sep 17 00:00:00 2001
From: AA Turner <9087854+AA-Turner@users.noreply.github.com>
Date: Wed, 9 Jun 2021 00:13:01 +0100
Subject: [PATCH 3/5] Add RSS dependencies

---
 requirements.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 99202ceca35..837f41b3ef7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,6 @@
 # Requirements for building PEPs with Sphinx
 sphinx >= 3.5
 docutils >= 0.16
+
+# For RSS
+feedgen >= 0.9.0  # For RSS feed

From 45e295488e5fd99d03450c74f596ef12c6d8f418 Mon Sep 17 00:00:00 2001
From: AA Turner <9087854+AA-Turner@users.noreply.github.com>
Date: Sat, 12 Jun 2021 20:08:04 +0100
Subject: [PATCH 4/5] Ensure that there is only ever one author element in the
 RSS document

---
 pep_rss_gen.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/pep_rss_gen.py b/pep_rss_gen.py
index 9613c4699dc..21ede194012 100644
--- a/pep_rss_gen.py
+++ b/pep_rss_gen.py
@@ -95,7 +95,14 @@ def main():
 
         title = first_line_starting_with(full_path, "Title:")
         author = first_line_starting_with(full_path, "Author:")
-        parsed_authors = email.utils.getaddresses([author]) if "@" in author else [(author, "")]
+        if "@" in author or " at " in author:
+            parsed_authors = email.utils.getaddresses([author])
+            # ideal would be to pass as a list of dicts with names and emails to
+            # item.author, but FeedGen's RSS <author/> output doesn't pass W3C
+            # validation (as of 12/06/2021)
+            joined_authors = ", ".join(f"{name} ({email_address})" for name, email_address in parsed_authors)
+        else:
+            joined_authors = author
         url = f"https://www.python.org/dev/peps/pep-{pep_num:0>4}"
 
         item = entry.FeedEntry()
@@ -104,7 +111,7 @@ def main():
         item.description(pep_abstract(full_path))
         item.guid(url, permalink=True)
         item.published(dt.replace(tzinfo=datetime.timezone.utc))  # ensure datetime has a timezone
-        item.author([dict(name=parsed_author[0], email=parsed_author[1]) for parsed_author in parsed_authors])
+        item.author(email=joined_authors)
         items.append(item)
 
     # The rss envelope

From a8b01b9ea5ce847963a1e384e105e4923c0af0c9 Mon Sep 17 00:00:00 2001
From: AA Turner <9087854+AA-Turner@users.noreply.github.com>
Date: Sat, 12 Jun 2021 20:08:15 +0100
Subject: [PATCH 5/5] Add self link to RSS feed

---
 pep_rss_gen.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pep_rss_gen.py b/pep_rss_gen.py
index 21ede194012..a06ffd20caa 100644
--- a/pep_rss_gen.py
+++ b/pep_rss_gen.py
@@ -130,6 +130,7 @@ def main():
     # Add metadata
     fg.title("Newest Python PEPs")
     fg.link(href="https://www.python.org/dev/peps")
+    fg.link(href="https://www.python.org/dev/peps/peps.rss", rel="self")
     fg.description(desc)
     fg.lastBuildDate(datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc))