From ef4c5ceef72bae4b1d74117f57d580b1426e15f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= <joerg@thalheim.io>
Date: Mon, 9 Sep 2024 09:31:49 +0200
Subject: [PATCH 1/3] ruff: pyupgrade to 3.8

ruff check --target-version py38 --select UP --fix .
---
 debug-info.py                                 | 1 -
 doc/conf.py                                   | 3 +--
 html5lib/__init__.py                          | 1 -
 html5lib/_ihatexml.py                         | 3 +--
 html5lib/_inputstream.py                      | 9 ++++-----
 html5lib/_tokenizer.py                        | 3 +--
 html5lib/_trie/__init__.py                    | 1 -
 html5lib/_trie/_base.py                       | 3 +--
 html5lib/_trie/py.py                          | 1 -
 html5lib/_utils.py                            | 7 +++----
 html5lib/constants.py                         | 1 -
 html5lib/filters/alphabeticalattributes.py    | 1 -
 html5lib/filters/base.py                      | 3 +--
 html5lib/filters/inject_meta_charset.py       | 1 -
 html5lib/filters/lint.py                      | 1 -
 html5lib/filters/optionaltags.py              | 1 -
 html5lib/filters/sanitizer.py                 | 1 -
 html5lib/filters/whitespace.py                | 1 -
 html5lib/html5parser.py                       | 5 ++---
 html5lib/serializer.py                        | 3 +--
 html5lib/tests/__init__.py                    | 1 -
 html5lib/tests/conftest.py                    | 5 ++---
 html5lib/tests/sanitizer.py                   | 1 -
 html5lib/tests/support.py                     | 3 +--
 html5lib/tests/test_alphabeticalattributes.py | 1 -
 html5lib/tests/test_encoding.py               | 5 ++---
 html5lib/tests/test_meta.py                   | 6 +-----
 html5lib/tests/test_optionaltags_filter.py    | 1 -
 html5lib/tests/test_parser2.py                | 1 -
 html5lib/tests/test_sanitizer.py              | 1 -
 html5lib/tests/test_serializer.py             | 1 -
 html5lib/tests/test_stream.py                 | 7 +++----
 html5lib/tests/test_tokenizer2.py             | 1 -
 html5lib/tests/test_treeadapters.py           | 1 -
 html5lib/tests/test_treewalkers.py            | 5 ++---
 html5lib/tests/test_whitespace_filter.py      | 1 -
 html5lib/tests/tokenizer.py                   | 3 +--
 html5lib/tests/tokenizertotree.py             | 3 +--
 html5lib/tests/tree_construction.py           | 1 -
 html5lib/treeadapters/__init__.py             | 1 -
 html5lib/treeadapters/genshi.py               | 1 -
 html5lib/treeadapters/sax.py                  | 1 -
 html5lib/treebuilders/__init__.py             | 1 -
 html5lib/treebuilders/base.py                 | 5 ++---
 html5lib/treebuilders/dom.py                  | 3 +--
 html5lib/treebuilders/etree.py                | 1 -
 html5lib/treebuilders/etree_lxml.py           | 7 +++----
 html5lib/treewalkers/__init__.py              | 1 -
 html5lib/treewalkers/base.py                  | 3 +--
 html5lib/treewalkers/dom.py                   | 1 -
 html5lib/treewalkers/etree.py                 | 1 -
 html5lib/treewalkers/etree_lxml.py            | 7 +++----
 html5lib/treewalkers/genshi.py                | 1 -
 parse.py                                      | 2 +-
 setup.py                                      | 1 -
 toxver.py                                     | 4 ----
 56 files changed, 39 insertions(+), 100 deletions(-)

diff --git a/debug-info.py b/debug-info.py
index b47b8ebf..7e1b6fd0 100644
--- a/debug-info.py
+++ b/debug-info.py
@@ -1,4 +1,3 @@
-from __future__ import print_function, unicode_literals
 
 import platform
 import sys
diff --git a/doc/conf.py b/doc/conf.py
index d5a1e863..66defcce 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 #
 # html5lib documentation build configuration file, created by
 # sphinx-quickstart on Wed May  8 00:04:49 2013.
@@ -100,7 +99,7 @@
 }
 
 
-class CExtMock(object):
+class CExtMock:
     """Required for autodoc on readthedocs.org where you cannot build C extensions."""
     def __init__(self, *args, **kwargs):
         pass
diff --git a/html5lib/__init__.py b/html5lib/__init__.py
index 7b854f99..d2c68855 100644
--- a/html5lib/__init__.py
+++ b/html5lib/__init__.py
@@ -20,7 +20,6 @@
 * :func:`~.serializer.serialize`
 """
 
-from __future__ import absolute_import, division, unicode_literals
 
 from .html5parser import HTMLParser, parse, parseFragment
 from .treebuilders import getTreeBuilder
diff --git a/html5lib/_ihatexml.py b/html5lib/_ihatexml.py
index d725eabd..f5b6e1f4 100644
--- a/html5lib/_ihatexml.py
+++ b/html5lib/_ihatexml.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import re
 import warnings
@@ -181,7 +180,7 @@ def escapeRegexp(string):
 nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
 
 
-class InfosetFilter(object):
+class InfosetFilter:
     replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
 
     def __init__(self,
diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py
index a93b5a4e..54c5c498 100644
--- a/html5lib/_inputstream.py
+++ b/html5lib/_inputstream.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from six import text_type
 from six.moves import http_client, urllib
@@ -48,7 +47,7 @@
 charsUntilRegEx = {}
 
 
-class BufferedStream(object):
+class BufferedStream:
     """Buffering for streams that do not have buffering of their own
 
     The buffer is implemented as a list of chunks on the assumption that
@@ -145,7 +144,7 @@ def HTMLInputStream(source, **kwargs):
         return HTMLBinaryInputStream(source, **kwargs)
 
 
-class HTMLUnicodeInputStream(object):
+class HTMLUnicodeInputStream:
     """Provides a unicode stream of characters to the HTMLTokenizer.
 
     This class takes care of character encoding and removing or replacing
@@ -673,7 +672,7 @@ def jumpTo(self, bytes):
         return True
 
 
-class EncodingParser(object):
+class EncodingParser:
     """Mini parser for detecting character encoding from meta elements"""
 
     def __init__(self, data):
@@ -861,7 +860,7 @@ def getAttribute(self):
                 attrValue.append(c)
 
 
-class ContentAttrParser(object):
+class ContentAttrParser:
     def __init__(self, data):
         assert isinstance(data, bytes)
         self.data = data
diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py
index 4748a197..782310ec 100644
--- a/html5lib/_tokenizer.py
+++ b/html5lib/_tokenizer.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from six import unichr as chr
 
@@ -24,7 +23,7 @@
     attributeMap = OrderedDict
 
 
-class HTMLTokenizer(object):
+class HTMLTokenizer:
     """ This class takes care of tokenizing HTML.
 
     * self.currentToken
diff --git a/html5lib/_trie/__init__.py b/html5lib/_trie/__init__.py
index 07bad5d3..df8912a0 100644
--- a/html5lib/_trie/__init__.py
+++ b/html5lib/_trie/__init__.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from .py import Trie
 
diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py
index 6b71975f..fe2d02e5 100644
--- a/html5lib/_trie/_base.py
+++ b/html5lib/_trie/_base.py
@@ -1,9 +1,8 @@
-from __future__ import absolute_import, division, unicode_literals
 
 try:
     from collections.abc import Mapping
 except ImportError:  # Python 2.7
-    from collections import Mapping
+    from collections.abc import Mapping
 
 
 class Trie(Mapping):
diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py
index c2ba3da7..92f6f861 100644
--- a/html5lib/_trie/py.py
+++ b/html5lib/_trie/py.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 from six import text_type
 
 from bisect import bisect_left
diff --git a/html5lib/_utils.py b/html5lib/_utils.py
index 7e23ee57..1c229d0f 100644
--- a/html5lib/_utils.py
+++ b/html5lib/_utils.py
@@ -1,11 +1,10 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from types import ModuleType
 
 try:
     from collections.abc import Mapping
 except ImportError:
-    from collections import Mapping
+    from collections.abc import Mapping
 
 from six import text_type, PY3
 
@@ -13,7 +12,7 @@
     import xml.etree.ElementTree as default_etree
 else:
     try:
-        import xml.etree.cElementTree as default_etree
+        import xml.etree.ElementTree as default_etree
     except ImportError:
         import xml.etree.ElementTree as default_etree
 
@@ -122,7 +121,7 @@ def moduleFactoryFactory(factory):
     moduleCache = {}
 
     def moduleFactory(baseModule, *args, **kwargs):
-        if isinstance(ModuleType.__name__, type("")):
+        if isinstance(ModuleType.__name__, str):
             name = "_%s_factory" % baseModule.__name__
         else:
             name = b"_%s_factory" % baseModule.__name__
diff --git a/html5lib/constants.py b/html5lib/constants.py
index 2fa4146d..a4b1efa1 100644
--- a/html5lib/constants.py
+++ b/html5lib/constants.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import string
 
diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py
index 5ba926e3..c0be95b2 100644
--- a/html5lib/filters/alphabeticalattributes.py
+++ b/html5lib/filters/alphabeticalattributes.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from . import base
 
diff --git a/html5lib/filters/base.py b/html5lib/filters/base.py
index c7dbaed0..6d6639e6 100644
--- a/html5lib/filters/base.py
+++ b/html5lib/filters/base.py
@@ -1,7 +1,6 @@
-from __future__ import absolute_import, division, unicode_literals
 
 
-class Filter(object):
+class Filter:
     def __init__(self, source):
         self.source = source
 
diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py
index aefb5c84..c8dc57b8 100644
--- a/html5lib/filters/inject_meta_charset.py
+++ b/html5lib/filters/inject_meta_charset.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from . import base
 
diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py
index acd4d7a2..cd7a6a43 100644
--- a/html5lib/filters/lint.py
+++ b/html5lib/filters/lint.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from six import text_type
 
diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py
index 4a865012..a44b2a00 100644
--- a/html5lib/filters/optionaltags.py
+++ b/html5lib/filters/optionaltags.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from . import base
 
diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py
index ea2c5dd3..2dc4583d 100644
--- a/html5lib/filters/sanitizer.py
+++ b/html5lib/filters/sanitizer.py
@@ -6,7 +6,6 @@
 if Bleach is unsuitable for your needs.
 
 """
-from __future__ import absolute_import, division, unicode_literals
 
 import re
 import warnings
diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py
index 0d12584b..ab40ef5a 100644
--- a/html5lib/filters/whitespace.py
+++ b/html5lib/filters/whitespace.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import re
 
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index b3c206d1..8ab005ba 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 from six import viewkeys
 
 from . import _inputstream
@@ -69,7 +68,7 @@ def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElemen
     return p.parseFragment(doc, container=container, **kwargs)
 
 
-class HTMLParser(object):
+class HTMLParser:
     """HTML parser
 
     Generates a tree structure from a stream of (possibly malformed) HTML.
@@ -397,7 +396,7 @@ def parseRCDataRawtext(self, token, contentType):
         self.phase = self.phases["text"]
 
 
-class Phase(object):
+class Phase:
     """Base class for helper object that implements each phase of processing
     """
     __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
diff --git a/html5lib/serializer.py b/html5lib/serializer.py
index a171ac1c..34f1b7e3 100644
--- a/html5lib/serializer.py
+++ b/html5lib/serializer.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 from six import text_type
 
 import re
@@ -101,7 +100,7 @@ def serialize(input, tree="etree", encoding=None, **serializer_opts):
     return s.render(walker(input), encoding)
 
 
-class HTMLSerializer(object):
+class HTMLSerializer:
 
     # attribute quoting options
     quote_attr_values = "legacy"  # be secure by default
diff --git a/html5lib/tests/__init__.py b/html5lib/tests/__init__.py
index b8ce2de3..e69de29b 100644
--- a/html5lib/tests/__init__.py
+++ b/html5lib/tests/__init__.py
@@ -1 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py
index fffeb50c..de9b1572 100644
--- a/html5lib/tests/conftest.py
+++ b/html5lib/tests/conftest.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 import os.path
 import sys
 
@@ -54,7 +53,7 @@ def pytest_configure(config):
         # Check for optional requirements
         req_file = os.path.join(_root, "requirements-optional.txt")
         if os.path.exists(req_file):
-            with open(req_file, "r") as fp:
+            with open(req_file) as fp:
                 for line in fp:
                     if (line.strip() and
                         not (line.startswith("-r") or
@@ -79,7 +78,7 @@ def pytest_configure(config):
         import xml.etree.ElementTree as ElementTree
 
         try:
-            import xml.etree.cElementTree as cElementTree
+            import xml.etree.ElementTree as cElementTree
         except ImportError:
             msgs.append("cElementTree unable to be imported")
         else:
diff --git a/html5lib/tests/sanitizer.py b/html5lib/tests/sanitizer.py
index 16e53868..93ad4f52 100644
--- a/html5lib/tests/sanitizer.py
+++ b/html5lib/tests/sanitizer.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import codecs
 import json
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
index 1bd0ccc1..3a6f37c2 100644
--- a/html5lib/tests/support.py
+++ b/html5lib/tests/support.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 # pylint:disable=wrong-import-position
 
@@ -86,7 +85,7 @@ def __getitem__(self, key):
         return dict.get(self, key, self.default)
 
 
-class TestData(object):
+class TestData:
     def __init__(self, filename, newTestHeading="data", encoding="utf8"):
         if encoding is None:
             self.f = open(filename, mode="rb")
diff --git a/html5lib/tests/test_alphabeticalattributes.py b/html5lib/tests/test_alphabeticalattributes.py
index 7d5b8e0f..87beb8f1 100644
--- a/html5lib/tests/test_alphabeticalattributes.py
+++ b/html5lib/tests/test_alphabeticalattributes.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from collections import OrderedDict
 
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
index 47c4814a..10b666da 100644
--- a/html5lib/tests/test_encoding.py
+++ b/html5lib/tests/test_encoding.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import os
 
@@ -9,7 +8,7 @@
 
 
 def test_basic_prescan_length():
-    data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
+    data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode()
     pad = 1024 - len(data) + 1
     data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
     assert len(data) == 1024  # Sanity
@@ -18,7 +17,7 @@ def test_basic_prescan_length():
 
 
 def test_parser_reparse():
-    data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode('utf-8')
+    data = "<title>Caf\u00E9</title><!--a--><meta charset='utf-8'>".encode()
     pad = 10240 - len(data) + 1
     data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
     assert len(data) == 10240  # Sanity
diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py
index e02268aa..aa7e35e2 100644
--- a/html5lib/tests/test_meta.py
+++ b/html5lib/tests/test_meta.py
@@ -1,10 +1,6 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import six
-try:
-    from unittest.mock import Mock
-except ImportError:
-    from mock import Mock
+from unittest.mock import Mock
 
 from . import support
 
diff --git a/html5lib/tests/test_optionaltags_filter.py b/html5lib/tests/test_optionaltags_filter.py
index cd282149..180a109e 100644
--- a/html5lib/tests/test_optionaltags_filter.py
+++ b/html5lib/tests/test_optionaltags_filter.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from html5lib.filters.optionaltags import Filter
 
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
index 6b464bea..f30595b4 100644
--- a/html5lib/tests/test_parser2.py
+++ b/html5lib/tests/test_parser2.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from six import PY2, text_type
 
diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py
index 499310b6..562ee7fa 100644
--- a/html5lib/tests/test_sanitizer.py
+++ b/html5lib/tests/test_sanitizer.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import warnings
 
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
index a2be0be5..5c225790 100644
--- a/html5lib/tests/test_serializer.py
+++ b/html5lib/tests/test_serializer.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import os
 import json
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
index efe9b472..7dce2b1d 100644
--- a/html5lib/tests/test_stream.py
+++ b/html5lib/tests/test_stream.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from . import support  # noqa
 
@@ -105,7 +104,7 @@ def test_char_ascii():
 
 
 def test_char_utf8():
-    stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8')
+    stream = HTMLInputStream('\u2018'.encode(), override_encoding='utf-8')
     assert stream.charEncoding[0].name == 'utf-8'
     assert stream.char() == '\u2018'
 
@@ -186,7 +185,7 @@ def test_python_issue_20007():
     Make sure we have a work-around for Python bug #20007
     http://bugs.python.org/issue20007
     """
-    class FakeSocket(object):
+    class FakeSocket:
         def makefile(self, _mode, _bufsize=None):
             # pylint:disable=unused-argument
             return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
@@ -205,7 +204,7 @@ def test_python_issue_20007_b():
     if six.PY2:
         return
 
-    class FakeSocket(object):
+    class FakeSocket:
         def makefile(self, _mode, _bufsize=None):
             # pylint:disable=unused-argument
             return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py
index 158d847a..f8a74eee 100644
--- a/html5lib/tests/test_tokenizer2.py
+++ b/html5lib/tests/test_tokenizer2.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import io
 
diff --git a/html5lib/tests/test_treeadapters.py b/html5lib/tests/test_treeadapters.py
index 95e56c00..3af383c3 100644
--- a/html5lib/tests/test_treeadapters.py
+++ b/html5lib/tests/test_treeadapters.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from . import support  # noqa
 
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 780ca964..89e20dab 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import itertools
 import sys
@@ -74,11 +73,11 @@ def param_treewalker_six_mix():
     # fragment but not using the u'' syntax nor importing unicode_literals
     sm_tests = [
         ('<a href="http://example.com">Example</a>',
-         [(str('class'), str('test123'))],
+         [('class', 'test123')],
          '<a>\n  class="test123"\n  href="http://example.com"\n  "Example"'),
 
         ('<link href="http://example.com/cow">',
-         [(str('rel'), str('alternate'))],
+         [('rel', 'alternate')],
          '<link>\n  href="http://example.com/cow"\n  rel="alternate"\n  "Example"')
     ]
 
diff --git a/html5lib/tests/test_whitespace_filter.py b/html5lib/tests/test_whitespace_filter.py
index e9da6140..d4e4e3be 100644
--- a/html5lib/tests/test_whitespace_filter.py
+++ b/html5lib/tests/test_whitespace_filter.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from html5lib.filters.whitespace import Filter
 from html5lib.constants import spaceCharacters
diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py
index b49d2e6e..9ba19b16 100644
--- a/html5lib/tests/tokenizer.py
+++ b/html5lib/tests/tokenizer.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import codecs
 import json
@@ -12,7 +11,7 @@
 from html5lib import constants, _utils
 
 
-class TokenizerTestParser(object):
+class TokenizerTestParser:
     def __init__(self, initialState, lastStartTag=None):
         self.tokenizer = HTMLTokenizer
         self._state = initialState
diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py
index 42463f32..6c0b4f77 100644
--- a/html5lib/tests/tokenizertotree.py
+++ b/html5lib/tests/tokenizertotree.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import sys
 import os
@@ -25,7 +24,7 @@ def main(out_path):
 
 def run_file(filename, out_path):
     try:
-        tests_data = json.load(open(filename, "r"))
+        tests_data = json.load(open(filename))
     except ValueError:
         sys.stderr.write("Failed to load %s\n" % filename)
         return
diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py
index 363b48c2..e2381754 100644
--- a/html5lib/tests/tree_construction.py
+++ b/html5lib/tests/tree_construction.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 import itertools
 import re
diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py
index dfeb0ba5..1444fc9a 100644
--- a/html5lib/treeadapters/__init__.py
+++ b/html5lib/treeadapters/__init__.py
@@ -16,7 +16,6 @@
    genshi_tree = genshi.to_genshi(TreeWalker(tree))
 
 """
-from __future__ import absolute_import, division, unicode_literals
 
 from . import sax
 
diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py
index 61d5fb6a..b0b29ed3 100644
--- a/html5lib/treeadapters/genshi.py
+++ b/html5lib/treeadapters/genshi.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from genshi.core import QName, Attrs
 from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
diff --git a/html5lib/treeadapters/sax.py b/html5lib/treeadapters/sax.py
index f4ccea5a..ead1a5c4 100644
--- a/html5lib/treeadapters/sax.py
+++ b/html5lib/treeadapters/sax.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from xml.sax.xmlreader import AttributesNSImpl
 
diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py
index d44447ea..90aad5fb 100644
--- a/html5lib/treebuilders/__init__.py
+++ b/html5lib/treebuilders/__init__.py
@@ -29,7 +29,6 @@
 
 """
 
-from __future__ import absolute_import, division, unicode_literals
 
 from .._utils import default_etree
 
diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py
index 020d7e15..125ed82c 100644
--- a/html5lib/treebuilders/base.py
+++ b/html5lib/treebuilders/base.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 from six import text_type
 
 from ..constants import scopingElements, tableInsertModeElements, namespaces
@@ -20,7 +19,7 @@
 }
 
 
-class Node(object):
+class Node:
     """Represents an item in the tree"""
     def __init__(self, name):
         """Creates a Node
@@ -144,7 +143,7 @@ def nodesEqual(self, node1, node2):
         return True
 
 
-class TreeBuilder(object):
+class TreeBuilder:
     """Base treebuilder implementation
 
     * documentClass - the class to use for the bottommost node of a document
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
index d8b53004..09b217c4 100644
--- a/html5lib/treebuilders/dom.py
+++ b/html5lib/treebuilders/dom.py
@@ -1,10 +1,9 @@
-from __future__ import absolute_import, division, unicode_literals
 
 
 try:
     from collections.abc import MutableMapping
 except ImportError:  # Python 2.7
-    from collections import MutableMapping
+    from collections.abc import MutableMapping
 from xml.dom import minidom, Node
 import weakref
 
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index 0b745081..bd20b957 100644
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 # pylint:disable=protected-access
 
 from six import text_type
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index e73de61a..bc2d779e 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -9,7 +9,6 @@
 When any of these things occur, we emit a DataLossWarning
 """
 
-from __future__ import absolute_import, division, unicode_literals
 # pylint:disable=protected-access
 
 import warnings
@@ -19,7 +18,7 @@
 try:
     from collections.abc import MutableMapping
 except ImportError:
-    from collections import MutableMapping
+    from collections.abc import MutableMapping
 
 from . import base
 from ..constants import DataLossWarning
@@ -37,14 +36,14 @@
 comment_type = etree.Comment("asd").tag
 
 
-class DocumentType(object):
+class DocumentType:
     def __init__(self, name, publicId, systemId):
         self.name = name
         self.publicId = publicId
         self.systemId = systemId
 
 
-class Document(object):
+class Document:
     def __init__(self):
         self._elementTree = None
         self._childNodes = []
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index b2d3aac3..b78d6f46 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -8,7 +8,6 @@
 returns an iterator which generates tokens.
 """
 
-from __future__ import absolute_import, division, unicode_literals
 
 from .. import constants
 from .._utils import default_etree
diff --git a/html5lib/treewalkers/base.py b/html5lib/treewalkers/base.py
index 80c474c4..7ee75d81 100644
--- a/html5lib/treewalkers/base.py
+++ b/html5lib/treewalkers/base.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from xml.dom import Node
 from ..constants import namespaces, voidElements, spaceCharacters
@@ -17,7 +16,7 @@
 spaceCharacters = "".join(spaceCharacters)
 
 
-class TreeWalker(object):
+class TreeWalker:
     """Walks a tree yielding tokens
 
     Tokens are dicts that all have a ``type`` field specifying the type of the
diff --git a/html5lib/treewalkers/dom.py b/html5lib/treewalkers/dom.py
index b0c89b00..85e12505 100644
--- a/html5lib/treewalkers/dom.py
+++ b/html5lib/treewalkers/dom.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from xml.dom import Node
 
diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
index 411a1d45..ef5e914c 100644
--- a/html5lib/treewalkers/etree.py
+++ b/html5lib/treewalkers/etree.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from collections import OrderedDict
 import re
diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py
index a614ac5b..af6c260d 100644
--- a/html5lib/treewalkers/etree_lxml.py
+++ b/html5lib/treewalkers/etree_lxml.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 from six import text_type
 
 from collections import OrderedDict
@@ -20,7 +19,7 @@ def ensure_str(s):
         return s.decode("ascii", "strict")
 
 
-class Root(object):
+class Root:
     def __init__(self, et):
         self.elementtree = et
         self.children = []
@@ -58,7 +57,7 @@ def __len__(self):
         return 1
 
 
-class Doctype(object):
+class Doctype:
     def __init__(self, root_node, name, public_id, system_id):
         self.root_node = root_node
         self.name = name
@@ -81,7 +80,7 @@ def getnext(self):
         return None
 
 
-class FragmentWrapper(object):
+class FragmentWrapper:
     def __init__(self, fragment_root, obj):
         self.root_node = fragment_root
         self.obj = obj
diff --git a/html5lib/treewalkers/genshi.py b/html5lib/treewalkers/genshi.py
index 7483be27..78f22fd3 100644
--- a/html5lib/treewalkers/genshi.py
+++ b/html5lib/treewalkers/genshi.py
@@ -1,4 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
 
 from genshi.core import QName
 from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
diff --git a/parse.py b/parse.py
index e6806b46..14bbe99a 100755
--- a/parse.py
+++ b/parse.py
@@ -42,7 +42,7 @@ def parse():
             try:
                 # Try opening from file system
                 f = open(f, "rb")
-            except IOError as e:
+            except OSError as e:
                 sys.stderr.write("Unable to open file: %s\n" % e)
                 sys.exit(1)
     except IndexError:
diff --git a/setup.py b/setup.py
index 30ee0575..5f3dc186 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
 
 import ast
 import codecs
diff --git a/toxver.py b/toxver.py
index 68eb71ec..b082a345 100755
--- a/toxver.py
+++ b/toxver.py
@@ -20,10 +20,6 @@
 
 """
 
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
 
 import sys
 

From fb864a9e4ea393c3bd863de3f1c62275ce94f622 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= <joerg@thalheim.io>
Date: Mon, 9 Sep 2024 10:18:02 +0200
Subject: [PATCH 2/3] manually remove fallback imports

---
 html5lib/_trie/_base.py             | 5 +----
 html5lib/_utils.py                  | 5 +----
 html5lib/treebuilders/dom.py        | 5 +----
 html5lib/treebuilders/etree_lxml.py | 5 +----
 4 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py
index fe2d02e5..63927ee4 100644
--- a/html5lib/_trie/_base.py
+++ b/html5lib/_trie/_base.py
@@ -1,8 +1,5 @@
 
-try:
-    from collections.abc import Mapping
-except ImportError:  # Python 2.7
-    from collections.abc import Mapping
+from collections.abc import Mapping
 
 
 class Trie(Mapping):
diff --git a/html5lib/_utils.py b/html5lib/_utils.py
index 1c229d0f..2e74c07f 100644
--- a/html5lib/_utils.py
+++ b/html5lib/_utils.py
@@ -1,10 +1,7 @@
 
 from types import ModuleType
 
-try:
-    from collections.abc import Mapping
-except ImportError:
-    from collections.abc import Mapping
+from collections.abc import Mapping
 
 from six import text_type, PY3
 
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
index 09b217c4..bc56c708 100644
--- a/html5lib/treebuilders/dom.py
+++ b/html5lib/treebuilders/dom.py
@@ -1,9 +1,6 @@
 
 
-try:
-    from collections.abc import MutableMapping
-except ImportError:  # Python 2.7
-    from collections.abc import MutableMapping
+from collections.abc import MutableMapping
 from xml.dom import minidom, Node
 import weakref
 
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index bc2d779e..3e88d76e 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -15,10 +15,7 @@
 import re
 import sys
 
-try:
-    from collections.abc import MutableMapping
-except ImportError:
-    from collections.abc import MutableMapping
+from collections.abc import MutableMapping
 
 from . import base
 from ..constants import DataLossWarning

From 29d3072ee1f982df582b88b4e120453cbbefd37e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= <joerg@thalheim.io>
Date: Mon, 9 Sep 2024 09:44:00 +0200
Subject: [PATCH 3/3] only support pythons that are not EOL
 (https://endoflife.date/python)

Even Debian oldstable ships Python 3.9. For internet-facing libraries, it
is not secure for contributors to install unsupported Python versions in
order to test them. Reducing the number of supported Python versions will
also make maintenance and testing easier.
---
 .appveyor.yml                    | 29 -----------------------------
 .github/workflows/python-tox.yml |  3 ---
 README.rst                       | 16 ++--------------
 html5lib/html5parser.py          |  4 ++--
 setup.py                         |  6 +-----
 tox.ini                          |  2 +-
 toxver.py                        |  7 -------
 7 files changed, 6 insertions(+), 61 deletions(-)
 delete mode 100644 .appveyor.yml

diff --git a/.appveyor.yml b/.appveyor.yml
deleted file mode 100644
index e6f7bf48..00000000
--- a/.appveyor.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-# appveyor.yml - https://www.appveyor.com/docs/lang/python
-# https://www.appveyor.com/docs/windows-images-software/#visual-studio-2022
----
-image: Visual Studio 2022
-environment:
-  matrix:
-    - PY_PYTHON: 2.7
-      TOXENV: py27-base
-    - PY_PYTHON: 2.7
-      TOXENV: py27-optional
-    - PY_PYTHON: 3.7
-      TOXENV: py37-base
-    - PY_PYTHON: 3.7
-      TOXENV: py37-optional
-
-install:
-  - git submodule update --init --recursive
-  - py --list
-  - py -VV
-  - py -m pip install --upgrade pip
-  - py -m pip install tox
-
-build: off
-
-test_script:
-  - py -m tox
-
-after_test:
-  - py debug-info.py
diff --git a/.github/workflows/python-tox.yml b/.github/workflows/python-tox.yml
index 5ed83175..0912abb3 100644
--- a/.github/workflows/python-tox.yml
+++ b/.github/workflows/python-tox.yml
@@ -12,9 +12,6 @@ jobs:
         os: [ubuntu-latest, windows-latest]
         deps: [base, optional]
         include:
-          - python: "pypy-2.7"
-            os: ubuntu-latest
-            deps: base
           - python: "pypy-3.10"
             os: ubuntu-latest
             deps: base
diff --git a/README.rst b/README.rst
index 6a623a43..befc7aaa 100644
--- a/README.rst
+++ b/README.rst
@@ -29,7 +29,7 @@ or:
 
 By default, the ``document`` will be an ``xml.etree`` element instance.
 Whenever possible, html5lib chooses the accelerated ``ElementTree``
-implementation (i.e. ``xml.etree.cElementTree`` on Python 2.x).
+implementation.
 
 Two other tree types are supported: ``xml.dom.minidom`` and
 ``lxml.etree``. To use an alternative format, specify the name of
@@ -41,18 +41,6 @@ a treebuilder:
   with open("mydocument.html", "rb") as f:
       lxml_etree_document = html5lib.parse(f, treebuilder="lxml")
 
-When using with ``urllib2`` (Python 2), the charset from HTTP should be
-pass into html5lib as follows:
-
-.. code-block:: python
-
-  from contextlib import closing
-  from urllib2 import urlopen
-  import html5lib
-
-  with closing(urlopen("http://example.com/")) as f:
-      document = html5lib.parse(f, transport_encoding=f.info().getparam("charset"))
-
 When using with ``urllib.request`` (Python 3), the charset from HTTP
 should be pass into html5lib as follows:
 
@@ -90,7 +78,7 @@ More documentation is available at https://html5lib.readthedocs.io/.
 Installation
 ------------
 
-html5lib works on CPython 2.7+, CPython 3.5+ and PyPy. To install:
+html5lib works on CPython 3.8+ and PyPy. To install:
 
 .. code-block:: bash
 
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 8ab005ba..3fe78b6b 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -427,7 +427,7 @@ def processSpaceCharacters(self, token):
     def processStartTag(self, token):
         # Note the caching is done here rather than BoundMethodDispatcher as doing it there
         # requires a circular reference to the Phase, and this ends up with a significant
-        # (CPython 2.7, 3.8) GC cost when parsing many short inputs
+        # (CPython 3.8) GC cost when parsing many short inputs
         name = token["name"]
         # In Py2, using `in` is quicker in general than try/except KeyError
         # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
@@ -454,7 +454,7 @@ def startTagHtml(self, token):
     def processEndTag(self, token):
         # Note the caching is done here rather than BoundMethodDispatcher as doing it there
         # requires a circular reference to the Phase, and this ends up with a significant
-        # (CPython 2.7, 3.8) GC cost when parsing many short inputs
+        # (CPython 3.8) GC cost when parsing many short inputs
         name = token["name"]
         # In Py2, using `in` is quicker in general than try/except KeyError
         # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
diff --git a/setup.py b/setup.py
index 5f3dc186..afab2904 100644
--- a/setup.py
+++ b/setup.py
@@ -63,11 +63,7 @@ def default_environment():
     'Operating System :: OS Independent',
     'Programming Language :: Python',
     'Programming Language :: Python :: 2',
-    'Programming Language :: Python :: 2.7',
     'Programming Language :: Python :: 3',
-    'Programming Language :: Python :: 3.5',
-    'Programming Language :: Python :: 3.6',
-    'Programming Language :: Python :: 3.7',
     'Programming Language :: Python :: 3.8',
     'Programming Language :: Python :: 3.9',
     'Programming Language :: Python :: 3.10',
@@ -109,7 +105,7 @@ def default_environment():
           'six>=1.9',
           'webencodings>=0.5.1',
       ],
-      python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*",
+      python_requires=">=3.8",
       extras_require={
           # A conditional extra will only install these items when the extra is
           # requested and the condition matches.
diff --git a/tox.ini b/tox.ini
index fb228e96..94a78542 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py{27,35,36,37,38,39,310,311,py,py3}-{base,optional,oldest}
+envlist = py{38,39,310,311,py,py3}-{base,optional,oldest}
 
 [testenv]
 deps =
diff --git a/toxver.py b/toxver.py
index b082a345..950dc083 100755
--- a/toxver.py
+++ b/toxver.py
@@ -12,9 +12,6 @@
     $ toxver.py pypy-3.8 base
     TOXENV=pypy3-base
 
-    $ toxver.py 2.7 oldest
-    TOXENV=py27-oldest
-
     $ toxver.py ~3.12.0-0 optional
     TOXENV=py312-optional
 
@@ -31,10 +28,6 @@ def main(argv):
 
     deps = argv[2]
 
-    if argv[1].startswith("pypy-2"):
-        print("TOXENV=pypy-" + deps)
-        return 0
-
     if argv[1].startswith("pypy-3"):
         print("TOXENV=pypy3-" + deps)
         return 0