Skip to content

Commit c51ba79

Browse files
committed
WL#15212: Update collation mappings
MySQL Server 8.0.30 has added new collations and renamed some. Support for new utf8mb4 collations has been added, and all existing utf8_* collations have been renamed to utf8mb3_*. This worklog updates the charsets.py module accordingly, adds the 5.7 server character set mapping, and makes the necessary adjustments to keep backwards compatibility. See changes introduced by WL#14307, BUG#33787300 and BUG#33635120. Change-Id: If21839728fb52a5f82fc4c1b4f83d230f178046c
1 parent af94348 commit c51ba79

11 files changed

+632
-75
lines changed

Diff for: CHANGES.txt

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Full release notes:
1111
v8.0.30
1212
=======
1313

14+
- WL#15212: Update collation mappings
1415
- WL#15151: Increase to 88 characters per line
1516
- WL#15137: Fix linting issues
1617
- WL#15035: Enforce PEP 7 and PEP 8 coding style

Diff for: lib/mysql/connector/abstracts.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -976,7 +976,7 @@ def python_charset(self):
976976
Returns a string.
977977
"""
978978
encoding = CharacterSet.get_info(self._charset_id)[0]
979-
if encoding in ("utf8mb4", "binary"):
979+
if encoding in ("utf8mb4", "utf8mb3", "binary"):
980980
return "utf8"
981981
return encoding
982982

Diff for: lib/mysql/connector/charsets.py

+299-31
Large diffs are not rendered by default.

Diff for: lib/mysql/connector/connection.py

+2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
from .authentication import get_auth_plugin
4545
from .constants import (
4646
NET_BUFFER_LENGTH,
47+
CharacterSet,
4748
ClientFlag,
4849
FieldType,
4950
ServerCmd,
@@ -172,6 +173,7 @@ def _do_handshake(self):
172173
self._server_version = self._check_server_version(
173174
handshake["server_version_original"]
174175
)
176+
CharacterSet.set_mysql_version(self._server_version)
175177

176178
if not handshake["capabilities"] & ClientFlag.SSL:
177179
if self._auth_plugin == "mysql_clear_password":

Diff for: lib/mysql/connector/connection_cext.py

+1
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ def _do_handshake(self):
133133
self._server_version = self._check_server_version(
134134
self._handshake["server_version_original"]
135135
)
136+
CharacterSet.set_mysql_version(self._server_version)
136137

137138
@property
138139
def _server_status(self):

Diff for: lib/mysql/connector/constants.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232

3333
from abc import ABC, ABCMeta
3434

35-
from .charsets import MYSQL_CHARACTER_SETS
35+
from .charsets import MYSQL_CHARACTER_SETS, MYSQL_CHARACTER_SETS_57
3636
from .errors import ProgrammingError
3737

3838
MAX_PACKET_LENGTH = 16777215
@@ -678,10 +678,22 @@ class CharacterSet(_Constants):
678678
"""
679679

680680
desc = MYSQL_CHARACTER_SETS
681+
mysql_version = (8, 0)
681682

682683
# Multi-byte character sets which use 5c (backslash) in characters
683684
slash_charsets = (1, 13, 28, 84, 87, 88)
684685

686+
@classmethod
687+
def set_mysql_version(cls, version):
688+
"""Set the MySQL major version and change the charset mapping if it is 5.7.
689+
690+
Args:
691+
version (tuple): MySQL version tuple.
692+
"""
693+
cls.mysql_version = version[:2]
694+
if cls.mysql_version == (5, 7):
695+
cls.desc = MYSQL_CHARACTER_SETS_57
696+
685697
@classmethod
686698
def get_info(cls, setid):
687699
"""Retrieves character set information as tuple using an ID
@@ -757,6 +769,8 @@ def get_charset_info(cls, charset=None, collation=None):
757769
except IndexError:
758770
ProgrammingError(f"Character set ID {charset} unknown")
759771

772+
if charset in ("utf8", "utf-8") and cls.mysql_version == (8, 0):
773+
charset = "utf8mb4"
760774
if charset is not None and collation is None:
761775
info = cls.get_default_collation(charset)
762776
return (info[2], info[1], info[0])

Diff for: lib/mysql/connector/conversion.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def __init__(self, charset="utf8", use_unicode=True, str_fallback=False):
6363

6464
def set_charset(self, charset):
6565
"""Set character set"""
66-
if charset == "utf8mb4":
66+
if charset in ("utf8mb4", "utf8mb3"):
6767
charset = "utf8"
6868
if charset is not None:
6969
self.charset = charset

0 commit comments

Comments
 (0)