Skip to content

Commit 231d032

Browse files
authored
Feat!: support Unicode in sqlite, mysql, tsql, postgres, oracle (#4554)
* add support for UNICODE for sqlite, mysql, tsql, postgres, and oracle
* UNICODE tests
* linter fix
* oracle unicode fix
* clean-up
* ruff
1 parent b0cc7d0 commit 231d032

File tree

5 files changed

+16
-0
lines changed

5 files changed

+16
-0
lines changed

sqlglot/dialects/mysql.py

+1
Original file line numberDiff line numberDiff line change
@@ -775,6 +775,7 @@ class Generator(generator.Generator):
775775
exp.TsOrDsAdd: date_add_sql("ADD"),
776776
exp.TsOrDsDiff: lambda self, e: self.func("DATEDIFF", e.this, e.expression),
777777
exp.TsOrDsToDate: _ts_or_ds_to_date_sql,
778+
exp.Unicode: lambda self, e: f"ORD(CONVERT({self.sql(e.this)} USING utf32))",
778779
exp.UnixToTime: _unix_to_time_sql,
779780
exp.Week: _remove_ts_or_ds_to_date(),
780781
exp.WeekOfYear: _remove_ts_or_ds_to_date(rename_func("WEEKOFYEAR")),

sqlglot/dialects/oracle.py

+1
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@ class Generator(generator.Generator):
333333
exp.ToChar: lambda self, e: self.function_fallback_sql(e),
334334
exp.ToNumber: to_number_with_nls_param,
335335
exp.Trim: _trim_sql,
336+
exp.Unicode: lambda self, e: f"ASCII(UNISTR({self.sql(e.this)}))",
336337
exp.UnixToTime: lambda self,
337338
e: f"TO_DATE('1970-01-01', 'YYYY-MM-DD') + ({self.sql(e, 'this')} / 86400)",
338339
}

sqlglot/dialects/postgres.py

+2
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ class Parser(parser.Parser):
370370

371371
FUNCTIONS = {
372372
**parser.Parser.FUNCTIONS,
373+
"ASCII": exp.Unicode.from_arg_list,
373374
"DATE_TRUNC": build_timestamp_trunc,
374375
"DIV": lambda args: exp.cast(
375376
binary_from_function(exp.IntDiv)(args), exp.DataType.Type.DECIMAL
@@ -605,6 +606,7 @@ class Generator(generator.Generator):
605606
exp.VariancePop: rename_func("VAR_POP"),
606607
exp.Variance: rename_func("VAR_SAMP"),
607608
exp.Xor: bool_xor_sql,
609+
exp.Unicode: rename_func("ASCII"),
608610
exp.UnixToTime: _unix_to_time_sql,
609611
exp.Levenshtein: _levenshtein_sql,
610612
}

sqlglot/expressions.py

+4
Original file line numberDiff line numberDiff line change
@@ -6603,6 +6603,10 @@ class Unhex(Func):
66036603
pass
66046604

66056605

6606+
class Unicode(Func):
6607+
pass
6608+
6609+
66066610
# https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#unix_date
66076611
class UnixDate(Func):
66086612
pass

tests/dialects/test_sqlite.py

+8
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,14 @@ def test_sqlite(self):
9292
read={"snowflake": "LEAST(x, y, z)"},
9393
write={"snowflake": "LEAST(x, y, z)"},
9494
)
95+
self.validate_all(
96+
"UNICODE(x)",
97+
write={
98+
"mysql": "ORD(CONVERT(x USING utf32))",
99+
"postgres": "ASCII(x)",
100+
"oracle": "ASCII(UNISTR(x))",
101+
},
102+
)
95103
self.validate_identity(
96104
"SELECT * FROM station WHERE city IS NOT ''",
97105
"SELECT * FROM station WHERE NOT city IS ''",

0 commit comments

Comments
 (0)