Skip to content

Commit 1041dc3

Browse files
authored
Support Tiktoken Gpt-4.1 Model (#7453)
* Support Tiktoken Gpt-4.1 Model
* Add o4-mini
1 parent 52e0db1 commit 1041dc3

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

src/Microsoft.ML.Tokenizers/Model/TiktokenTokenizer.cs

+3
Original file line numberDiff line numberDiff line change
@@ -1032,6 +1032,7 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo
10321032
// chat
10331033
( "o1-", ModelEncoding.O200kBase ), // e.g. o1-mini
10341034
( "o3-", ModelEncoding.O200kBase ), // e.g. o3-mini
1035+
( "gpt-4.1-", ModelEncoding.O200kBase), // e.g., gpt-4.1-mini
10351036
( "gpt-4o-", ModelEncoding.O200kBase), // e.g., gpt-4o-2024-05-13
10361037
( "gpt-4-", ModelEncoding.Cl100kBase), // e.g., gpt-4-0314, etc., plus gpt-4-32k
10371038
( "gpt-3.5-", ModelEncoding.Cl100kBase), // e.g., gpt-3.5-turbo-0301, -0401, etc.
@@ -1045,6 +1046,8 @@ private static readonly (string Prefix, ModelEncoding Encoding)[] _modelPrefixTo
10451046
{ "gpt-4o", ModelEncoding.O200kBase },
10461047
{ "o1", ModelEncoding.O200kBase },
10471048
{ "o3", ModelEncoding.O200kBase },
1049+
{ "o4-mini", ModelEncoding.O200kBase },
1050+
{ "gpt-4.1", ModelEncoding.O200kBase },
10481051
{ "gpt-4", ModelEncoding.Cl100kBase },
10491052
{ "gpt-3.5-turbo", ModelEncoding.Cl100kBase },
10501053
{ "gpt-3.5-turbo-16k", ModelEncoding.Cl100kBase },

test/Microsoft.ML.Tokenizers.Tests/TiktokenTests.cs

+5
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,9 @@ public void TestEncodeR50kBase()
401401
[InlineData("o3")]
402402
[InlineData("o3-")]
403403
[InlineData("o3-mini")]
404+
[InlineData("o4-mini")]
405+
[InlineData("gpt-4.1")]
406+
[InlineData("gpt-4.1-mini")]
404407
[InlineData("gpt-4o")]
405408
[InlineData("gpt-4o-")]
406409
[InlineData("gpt-4")]
@@ -502,9 +505,11 @@ public void TestEncodingNamesNegativeCases()
502505
}
503506

504507
[InlineData("gpt-4")]
508+
[InlineData("gpt-4.1")]
505509
[InlineData("gpt-4o")]
506510
[InlineData("o1")]
507511
[InlineData("o3")]
512+
[InlineData("o4-mini")]
508513
[InlineData("text-davinci-003")]
509514
[InlineData("text-curie-001")]
510515
[InlineData("text-davinci-edit-001")]

0 commit comments

Comments
 (0)