Skip to content

Commit 55710dd

Browse files
authored
Public tokenizer errors, hasChatTemplate (#171)
* Make tokenizer errors public, add hasChatTemplate
* Tests, better message.
1 parent 47d6c65 commit 55710dd

File tree

2 files changed

+32
-2
lines changed

2 files changed

+32
-2
lines changed

Sources/Tokenizers/Tokenizer.swift

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ import Jinja
1212
public typealias Message = [String: Any]
1313
public typealias ToolSpec = [String: Any]
1414

15-
enum TokenizerError: Error {
15+
public enum TokenizerError: Error {
1616
case missingConfig
1717
case missingTokenizerClassInConfig
1818
case unsupportedTokenizer(String)
@@ -144,6 +144,8 @@ public protocol Tokenizer {
144144
var unknownToken: String? { get }
145145
var unknownTokenId: Int? { get }
146146

147+
/// Whether this tokenizer has a chat template.
var hasChatTemplate: Bool { get }
148+
147149
/// The appropriate chat template is selected from the tokenizer config
148150
func applyChatTemplate(messages: [Message]) throws -> [Int]
149151

@@ -182,6 +184,8 @@ public protocol Tokenizer {
182184
}
183185

184186
extension Tokenizer {
187+
/// Default implementation: reports that no chat template is available.
public var hasChatTemplate: Bool { false }
188+
185189
/// Call previous signature for backwards compatibility
186190
func applyChatTemplate(
187191
messages: [Message],
@@ -399,6 +403,10 @@ public class PreTrainedTokenizer: Tokenizer {
399403
model.convertIdToToken(id)
400404
}
401405

406+
/// `true` when the tokenizer configuration contains a chat template, `false` otherwise.
public var hasChatTemplate: Bool {
    tokenizerConfig.chatTemplate != nil
}
409+
402410
/// Applies the chat template to `messages` by forwarding to the overload
/// that takes `addGenerationPrompt`, passing `true`.
///
/// - Parameter messages: The chat messages to pass through the template.
/// - Returns: The `[Int]` result produced by the forwarded call.
/// - Throws: Rethrows any error thrown by the forwarded call.
public func applyChatTemplate(messages: [Message]) throws -> [Int] {
403411
try applyChatTemplate(messages: messages, addGenerationPrompt: true)
404412
}
@@ -485,7 +493,7 @@ public class PreTrainedTokenizer: Tokenizer {
485493
}
486494

487495
guard let selectedChatTemplate else {
488-
throw TokenizerError.chatTemplate("No chat template was specified")
496+
throw TokenizerError.chatTemplate("This tokenizer does not have a chat template, and no template was passed.")
489497
}
490498

491499
let template = try Template(selectedChatTemplate)

Tests/TokenizersTests/ChatTemplateTests.swift

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,4 +169,26 @@ What is the weather in Paris today?<|im_end|>
169169
XCTAssertTrue(decoded.hasPrefix(expectedPromptStart), "Prompt should start with expected system message")
170170
XCTAssertTrue(decoded.hasSuffix(expectedPromptEnd), "Prompt should end with expected format")
171171
}
172+
173+
/// Checks `hasChatTemplate` on two pretrained tokenizers: one asserted to
/// report `false` and one asserted to report `true`.
func testHasChatTemplate() async throws {
    let bertTokenizer = try await AutoTokenizer.from(pretrained: "google-bert/bert-base-uncased")
    XCTAssertFalse(bertTokenizer.hasChatTemplate)

    let deepSeekTokenizer = try await AutoTokenizer.from(pretrained: "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")
    XCTAssertTrue(deepSeekTokenizer.hasChatTemplate)
}
180+
181+
/// Verifies that applying a chat template on a tokenizer that reports
/// `hasChatTemplate == false` throws `TokenizerError.chatTemplate` with the
/// expected explanatory message.
func testApplyTemplateError() async throws {
    let tokenizer = try await AutoTokenizer.from(pretrained: "google-bert/bert-base-uncased")
    XCTAssertFalse(tokenizer.hasChatTemplate)

    // Inspect the thrown error inside the handler so the throwing call is
    // made exactly once and a wrong error type is reported with its value.
    XCTAssertThrowsError(try tokenizer.applyChatTemplate(messages: [])) { error in
        guard case TokenizerError.chatTemplate(let message) = error else {
            XCTFail("Expected TokenizerError.chatTemplate, got \(error)")
            return
        }
        XCTAssertEqual(message, "This tokenizer does not have a chat template, and no template was passed.")
    }
}
172194
}

0 commit comments

Comments
 (0)