Skip to content

Commit 65a38cc

Browse files
authored
[Cherry-Pick][Fix] deepsparse – tokenizer stride size issue with original transformers (#1426)
* [Fix] Remove erroneous LIB.kv_cache input when using external kv cache management (#1337) * initial commit * initial commit * cleanup * cleanup2 * initial commit * final solution
1 parent 0d5c7b7 commit 65a38cc

File tree

1 file changed

+15
-0
lines changed

1 file changed

+15
-0
lines changed

Diff for: src/deepsparse/transformers/pipelines/question_answering.py

+15
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import json
3838
import logging
3939
import os
40+
import warnings
4041
from typing import Any, Dict, List, Optional, Tuple, Type
4142

4243
import numpy
@@ -504,6 +505,20 @@ def route_input_to_bucket(
504505
def _tokenize(self, example: SquadExample, *args):
505506
# The logic here closely matches the tokenization step performed
506507
# on evaluation dataset in the SparseML question answering training script
508+
509+
added_special_tokens = self.tokenizer.num_special_tokens_to_add()
510+
effective_max_length = self.sequence_length - added_special_tokens
511+
if self.doc_stride >= effective_max_length:
512+
new_doc_stride = effective_max_length
513+
warnings.warn(
514+
f"Tokenizer stride set to {self.doc_stride}, "
515+
f"which is greater than or equal to its effective max length "
516+
f"of {effective_max_length} (= {self.sequence_length} "
517+
f"original max length - {added_special_tokens} added special tokens). "
518+
f"Capping the doc stride to {new_doc_stride}"
519+
)
520+
self._doc_stride = new_doc_stride
521+
507522
if not self.tokenizer.is_fast:
508523
raise ValueError(
509524
"This example script only works for models that have a fast tokenizer."

0 commit comments

Comments
 (0)