diff --git a/src/deepsparse/transformers/engines/nl_decoder_engine.py b/src/deepsparse/transformers/engines/nl_decoder_engine.py
index 5ec5001c2f..9c93616053 100644
--- a/src/deepsparse/transformers/engines/nl_decoder_engine.py
+++ b/src/deepsparse/transformers/engines/nl_decoder_engine.py
@@ -185,7 +185,7 @@ def __call__(
 
         :return: The generated token and corresponding logits
         """
-        timer = self.timer_manager.current
+        timer = self.timer_manager.current_or_new()
         if kv_cache:
             # if model has kv cache enabled, we need
             # to add the kv cache state to the input
diff --git a/src/deepsparse/utils/timer.py b/src/deepsparse/utils/timer.py
index 56a3452b6e..47dfd05dcb 100644
--- a/src/deepsparse/utils/timer.py
+++ b/src/deepsparse/utils/timer.py
@@ -338,6 +338,16 @@ def all_times(self) -> Dict[str, List[float]]:
 
         return all_times
 
+    def current_or_new(self) -> StagedTimer:
+        """
+        Return the current timer if one is set, otherwise create and return one.
+        """
+        if self.current:
+            return self.current
+        else:  # self.current is falsy: create one with total_inference=False
+            with self.new_timer_context(total_inference=False) as timer:
+                return timer  # NOTE(review): `with` exits on return - confirm intended
+
     def clear(self):
         for t in self._timers:
             t.clear()