Add TODO for split/join

dsikka · dsikka · commit 6007a75bb8aa · 2023-10-31T20:49:20.000-04:00
diff --git a/src/deepsparse/v2/text_generation/process_inputs.py b/src/deepsparse/v2/text_generation/process_inputs.py
@@ -121,5 +121,8 @@ def run(
             frequency_penalty=generation_config.repetition_penalty,
         )
 
+        # TODO: move this step to prep_for_prefill and add attention mask to the output
+        # this will allow us to split/join more easily when processing multiple prompts
+        # in parallel
         tokens = input_ids[attention_mask.nonzero()].tolist()
         return {"tokens": tokens}, inference_state_update

Original file line number	Diff line number	Diff line change
`@@ -121,5 +121,8 @@ def run(`
`121`	`121`	`frequency_penalty=generation_config.repetition_penalty,`
`122`	`122`	`)`
`123`	`123`
	`124`	`+ # TODO: move this step to prep_for_prefill and add attention mask to the output`
	`125`	`+ # this will allow us to split/join more easily when processing multiple prompts`
	`126`	`+ # in parallel`
`124`	`127`	`tokens = input_ids[attention_mask.nonzero()].tolist()`
`125`	`128`	`return {"tokens": tokens}, inference_state_update`