We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent bd12e8b · commit 8682754 · Copy full SHA for 8682754
scripts/gpt2-tf2/gpt2_train_distributed.py
@@ -50,7 +50,7 @@ def get_dataset(fil):
50
51
def tokenize(data, tokenizer, truncate=False):
52
if truncate:
53
- data = tokenizer(data[:1000], return_tensors='tf', padding=True, truncation=True)
+ data = tokenizer(data[:100], return_tensors='tf', padding=True, truncation=True)
54
else:
55
data = tokenizer(data, return_tensors='tf', padding=True, truncation=True)
56
return tf.data.Dataset.from_tensor_slices((dict(data), data['input_ids']))
0 commit comments