
Commit acdbe5e

Update for Phi4
Release of Phi-4-mini and Phi-4-multimodal
1 parent e4f0202 commit acdbe5e
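
The commit message announces the Phi-4-mini and Phi-4-multimodal releases. For orientation, here is a minimal, hedged sketch of loading the new mini model with the same transformers API that the README's fine-tuning script below uses; the repo id `microsoft/Phi-4-mini-instruct` and the loading options are assumptions based on the usual Hugging Face naming, not something this commit specifies.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "microsoft/Phi-4-mini-instruct"  # assumed Hub repo id for the new release

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",  # requires `accelerate`; places weights on the available GPU(s)
)

# Quick smoke test via the model's chat template
messages = [{"role": "user", "content": "Summarize what Phi-4-mini is in one sentence."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
output_ids = model.generate(input_ids, max_new_tokens=64)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```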

File tree

3,009 files changed: +18,495 −102,092 lines changed


README.md

+147 −149
@@ -1,74 +1,74 @@
# This code is for fine-tuning Phi-3 models.
# Note: this requires 7.4 GB of GPU RAM for the process.
# Models available at https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3

# Model names:
# microsoft/Phi-3-mini-4k-instruct
# microsoft/Phi-3-mini-128k-instruct
# microsoft/Phi-3-small-8k-instruct
# microsoft/Phi-3-small-128k-instruct
# microsoft/Phi-3-medium-4k-instruct
# microsoft/Phi-3-medium-128k-instruct
# microsoft/Phi-3-vision-128k-instruct
# microsoft/Phi-3-mini-4k-instruct-onnx
# microsoft/Phi-3-mini-4k-instruct-onnx-web
# microsoft/Phi-3-mini-128k-instruct-onnx
# microsoft/Phi-3-small-8k-instruct-onnx-cuda
# microsoft/Phi-3-small-128k-instruct-onnx-cuda
# microsoft/Phi-3-medium-4k-instruct-onnx-cpu
# microsoft/Phi-3-medium-4k-instruct-onnx-cuda
# microsoft/Phi-3-medium-4k-instruct-onnx-directml
# microsoft/Phi-3-medium-128k-instruct-onnx-cpu
# microsoft/Phi-3-medium-128k-instruct-onnx-cuda
# microsoft/Phi-3-medium-128k-instruct-onnx-directml
# microsoft/Phi-3-mini-4k-instruct-gguf

import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

# Placeholders: substitute your own values.
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"  # any model name from the list above
DATASET_NAME = "your_dataset_name"               # a dataset with a "conversations" column, as used below
NEW_MODEL_NAME = "phi-3-finetuned"               # output directory for checkpoints

# Load the pre-trained model and tokenizer
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the dataset for fine-tuning
dataset = load_dataset(DATASET_NAME, split="train")

# Define the formatting function for the prompts
def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = []
    mapper = {"system": "system\n", "human": "\nuser\n", "gpt": "\nassistant\n"}
    end_mapper = {"system": "", "human": "", "gpt": ""}
    for convo in convos:
        text = "".join(f"{mapper[(turn := x['from'])]} {x['value']}\n{end_mapper[turn]}" for x in convo)
        texts.append(f"{text}{tokenizer.eos_token}")
    return {"text": texts}

# Apply the formatting function to the dataset
dataset = dataset.map(formatting_prompts_func, batched=True)

# Define the training arguments
args = TrainingArguments(
    evaluation_strategy="steps",   # note: step-wise evaluation needs an eval_dataset passed to the trainer
    per_device_train_batch_size=7,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    learning_rate=1e-4,
    fp16=True,
    max_steps=-1,
    num_train_epochs=3,
    save_strategy="epoch",
    logging_steps=10,
    output_dir=NEW_MODEL_NAME,
    optim="paged_adamw_32bit",
    lr_scheduler_type="linear",
)

# Create the trainer
trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=128,
    formatting_func=formatting_prompts_func,
)

# Start the training process
trainer.train()
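
After training finishes, a natural next step is to persist and sanity-check the result. The following is a minimal sketch, not part of the original README; it assumes the `trainer`, `tokenizer`, `model`, and `NEW_MODEL_NAME` objects from the script above, and the prompt string simply reuses the `user`/`assistant` markers that `formatting_prompts_func` writes into the training text.

```python
import torch

# Save the fine-tuned weights and tokenizer to the output directory
trainer.save_model(NEW_MODEL_NAME)
tokenizer.save_pretrained(NEW_MODEL_NAME)

# Quick generation check with the same turn markers used during formatting
prompt = "\nuser\n What is Phi-3?\n\nassistant\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```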
File renamed without changes.
File renamed without changes.

0 commit comments
