convert_main.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
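
# Converts a GGUF checkpoint to an ExecuTorch PTE program. An illustrative
# invocation (the file names here are placeholders):
#
#     python convert_main.py --gguf_file model.gguf --pte_file model.pte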

import argparse
from executorch.extension.gguf_util.converter import convert_to_pte
from executorch.extension.gguf_util.load_gguf import load_file


def save_pte_program(_, pte_file) -> None:
    # TODO (mnachin): Save the PTE program to a file
    print(f"Saving PTE program to {pte_file}")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--gguf_file",
        type=str,
        required=True,
        help="The GGUF file to load.",
    )
    parser.add_argument(
        "--pte_file",
        type=str,
        required=True,
        help="The path to save the PTE file.",
    )
    args = parser.parse_args()

    # Step 1: Load the GGUF file
    gguf_model_args, gguf_weights = load_file(args.gguf_file)

    # Step 2: Convert the GGUF model to PTE.
    # Currently, under the hood, this first converts the GGUF model to a
    # PyTorch model (nn.Module), then exports it to ExecuTorch.
    #
    # NOTE: In the future, it may make sense to refactor the GGUF-to-nn.Module
    # conversion into its own package that can be shared between ExecuTorch
    # and PyTorch core. One can imagine needing to load a GGUF file directly
    # into PyTorch core and use torch.compile/AOTInductor to accelerate it on
    # servers, without ever touching ExecuTorch.
    #
    # TODO(mnachin): Add a knob to delegate to various backends.
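    # Roughly, the pipeline inside convert_to_pte looks like the following
    # sketch; `build_nn_module` and `example_inputs` are hypothetical
    # stand-ins for illustration, not names from this repo:
    #
    #     model = build_nn_module(gguf_model_args, gguf_weights)
    #     exported_program = torch.export.export(model, example_inputs)
    #     pte_program = exir.to_edge(exported_program).to_executorch()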
    pte_program = convert_to_pte(gguf_model_args, gguf_weights)

    # Step 3: Save the PTE program so that it can be used by the
    # ExecuTorch runtime.
    save_pte_program(pte_program, args.pte_file)


if __name__ == "__main__":
    main()