convert_main.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
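
# Converts a GGUF checkpoint to an ExecuTorch PTE program. An illustrative
# invocation (the file names here are placeholders):
#
#     python convert_main.py --gguf_file model.gguf --pte_file model.pte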

import argparse
from executorch.extension.gguf_util.converter import convert_to_pte
from executorch.extension.gguf_util.load_gguf import load_file


def save_pte_program(_, pte_file) -> None:
    # TODO (mnachin): Save the PTE program to a file
    print(f"Saving PTE program to {pte_file}")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--gguf_file",
        type=str,
        required=True,
        help="The GGUF file to load.",
    )
    parser.add_argument(
        "--pte_file",
        type=str,
        required=True,
        help="The path to save the PTE file.",
    )
    args = parser.parse_args()

    # Step 1: Load the GGUF file
    gguf_model_args, gguf_weights = load_file(args.gguf_file)

    # Step 2: Convert the GGUF model to PTE.
    # Currently, under the hood, this first converts the GGUF model to a
    # PyTorch model (nn.Module), then exports it to ExecuTorch.
    #
    # NOTE: In the future, it may make sense to refactor the GGUF-to-nn.Module
    # conversion into its own package that can be shared between ExecuTorch
    # and PyTorch core. One can imagine needing to load a GGUF file directly
    # into PyTorch core and use torch.compile/AOTInductor to accelerate it on
    # servers, without ever touching ExecuTorch.
    #
    # TODO(mnachin): Add a knob to delegate to various backends.
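    # Roughly, the pipeline inside convert_to_pte looks like the following
    # sketch; `build_nn_module` and `example_inputs` are hypothetical
    # stand-ins for illustration, not names from this repo:
    #
    #     model = build_nn_module(gguf_model_args, gguf_weights)
    #     exported_program = torch.export.export(model, example_inputs)
    #     pte_program = exir.to_edge(exported_program).to_executorch()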
    pte_program = convert_to_pte(gguf_model_args, gguf_weights)

    # Step 3: Save the PTE program so that it can be used by the
    # ExecuTorch runtime.
    save_pte_program(pte_program, args.pte_file)


if __name__ == "__main__":
    main()