Skip to content

Commit 893ac07

Browse files
cebtenzzre
authored and committed
gguf : track writer state, free unneeded tensors, cleanup (ggml-org#3871)
1 parent 4f9bc74 commit 893ac07

File tree

2 files changed

+54
-30
lines changed

2 files changed

+54
-30
lines changed

gguf-py/gguf/gguf.py

+53-29
Original file line numberDiff line numberDiff line change
@@ -646,18 +646,17 @@ def get_type(val):
646646
sys.exit()
647647

648648

649+
class WriterState(Enum):
650+
EMPTY = auto()
651+
HEADER = auto()
652+
KV_DATA = auto()
653+
TI_DATA = auto()
654+
655+
649656
class GGUFWriter:
650657
fout: BufferedWriter
651-
arch: str
652-
offset_tensor = 0
653-
data_alignment = GGUF_DEFAULT_ALIGNMENT
654-
kv_data = b""
655-
kv_data_count = 0
656-
ti_data = b""
657-
ti_data_count = 0
658-
use_temp_file: bool
659-
temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None
660-
tensors: list[tuple[np.ndarray[Any, Any], int]]
658+
temp_file: tempfile.SpooledTemporaryFile[bytes] | None
659+
tensors: list[np.ndarray[Any, Any]]
661660

662661
@property
663662
def pack_prefix(self):
@@ -683,27 +682,47 @@ def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True
683682
GGUFValueType.FLOAT64: f"{self.pack_prefix}d",
684683
GGUFValueType.BOOL: "?" ,
685684
}
686-
self.add_architecture()
685+
self.offset_tensor = 0
686+
self.data_alignment = GGUF_DEFAULT_ALIGNMENT
687+
self.kv_data = b""
688+
self.kv_data_count = 0
689+
self.ti_data = b""
690+
self.ti_data_count = 0
687691
self.use_temp_file = use_temp_file
692+
self.temp_file = None
688693
self.tensors = []
689694
endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian"
690695
print(f"This gguf file is for {endianess_str} only")
696+
self.state = WriterState.EMPTY
697+
698+
self.add_architecture()
691699

692700
def write_header_to_file(self):
701+
if self.state is not WriterState.EMPTY:
702+
raise ValueError(f'Expected output file to be empty, got {self.state}')
703+
693704
self.fout.write(struct.pack("<I", GGUF_MAGIC))
694705
self.fout.write(struct.pack(f"{self.pack_prefix}I", GGUF_VERSION))
695706
self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.ti_data_count))
696707
self.fout.write(struct.pack(f"{self.pack_prefix}Q", self.kv_data_count))
697708
self.flush()
698-
# print("tensors " + str(self.ti_data_count) + " kv " + str(self.kv_data_count))
709+
self.state = WriterState.HEADER
699710

700711
def write_kv_data_to_file(self):
712+
if self.state is not WriterState.HEADER:
713+
raise ValueError(f'Expected output file to contain the header, got {self.state}')
714+
701715
self.fout.write(self.kv_data)
702716
self.flush()
717+
self.state = WriterState.KV_DATA
703718

704719
def write_ti_data_to_file(self):
720+
if self.state is not WriterState.KV_DATA:
721+
raise ValueError(f'Expected output file to contain KV data, got {self.state}')
722+
705723
self.fout.write(self.ti_data)
706724
self.flush()
725+
self.state = WriterState.TI_DATA
707726

708727
def add_key(self, key: str):
709728
self.add_val(key, GGUFValueType.STRING, add_vtype=False)
@@ -796,6 +815,9 @@ def ggml_pad(x: int, n: int) -> int:
796815
return ((x + n - 1) // n) * n
797816

798817
def add_tensor_info(self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype[np.float16] | np.dtype[np.float32], tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None):
818+
if self.state is not WriterState.EMPTY:
819+
raise ValueError(f'Expected output file to be empty, got {self.state}')
820+
799821
assert raw_dtype is not None or tensor_dtype in (np.float32, np.float16), "Only F32 and F16 tensors are supported for now"
800822

801823
encoded_name = name.encode("utf8")
@@ -825,23 +847,22 @@ def add_tensor(self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequenc
825847
shape: Sequence[int] = raw_shape if raw_shape is not None else tensor.shape
826848
self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype = raw_dtype)
827849

828-
pad = GGUFWriter.ggml_pad(tensor.nbytes, self.data_alignment) - tensor.nbytes
829-
830-
if self.temp_file is None:
831-
self.tensors.append((tensor, pad))
850+
if self.temp_file is None:
851+
self.tensors.append(tensor)
832852
return
833853

834854
tensor.tofile(self.temp_file)
855+
self.write_padding(self.temp_file, tensor.nbytes)
835856

836-
if pad != 0:
837-
self.temp_file.write(bytes([0] * pad))
838-
839-
def write_padding(self, fp: BinaryIO, n: int, align: int | None = None):
857+
def write_padding(self, fp: IO[bytes], n: int, align: int | None = None):
840858
pad = GGUFWriter.ggml_pad(n, align if align is not None else self.data_alignment) - n
841859
if pad != 0:
842860
fp.write(bytes([0] * pad))
843861

844862
def write_tensor_data(self, tensor: np.ndarray[Any, Any]):
863+
if self.state is not WriterState.TI_DATA:
864+
raise ValueError(f'Expected output file to contain tensor info, got {self.state}')
865+
845866
if self.endianess==GGUFEndian.BIG:
846867
tensor.byteswap(inplace=True)
847868
self.write_padding(self.fout, self.fout.tell())
@@ -854,10 +875,13 @@ def write_tensors_to_file(self):
854875
self.write_padding(self.fout, self.fout.tell())
855876

856877
if self.temp_file is None:
857-
for (currtensor, currpad) in self.tensors:
858-
currtensor.tofile(self.fout)
859-
if currpad != 0:
860-
self.fout.write(bytes([0] * currpad))
878+
while True:
879+
try:
880+
tensor = self.tensors.pop(0)
881+
except IndexError:
882+
break
883+
tensor.tofile(self.fout)
884+
self.write_padding(self.fout, tensor.nbytes)
861885
return
862886

863887
self.temp_file.seek(0)
@@ -1002,11 +1026,8 @@ def add_pad_token_id(self, id: int):
10021026

10031027

10041028
class SpecialVocab:
1005-
load_merges: bool = False
1006-
merges: list[str] = []
1007-
special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
1008-
special_token_ids: dict[str, int] = {}
1009-
n_vocab: int | None = None
1029+
merges: list[str]
1030+
special_token_ids: dict[str, int]
10101031

10111032
def __init__(
10121033
self, path: str | os.PathLike[str], load_merges: bool = False,
@@ -1016,8 +1037,11 @@ def __init__(
10161037
self.special_token_ids = {}
10171038
self.n_vocab = n_vocab
10181039
self.load_merges = load_merges
1040+
self.merges = []
10191041
if special_token_types is not None:
10201042
self.special_token_types = special_token_types
1043+
else:
1044+
self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad')
10211045
self._load(Path(path))
10221046

10231047
def _load(self, path: Path) -> None:

gguf-py/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "gguf"
3-
version = "0.4.5"
3+
version = "0.4.6"
44
description = "Write ML models in GGUF for GGML"
55
authors = ["GGML <[email protected]>"]
66
packages = [

0 commit comments

Comments
 (0)