Skip to content

Commit 15db19a

Browse files
committed
Merge 'origin/master' into hipblas
2 parents 04419f1 + 46088f7 commit 15db19a

21 files changed

+580
-251
lines changed

CMakeLists.txt

-5
Original file line numberDiff line numberDiff line change
@@ -419,11 +419,6 @@ if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES
419419
if (MSVC)
420420
# TODO: arm msvc?
421421
else()
422-
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
423-
# Apple M1, M2, etc.
424-
# Raspberry Pi 3, 4, Zero 2 (64-bit)
425-
add_compile_options(-mcpu=native)
426-
endif()
427422
if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6")
428423
# Raspberry Pi 1, Zero
429424
add_compile_options(-mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access)

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ as the main playground for developing new features for the [ggml](https://github
8585
- [X] [OpenBuddy 🐶 (Multilingual)](https://github.com/OpenBuddy/OpenBuddy)
8686
- [X] [Pygmalion 7B / Metharme 7B](#using-pygmalion-7b--metharme-7b)
8787
- [X] [WizardLM](https://github.com/nlpxucan/WizardLM)
88+
- [X] [Baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B)
8889

8990
**Bindings:**
9091

convert.py

+36-5
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def find_n_mult(n_ff: int, n_embd: int) -> int:
136136
calc_ff = (((8*n_embd) // 3 + n_mult - 1) // n_mult)*n_mult
137137
if calc_ff == n_ff:
138138
return n_mult
139-
return 1
139+
raise Exception(f"failed to find n_mult for (n_ff={n_ff}, n_embd={n_embd}).")
140140

141141
@dataclass
142142
class Params:
@@ -321,6 +321,10 @@ def astype(self, data_type: DataType) -> 'Tensor': ...
321321
@abstractmethod
322322
def permute(self, n_head: int) -> 'Tensor': ...
323323
@abstractmethod
324+
def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor': ...
325+
@abstractmethod
326+
def part(self, n_part: int) -> 'UnquantizedTensor': ...
327+
@abstractmethod
324328
def to_ggml(self) -> 'GGMLCompatibleTensor': ...
325329

326330

@@ -345,6 +349,14 @@ def astype(self, data_type: DataType) -> Tensor:
345349
def to_ggml(self) -> 'UnquantizedTensor':
346350
return self
347351

352+
def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor':
353+
r = self.ndarray.shape[0] // 3
354+
return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head))
355+
356+
def part(self, n_part: int) -> 'UnquantizedTensor':
357+
r = self.ndarray.shape[0] // 3
358+
return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])
359+
348360
def permute(self, n_head: int) -> 'UnquantizedTensor':
349361
return UnquantizedTensor(permute(self.ndarray, n_head))
350362

@@ -642,6 +654,19 @@ def load() -> Tensor:
642654
return lazy_tensor.load().permute(n_head)
643655
return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}) ' + lazy_tensor.description)
644656

657+
def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int) -> LazyTensor:
658+
def load() -> Tensor:
659+
return lazy_tensor.load().permute_part(n_part, n_head)
660+
s = lazy_tensor.shape.copy()
661+
s[0] = s[0] // 3
662+
return LazyTensor(load, s, lazy_tensor.data_type, f'permute({n_head}) ' + lazy_tensor.description)
663+
664+
def part_lazy(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
665+
def load() -> Tensor:
666+
return lazy_tensor.load().part(n_part)
667+
s = lazy_tensor.shape.copy()
668+
s[0] = s[0] // 3
669+
return LazyTensor(load, s, lazy_tensor.data_type, 'part ' + lazy_tensor.description)
645670

646671
def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
647672
out: LazyModel = {}
@@ -650,11 +675,17 @@ def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
650675
out["output.weight"] = model["lm_head.weight"]
651676

652677
for i in itertools.count():
653-
if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
678+
if f"model.layers.{i}.self_attn.q_proj.weight" in model:
679+
out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
680+
out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head)
681+
out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
682+
elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
683+
out[f"layers.{i}.attention.wq.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head)
684+
out[f"layers.{i}.attention.wk.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head)
685+
out[f"layers.{i}.attention.wv.weight"] = part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
686+
else:
654687
break
655-
out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
656-
out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head)
657-
out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
688+
658689
out[f"layers.{i}.attention.wo.weight"] = model[f"model.layers.{i}.self_attn.o_proj.weight"]
659690

660691
out[f"layers.{i}.feed_forward.w1.weight"] = model[f"model.layers.{i}.mlp.gate_proj.weight"]

examples/common.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
110110
invalid_param = true;
111111
break;
112112
}
113-
params.seed = std::stoi(argv[i]);
113+
params.seed = std::stoul(argv[i]);
114114
} else if (arg == "-t" || arg == "--threads") {
115115
if (++i >= argc) {
116116
invalid_param = true;

examples/common.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
int32_t get_num_physical_cores();
2323

2424
struct gpt_params {
25-
int32_t seed = -1; // RNG seed
25+
uint32_t seed = -1; // RNG seed
2626
int32_t n_threads = get_num_physical_cores();
2727
int32_t n_predict = -1; // new tokens to predict
2828
int32_t n_ctx = 512; // context size

examples/embd-input/embd-input-lib.cpp

+6-3
Original file line numberDiff line numberDiff line change
@@ -210,9 +210,12 @@ llama_token sampling_id(struct MyModel* mymodel) {
210210
const char * sampling(struct MyModel * mymodel) {
211211
llama_context * ctx = mymodel->ctx;
212212
int id = sampling_id(mymodel);
213-
std::string ret;
214-
if (id == llama_token_eos()) ret = "</s>";
215-
else ret = llama_token_to_str(ctx, id);
213+
static std::string ret;
214+
if (id == llama_token_eos()) {
215+
ret = "</s>";
216+
} else {
217+
ret = llama_token_to_str(ctx, id);
218+
}
216219
eval_id(mymodel, id);
217220
return ret.c_str();
218221
}

examples/embd-input/embd-input.h

+1-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
#include "llama.h"
66
#include "build-info.h"
77

8-
98
extern "C" {
109

1110
typedef struct MyModel {
@@ -14,14 +13,13 @@ typedef struct MyModel {
1413
int n_past = 0;
1514
} MyModel;
1615

17-
1816
struct MyModel* create_mymodel(int argc, char ** argv);
1917

2018
bool eval_float(void* model, float* input, int N);
2119
bool eval_tokens(void* model, std::vector<llama_token> tokens);
2220
bool eval_id(struct MyModel* mymodel, int id);
2321
bool eval_string(struct MyModel* mymodel, const char* str);
24-
const char* sampling(struct MyModel* mymodel);
22+
const char * sampling(struct MyModel* mymodel);
2523
llama_token sampling_id(struct MyModel* mymodel);
2624
void free_mymodel(struct MyModel* mymodel);
2725

examples/embedding/embedding.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ int main(int argc, char ** argv) {
2424

2525
fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
2626

27-
if (params.seed < 0) {
27+
if (params.seed == LLAMA_DEFAULT_SEED) {
2828
params.seed = time(NULL);
2929
}
3030

31-
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
31+
fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
3232

3333
std::mt19937 rng(params.seed);
3434
if (params.random_prompt) {

examples/main/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ Example usage: `--logit-bias 29905-inf`
242242

243243
### RNG Seed
244244

245-
- `-s SEED, --seed SEED`: Set the random number generator (RNG) seed (default: -1, < 0 = random seed).
245+
- `-s SEED, --seed SEED`: Set the random number generator (RNG) seed (default: -1, -1 = random seed).
246246

247247
The RNG seed is used to initialize the random number generator that influences the text generation process. By setting a specific seed value, you can obtain consistent and reproducible results across multiple runs with the same input and settings. This can be helpful for testing, debugging, or comparing the effects of different options on the generated text to see when they diverge. If the seed is set to a value less than 0, a random seed will be used, which will result in different outputs on each run.
248248

examples/main/main.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,11 @@ int main(int argc, char ** argv) {
9494

9595
fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
9696

97-
if (params.seed < 0) {
97+
if (params.seed == LLAMA_DEFAULT_SEED) {
9898
params.seed = time(NULL);
9999
}
100100

101-
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
101+
fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
102102

103103
std::mt19937 rng(params.seed);
104104
if (params.random_prompt) {

examples/perplexity/perplexity.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,11 @@ int main(int argc, char ** argv) {
136136

137137
fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
138138

139-
if (params.seed < 0) {
139+
if (params.seed == LLAMA_DEFAULT_SEED) {
140140
params.seed = time(NULL);
141141
}
142142

143-
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
143+
fprintf(stderr, "%s: seed = %u\n", __func__, params.seed);
144144

145145
std::mt19937 rng(params.seed);
146146
if (params.random_prompt) {

examples/server/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ node .
152152

153153
`mirostat_eta`: Set the Mirostat learning rate, parameter eta (default: 0.1).
154154

155-
`seed`: Set the random number generator (RNG) seed (default: -1, < 0 = random seed).
155+
`seed`: Set the random number generator (RNG) seed (default: -1, -1 = random seed).
156156

157157
`ignore_eos`: Ignore end of stream token and continue generating (default: false).
158158

examples/train-text-from-scratch/train-text-from-scratch.cpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -2671,7 +2671,8 @@ struct train_params {
26712671
const char * fn_checkpoint_out;
26722672
const char * fn_model_out;
26732673

2674-
int seed;
2674+
uint32_t seed;
2675+
26752676
int n_ctx;
26762677
int n_embd;
26772678
int n_mult;
@@ -2768,7 +2769,7 @@ void train_print_usage(int /*argc*/, char ** argv, const struct train_params * p
27682769
fprintf(stderr, " --checkpoint-in FNAME path from which to load training checkpoint (default '%s')\n", params->fn_checkpoint_in);
27692770
fprintf(stderr, " --checkpoint-out FNAME path to save training checkpoint (default '%s')\n", params->fn_checkpoint_out);
27702771
fprintf(stderr, " --model-out FNAME path to save ggml model (default '%s')\n", params->fn_model_out);
2771-
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for < 0)\n");
2772+
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1, use random seed for -1)\n");
27722773
fprintf(stderr, " -c N, --ctx N Context size used during training (default %d)\n", params->n_ctx);
27732774
fprintf(stderr, " --embd N Embedding size used for new models (default %d)\n", params->n_embd);
27742775
fprintf(stderr, " --mult N Mult size used for new models, influences feedforward size. (default %d)\n", params->n_mult);
@@ -3034,10 +3035,10 @@ int main(int argc, char ** argv) {
30343035
return 1;
30353036
}
30363037

3037-
if (params.seed < 0) {
3038+
if (params.seed == LLAMA_DEFAULT_SEED) {
30383039
params.seed = time(NULL);
30393040
}
3040-
printf("%s: seed: %d\n", __func__, params.seed);
3041+
printf("%s: seed: %u\n", __func__, params.seed);
30413042
srand(params.seed);
30423043

30433044
struct llama_context_params llama_params = llama_context_default_params();

0 commit comments

Comments
 (0)