Commit f648ca2

llama : add llama_sampling API + move grammar in libllama
ggml-ci
1 parent b69a480 commit f648ca2


48 files changed (+2429, -2538 lines)
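Per the commit title, token sampling moves behind a new llama_sampling API exposed by libllama, and the GBNF grammar parser moves out of common/ into the library. A minimal usage sketch of such an API follows; the llama_sampling_* names and signatures here are assumptions inferred from the commit title and the pre-existing common/sampling.cpp helpers, not taken from the diff below, so treat them as illustrative only.

    // Hypothetical sketch only: the llama_sampling_* calls below are assumed,
    // not copied from this commit; see llama.h in the commit for the real API.
    #include "llama.h"

    static llama_token sample_one(struct llama_context * ctx,
                                  struct llama_sampling * smpl,
                                  int idx) {
        // sample a token from the logits at batch position `idx`
        const llama_token id = llama_sampling_sample(smpl, ctx, idx);

        // inform the sampler so repetition penalties and grammar state advance
        llama_sampling_accept(smpl, id);

        return id;
    }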

Diff for: Makefile (-6 lines)

@@ -927,7 +927,6 @@ OBJ_COMMON = \
 	common/ngram-cache.o \
 	common/sampling.o \
 	common/train.o \
-	common/grammar-parser.o \
 	common/build-info.o \
 	common/json-schema-to-grammar.o

@@ -1167,11 +1166,6 @@ common/console.o: \
 	common/console.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
-common/grammar-parser.o: \
-	common/grammar-parser.cpp \
-	common/grammar-parser.h
-	$(CXX) $(CXXFLAGS) -c $< -o $@
-
 common/json-schema-to-grammar.o: \
 	common/json-schema-to-grammar.cpp \
 	common/json-schema-to-grammar.h

Diff for: common/CMakeLists.txt (-2 lines)

@@ -58,8 +58,6 @@ add_library(${TARGET} STATIC
     sampling.cpp
     console.h
     console.cpp
-    grammar-parser.h
-    grammar-parser.cpp
     json.hpp
     json-schema-to-grammar.cpp
     train.h

Diff for: common/common.cpp (+37, -72 lines)

Large diffs are not rendered by default.

Diff for: common/common.h (+1, -5 lines)

@@ -77,8 +77,6 @@ struct cpu_params {
 };
 
 struct gpt_params {
-    uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
-
     int32_t n_predict = -1; // new tokens to predict
     int32_t n_ctx = 0; // context size
     int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)

@@ -120,8 +118,7 @@ struct gpt_params {
     enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
     enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings
 
-    // // sampling parameters
-    struct llama_sampling_params sparams;
+    struct gpt_sampling_params sparams;
 
     std::string model = ""; // model path
     std::string model_draft = ""; // draft model for speculative decoding

@@ -185,7 +182,6 @@ struct gpt_params {
     bool flash_attn = false; // flash attention
 
     bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
-    bool ignore_eos = false; // ignore generated EOS tokens
     bool logits_all = false; // return logits for all tokens in the batch
     bool use_mmap = true; // use mmap for faster loads
     bool use_mlock = false; // use mlock to keep model in memory
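The fields removed from gpt_params here (the RNG seed and ignore_eos) are presumably folded into the new gpt_sampling_params struct that replaces llama_sampling_params in common, so that everything the sampler needs is grouped in one place. A rough sketch of that grouping is shown below; the field list is an assumption based on the removed lines above, not the actual struct from the commit.

    // Hypothetical sketch; the real gpt_sampling_params in common/sampling.h
    // from this commit is authoritative.
    #include <cstdint>
    #include "llama.h"

    struct gpt_sampling_params {
        uint32_t seed       = LLAMA_DEFAULT_SEED; // RNG seed, moved out of gpt_params
        bool     ignore_eos = false;              // ignore generated EOS tokens, moved out of gpt_params
        // ... remaining sampling parameters (top-k, top-p, temperature, grammar, ...)
    };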
