Skip to content

Commit 0cccb41

Browse files
author
Mug
committed
Added an iterative search to prevent instructions from being echoed, added an ignore-eos option, added a no-mmap option, and fixed a bug that echoed one character too many
1 parent 241d608 commit 0cccb41

File tree

2 files changed

+35
-4
lines changed

2 files changed

+35
-4
lines changed

examples/low_level_api/common.py

+3
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class GptParams:
4040
instruct: bool = False
4141
ignore_eos: bool = False
4242
perplexity: bool = False
43+
use_mmap: bool = True
4344
use_mlock: bool = False
4445
mem_test: bool = False
4546
verbose_prompt: bool = False
@@ -110,7 +111,9 @@ def gpt_params_parse(argv = None, params: Optional[GptParams] = None):
110111
dest="use_color"
111112
)
112113
parser.add_argument("--mlock", action="store_true",help="force system to keep model in RAM rather than swapping or compressing",dest="use_mlock")
114+
parser.add_argument("--no-mmap", action="store_false",help="do not memory-map model (slower load but may reduce pageouts if not using mlock)",dest="use_mmap")
113115
parser.add_argument("--mtest", action="store_true",help="compute maximum memory usage",dest="mem_test")
116+
parser.add_argument("--verbose-prompt", action="store_true",help="print prompt before generation",dest="verbose_prompt")
114117
parser.add_argument(
115118
"-r",
116119
"--reverse-prompt",

examples/low_level_api/low_level_api_chat_cpp.py

+32-4
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,25 @@
2626
CONSOLE_COLOR_PROMPT = ANSI_COLOR_YELLOW
2727
CONSOLE_COLOR_USER_INPUT = ANSI_BOLD + ANSI_COLOR_GREEN
2828

# Iterative search
# Actively searches for a pattern in a token stream and prevents it from
# being returned to the caller.
class IterSearch:
    """Streaming filter that suppresses occurrences of *pattern*.

    Feed items one at a time through __call__; it returns the items that
    are safe to emit (an empty list while a potential match is still in
    progress). A completed match is swallowed entirely.
    """

    def __init__(self, pattern):
        # Copy so later mutation of the caller's sequence can't affect us.
        self.pattern = list(pattern)
        # Held-back items that currently form a prefix of the pattern.
        self.buffer = []

    def __call__(self, char):
        self.buffer.append(char)

        # Buffer is still a prefix of the pattern: hold everything back.
        if self.pattern[:len(self.buffer)] == self.buffer:
            if len(self.buffer) >= len(self.pattern):
                # Full pattern matched -- swallow it.
                self.buffer.clear()
            return []

        # Mismatch. Release items from the front only until what remains
        # is again a (possibly empty) prefix of the pattern. Flushing the
        # whole buffer here (the previous behavior) would leak a pattern
        # occurrence that starts inside the failed attempt, e.g. for
        # pattern "ab" and stream "aab" the second "ab" would be echoed.
        out = []
        while self.buffer and self.pattern[:len(self.buffer)] != self.buffer:
            out.append(self.buffer.pop(0))
        return out
2948
# A LLaMA interactive session
3049
class LLaMAInteract:
3150
def __init__(self, params: GptParams) -> None:
@@ -69,6 +88,7 @@ def __init__(self, params: GptParams) -> None:
6988
self.lparams.seed = self.params.seed
7089
self.lparams.memory_f16 = self.params.memory_f16
7190
self.lparams.use_mlock = self.params.use_mlock
91+
self.lparams.use_mmap = self.params.use_mmap
7292

7393
self.ctx = llama_cpp.llama_init_from_file(self.params.model.encode("utf8"), self.lparams)
7494
if (not self.ctx):
@@ -114,7 +134,9 @@ def __init__(self, params: GptParams) -> None:
114134
# in instruct mode, we inject a prefix and a suffix to each input by the user
115135
if (self.params.instruct):
116136
self.params.interactive_start = True
117-
self.first_antiprompt.append(self._tokenize(self.params.instruct_inp_prefix.strip(), False))
137+
_ptn = self._tokenize(self.params.instruct_inp_prefix.strip(), False)
138+
self.first_antiprompt.append(_ptn)
139+
self.antiecho = IterSearch(_ptn)
118140

119141
# enable interactive mode if reverse prompt or interactive start is specified
120142
if (len(self.params.antiprompt) != 0 or self.params.interactive_start):
@@ -217,7 +239,9 @@ def generate(self):
217239
if len(self.embd_inp) <= self.input_consumed:
218240
# out of user input, sample next token
219241

220-
#TODO: self.params.ignore_eos
242+
if (self.params.ignore_eos):
243+
logits = llama_cpp.llama_get_logits(self.ctx)
244+
logits[llama_cpp.llama_token_eos()] = llama_cpp.c_float(0)
221245

222246
_arr = self.last_n_tokens[-min(self.params.repeat_last_n, self.n_past):]
223247
id = llama_cpp.llama_sample_top_p_top_k(
@@ -263,7 +287,11 @@ def generate(self):
263287
# display tokens
264288
if self.output_echo:
265289
for id in self.embd:
266-
yield id
290+
if self.params.instruct:
291+
for r in self.antiecho(id):
292+
yield r
293+
else:
294+
yield id
267295

268296
# reset color to default if we there is no pending user input
269297
if (self.params.input_echo and len(self.embd_inp) == self.input_consumed):
@@ -279,7 +307,7 @@ def generate(self):
279307
break
280308

281309
# if we are using instruction mode, and we have processed the initial prompt
282-
if (self.n_past > 0 and self.params.interactive_start):
310+
if (self.params.interactive_start):
283311
break
284312

285313
# end of text token

0 commit comments

Comments
 (0)