Skip to content

Commit 0dacadb

Browse files
committed
Option to set processor affinity
1 parent c16cf49 commit 0dacadb

File tree

5 files changed

+43
-0
lines changed

5 files changed

+43
-0
lines changed

example_chatbot.py

+4
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@
6565

6666
model_init.print_options(args, print_opts)
6767

68+
# Globals
69+
70+
model_init.set_globals(args)
71+
6872
# Load prompt file
6973

7074
username = args.username

globals.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import os
2+
3+
def set_affinity_mask(affinity_mask = None):
    """Pin the current process to the given set of CPU cores.

    affinity_mask: set of core indices accepted by os.sched_setaffinity,
    or None to allow the process to run on every available core.
    """
    if affinity_mask is None:
        # Default: a mask covering every core the OS reports.
        affinity_mask = set(range(os.cpu_count()))
    os.sched_setaffinity(0, affinity_mask)
10+
11+
12+
def set_affinity_list(affinity_list = None):
    """Pin the process to the cores in *affinity_list* (all cores if None).

    Thin wrapper that converts a list/iterable of core indices into the
    set expected by set_affinity_mask.
    """
    mask = None if affinity_list is None else set(affinity_list)
    set_affinity_mask(mask)
16+
17+
18+
def set_affinity_str(affinity_str = None):
    """Pin the process to cores given as a comma-separated string, e.g. "0,1,2,3".

    None, an empty string or a whitespace-only string selects all cores.
    Raises ValueError if any element of the string is not an integer.
    """
    # `not affinity_str.strip()` covers both "" and whitespace-only strings;
    # the original `isspace()` check let "" fall through and crash on int("").
    if affinity_str is None or not affinity_str.strip():
        set_affinity_mask(None)
        # Bug fix: without this return, execution fell through and crashed
        # (AttributeError on None.split / ValueError on int of whitespace).
        return
    aff = [int(alloc) for alloc in affinity_str.split(",")]
    set_affinity_list(aff)

model_init.py

+11
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from tokenizer import ExLlamaTokenizer
33
import argparse, sys, os, glob
44
from torch import version as torch_version
5+
from globals import set_affinity_str
56

67
def add_args(parser):
78

@@ -33,6 +34,8 @@ def add_args(parser):
3334
parser.add_argument("-fh2", "--force_half2", action = "store_true", help = "Force enable half2 even if unsupported")
3435
parser.add_argument("-cs", "--concurrent_streams", action = "store_true", help = "Use concurrent CUDA streams")
3536

37+
parser.add_argument("-aff", "--affinity", type = str, help = "Comma-separated list, sets processor core affinity. E.g.: -aff 0,1,2,3")
38+
3639

3740
def post_parse(args):
3841

@@ -72,6 +75,7 @@ def print_options(args, extra_options = None):
7275
print_opts = []
7376
if args.gpu_split is not None: print_opts.append(f"gpu_split: {args.gpu_split}")
7477
if args.gpu_peer_fix: print_opts.append("gpu_peer_fix")
78+
if args.affinity: print_opts.append(f" --affinity: {args.affinity}")
7579

7680
if extra_options is not None: print_opts += extra_options
7781

@@ -139,6 +143,13 @@ def make_config(args):
139143
return config
140144

141145

146+
# Global state
147+
148+
def set_globals(args):
    """Apply process-wide settings taken from the parsed command line.

    Currently this only covers processor affinity (the --affinity option);
    a falsy/absent value leaves the process affinity untouched.
    """
    if args.affinity:
        set_affinity_str(args.affinity)
151+
152+
142153
# Print stats after loading model
143154

144155
def print_stats(model):

test_benchmark_inference.py

+4
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,10 @@ def mem(name, total = False):
118118

119119
model_init.print_options(args, print_opts)
120120

121+
# Globals
122+
123+
model_init.set_globals(args)
124+
121125
# Instantiate model
122126

123127
config = model_init.make_config(args)

webui/app.py

+2
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ def api_append_block():
138138
model_init.print_options(args)
139139
config = model_init.make_config(args)
140140

141+
model_init.set_globals(args)
142+
141143
print(f" -- Loading model...")
142144
model = ExLlama(config)
143145

0 commit comments

Comments
 (0)