
Commit 1948b34

Merge branch 'main' of https://github.com/triton-inference-server/server into spolisetty_dlis_7657
2 parents: e7b441f + 596925a

File tree: 8 files changed (+129, -34 lines)


build.py

Lines changed: 4 additions & 2 deletions

@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -72,7 +72,7 @@
 
 DEFAULT_TRITON_VERSION_MAP = {
     "release_version": "2.54.0dev",
-    "triton_container_version": "24.01dev",
+    "triton_container_version": "25.01dev",
     "upstream_container_version": "24.12",
     "ort_version": "1.20.1",
     "ort_openvino_version": "2024.4.0",
@@ -1048,6 +1048,8 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap):
     # Install the windows- or linux-specific buildbase dependencies
     if target_platform() == "windows":
         df += """
+RUN python3 -m pip install build
+
 SHELL ["cmd", "/S", "/C"]
 """
     else:
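
The Windows buildbase change installs the PyPA "build" package, presumably so later build steps can produce Python wheels inside the image. A minimal sketch of how that frontend is typically driven, assuming a source tree containing a pyproject.toml or setup.py (the invocation is illustrative, not taken from this commit):

    # install the PEP 517 build frontend, as the new Dockerfile line does
    python3 -m pip install build
    # build a wheel from the current source tree; output lands in ./dist/
    python3 -m build --wheel .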

qa/L0_implicit_state/models/growable_memory/config.pbtxt

Lines changed: 3 additions & 1 deletion

@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -28,6 +28,8 @@ name: "growable_memory"
 backend: "implicit_state"
 max_batch_size: 0
 sequence_batching {
+  # Set large idle timeout to avoid inter-request timeouts for test consistency
+  max_sequence_idle_microseconds: 10000000
   control_input [
     {
       name: "START"

qa/L0_io/gen_libtorch_model.py

Lines changed: 90 additions & 0 deletions

@@ -0,0 +1,90 @@
+#!/usr/bin/python
+# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import torch
+import torch.nn as nn
+
+
+class SumModule(nn.Module):
+    def __init__(self, device):
+        super(SumModule, self).__init__()
+        self.device = device
+
+    def forward(self, INPUT0, INPUT1):
+        INPUT0 = INPUT0.to(self.device)
+        INPUT1 = INPUT1.to(self.device)
+        print(
+            "SumModule - INPUT0 device: {}, INPUT1 device: {}\n".format(
+                INPUT0.device, INPUT1.device
+            )
+        )
+        return INPUT0 + INPUT1
+
+
+class DiffModule(nn.Module):
+    def __init__(self, device):
+        super(DiffModule, self).__init__()
+        self.device = device
+
+    def forward(self, INPUT0, INPUT1):
+        INPUT0 = INPUT0.to(self.device)
+        INPUT1 = INPUT1.to(self.device)
+        print(
+            "DiffModule - INPUT0 device: {}, INPUT1 device: {}\n".format(
+                INPUT0.device, INPUT1.device
+            )
+        )
+        return INPUT0 - INPUT1
+
+
+class TestModel(nn.Module):
+    def __init__(self, device0, device1):
+        super(TestModel, self).__init__()
+        self.device0 = device0
+        self.device1 = device1
+
+        self.layer1 = SumModule(self.device0)
+        self.layer2 = DiffModule(self.device1)
+
+    def forward(self, INPUT0, INPUT1):
+        op0 = self.layer1(INPUT0, INPUT1)
+        op1 = self.layer2(INPUT0, INPUT1)
+        return op0, op1
+
+
+if torch.cuda.device_count() < 2:
+    print("Need at least 2 GPUs to run this test")
+    exit(1)
+
+devices = [("cuda:1", "cuda:0"), ("cpu", "cuda:1")]
+model_names = ["libtorch_multi_gpu", "libtorch_multi_device"]
+
+for device_pair, model_name in zip(devices, model_names):
+    model = TestModel(device_pair[0], device_pair[1])
+    model_path = "models/" + model_name + "/1/model.pt"
+    scripted_model = torch.jit.script(model)
+    scripted_model.save(model_path)
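
The generator saves TorchScript models whose SumModule and DiffModule live on different devices, which is what lets L0_io exercise host-to-device and device-to-device transfer paths. A sketch of how the script is driven, mirroring the L0_io/test.sh hunks below and assuming $MODELSDIR resolves to ./models to match the script's relative output paths (that variable comes from the test script, not this file):

    # create the version directories the generator expects, then run it
    mkdir -p $MODELSDIR/libtorch_multi_device/1 $MODELSDIR/libtorch_multi_gpu/1
    python3 gen_libtorch_model.py   # writes models/<name>/1/model.pt; needs 2 GPUs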

qa/L0_io/test.sh

Lines changed: 3 additions & 3 deletions

@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -38,7 +38,8 @@ if [ ! -z "$TEST_REPO_ARCH" ]; then
     REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
 fi
 
-export CUDA_VISIBLE_DEVICES=0,1,2,3
+# This test requires at least 2 GPUs to test h2d and d2d transfer combinations
+export CUDA_VISIBLE_DEVICES=0,1
 
 IO_TEST_UTIL=./memory_alloc
 CLIENT_LOG="./client.log"
@@ -147,7 +148,6 @@ cp -r $ENSEMBLEDIR/nop_TYPE_FP32_-1 $MODELSDIR/. && \
 
 # prepare libtorch multi-device and multi-gpu models
 cp -r ../L0_libtorch_instance_group_kind_model/models/libtorch_multi_device $MODELSDIR/.
-cp ../L0_libtorch_instance_group_kind_model/gen_models.py ./gen_libtorch_model.py
 mkdir -p $MODELSDIR/libtorch_multi_device/1
 mkdir -p $MODELSDIR/libtorch_multi_gpu/1
 cp $MODELSDIR/libtorch_multi_device/config.pbtxt $MODELSDIR/libtorch_multi_gpu/.
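
With CUDA_VISIBLE_DEVICES narrowed to exactly the two GPUs the test needs, a runner with fewer GPUs would now fail inside the test rather than up front. A hedged sketch of a pre-flight guard a runner could add (not part of this commit; nvidia-smi --list-gpus prints one line per GPU):

    # illustrative guard, not from the source
    if [ "$(nvidia-smi --list-gpus | wc -l)" -lt 2 ]; then
        echo "L0_io requires at least 2 GPUs; skipping"
        exit 0
    fi
    export CUDA_VISIBLE_DEVICES=0,1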

qa/L0_lifecycle/test.sh

Lines changed: 5 additions & 5 deletions

@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2018-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2018-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -1576,7 +1576,7 @@ if [ `grep -c "Model 'custom_zero_1_float32' (version 1) has 1 in-flight inferen
     RET=1
 fi
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 rm -f $CLIENT_LOG
@@ -1614,7 +1614,7 @@ if [ `grep -c "Model 'custom_sequence_int32' (version 1) has 1 in-flight inferen
     RET=1
 fi
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 rm -f $CLIENT_LOG
@@ -1655,7 +1655,7 @@ if [ `grep -c "Model 'ensemble_zero_1_float32' (version 1) has 1 in-flight infer
     RET=1
 fi
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 LOG_IDX=$((LOG_IDX+1))
@@ -2128,7 +2128,7 @@ if [ $? -ne 0 ]; then
 fi
 set -e
 
-kill $SERVER_PID
+kill $SERVER_PID || true
 wait $SERVER_PID
 
 LOG_IDX=$((LOG_IDX+1))
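
All four hunks apply the same hardening: when the server has already exited on its own (which these in-flight-inference tests can provoke), a bare kill returns nonzero and, under set -e, would abort the whole script. A minimal standalone sketch of the idiom, with a sleep process standing in for the server:

    #!/bin/bash
    set -e
    sleep 30 &
    SERVER_PID=$!
    # kill fails if the process is already gone; `|| true` makes that a no-op
    kill $SERVER_PID || true
    # wait reports the signal-induced exit status, so tolerate it here as well
    wait $SERVER_PID || true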

qa/L0_perf_analyzer_capi/test.sh

Lines changed: 4 additions & 14 deletions

@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -218,6 +218,7 @@ if [ $(cat $CLIENT_LOG | grep ": 0 infer/sec\|: 0 usec" | wc -l) -ne 0 ]; then
 fi
 
 $PERF_ANALYZER -v -m simple_savedmodel_sequence_object -p 2000 -t5 --sync \
+    -s ${STABILITY_THRESHOLD} \
     --input-data=$SEQ_JSONDATAFILE \
     --service-kind=triton_c_api --model-repository=$DATADIR \
     --triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1
@@ -234,6 +235,7 @@ fi
 
 set +e
 $PERF_ANALYZER -v -m graphdef_sequence_float32 --shape INPUT:2 \
+    -s ${STABILITY_THRESHOLD} \
     --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE \
     --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE -p2000 \
     --service-kind=triton_c_api --model-repository=$DATADIR \
@@ -250,21 +252,9 @@ if [ $(cat $CLIENT_LOG | grep -P "The supplied shape .+ is incompatible with th
 fi
 set -e
 
-# Negative test for the async mode.
-set +e
-$PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 -a \
-    --service-kind=triton_c_api --model-repository=$DATADIR \
-    --triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
-    >$CLIENT_LOG 2>&1
-if [ $(cat $CLIENT_LOG | grep "not supported by triton_c_api service" | wc -l) -ne 1 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-set -e
-
 for SHARED_MEMORY_TYPE in system cuda; do
     $PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 \
+        -s ${STABILITY_THRESHOLD} \
         --shared-memory=$SHARED_MEMORY_TYPE \
         --service-kind=triton_c_api --model-repository=$DATADIR \
         --triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1
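
The recurring change here threads -s ${STABILITY_THRESHOLD} into every invocation: perf_analyzer's -s (stability percentage) flag sets how much run-to-run variance a measurement may show before being declared unstable. The variable's definition is outside this diff; a hedged sketch with an illustrative value:

    # illustrative only; the real script defines STABILITY_THRESHOLD elsewhere
    STABILITY_THRESHOLD=${STABILITY_THRESHOLD:=15}   # tolerate 15% variance
    perf_analyzer -v -m graphdef_int32_int32_int32 -p2000 -b 1 \
        -s ${STABILITY_THRESHOLD} \
        --service-kind=triton_c_api --model-repository=$DATADIR \
        --triton-server-directory=$SERVER_LIBRARY_PATH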

qa/L0_pinned_memory/test.sh

Lines changed: 8 additions & 3 deletions

@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -38,6 +38,9 @@ if [ ! -z "$TEST_REPO_ARCH" ]; then
     REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
 fi
 
+# Use "--request-count" throughout the test to PA stability criteria and
+# reduce flaky failures from PA unstable measurements.
+REQUEST_COUNT=10
 CLIENT=../clients/perf_client
 # Only use libtorch as it accepts GPU I/O and it can handle variable shape
 BACKENDS=${BACKENDS:="libtorch"}
@@ -91,7 +94,7 @@ for BACKEND in $BACKENDS; do
 
     # Sanity check that the server allocates pinned memory for large size
     set +e
-    $CLIENT -m${ENSEMBLE_NAME} --shape INPUT0:16777216
+    $CLIENT -m${ENSEMBLE_NAME} --shape INPUT0:16777216 --request-count ${REQUEST_COUNT}
     if (( $? != 0 )); then
         RET=1
     fi
@@ -128,6 +131,7 @@ for BACKEND in $BACKENDS; do
     for TENSOR_SIZE in 16384 1048576 2097152 4194304 8388608 16777216; do
         $CLIENT -i grpc -u localhost:8001 -m${ENSEMBLE_NAME} \
             --shape INPUT0:${TENSOR_SIZE} \
+            --request-count ${REQUEST_COUNT} \
            >> ${BACKEND}.${TENSOR_SIZE}.pinned.log 2>&1
         if (( $? != 0 )); then
             RET=1
@@ -150,7 +154,7 @@ for BACKEND in $BACKENDS; do
 
     # Sanity check that the server allocates non-pinned memory
     set +e
-    $CLIENT -m${ENSEMBLE_NAME} --shape INPUT0:1
+    $CLIENT -m${ENSEMBLE_NAME} --shape INPUT0:1 --request-count ${REQUEST_COUNT}
     if (( $? != 0 )); then
         RET=1
     fi
@@ -180,6 +184,7 @@ for BACKEND in $BACKENDS; do
     for TENSOR_SIZE in 16384 1048576 2097152 4194304 8388608 16777216; do
         $CLIENT -i grpc -u localhost:8001 -m${ENSEMBLE_NAME} \
             --shape INPUT0:${TENSOR_SIZE} \
+            --request-count ${REQUEST_COUNT} \
            >> ${BACKEND}.${TENSOR_SIZE}.nonpinned.log 2>&1
         if (( $? != 0 )); then
             RET=1
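
Where the previous file loosens the stability criterion, this one sidesteps it: with --request-count, perf_client sends a fixed number of requests instead of iterating measurement windows until latency stabilizes, which is what the added comment means by reducing flaky failures from unstable measurements. A hedged one-liner showing the shape of the call (model name and tensor size are illustrative):

    # illustrative invocation: 10 requests, no stability loop
    perf_client -m my_ensemble --shape INPUT0:16384 --request-count 10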
