Skip to content

Commit 8233946

Browse files
authored
Add TLS support with self-signed certificate. (#335)
1 parent ef9b92f commit 8233946

File tree

5 files changed

+114
-4
lines changed

5 files changed

+114
-4
lines changed

Diff for: pkg/ext-proc/main.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,13 @@ var (
7171
"refreshPrometheusMetricsInterval",
7272
runserver.DefaultRefreshPrometheusMetricsInterval,
7373
"interval to flush prometheus metrics")
74-
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
74+
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
75+
secureServing = flag.Bool(
76+
"secureServing", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.")
77+
certPath = flag.String(
78+
"certPath", "", "The path to the certificate for secure serving. The certificate and private key files "+
79+
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+
80+
"then a self-signed certificate is used.")
7581

7682
scheme = runtime.NewScheme()
7783
)
@@ -133,6 +139,8 @@ func run() error {
133139
RefreshMetricsInterval: *refreshMetricsInterval,
134140
RefreshPrometheusMetricsInterval: *refreshPrometheusMetricsInterval,
135141
Datastore: datastore,
142+
SecureServing: *secureServing,
143+
CertPath: *certPath,
136144
}
137145
if err := serverRunner.SetupWithManager(mgr); err != nil {
138146
klog.ErrorS(err, "Failed to setup ext-proc server")

Diff for: pkg/ext-proc/server/runserver.go

+80-2
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,19 @@ package server
22

33
import (
44
"context"
5+
"crypto/rand"
6+
"crypto/rsa"
7+
"crypto/tls"
8+
"crypto/x509"
9+
"crypto/x509/pkix"
10+
"encoding/pem"
511
"fmt"
12+
"math/big"
613
"time"
714

815
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
916
"google.golang.org/grpc"
17+
"google.golang.org/grpc/credentials"
1018
"k8s.io/apimachinery/pkg/types"
1119
klog "k8s.io/klog/v2"
1220
ctrl "sigs.k8s.io/controller-runtime"
@@ -27,6 +35,8 @@ type ExtProcServerRunner struct {
2735
RefreshMetricsInterval time.Duration
2836
RefreshPrometheusMetricsInterval time.Duration
2937
Datastore *backend.K8sDatastore
38+
SecureServing bool
39+
CertPath string
3040
}
3141

3242
// Default values for CLI flags in main
@@ -38,6 +48,7 @@ const (
3848
DefaultRefreshPodsInterval = 10 * time.Second // default for --refreshPodsInterval
3949
DefaultRefreshMetricsInterval = 50 * time.Millisecond // default for --refreshMetricsInterval
4050
DefaultRefreshPrometheusMetricsInterval = 5 * time.Second // default for --refreshPrometheusMetricsInterval
51+
DefaultSecureServing = true // default for --secureServing
4152
)
4253

4354
func NewDefaultExtProcServerRunner() *ExtProcServerRunner {
@@ -49,6 +60,7 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner {
4960
RefreshPodsInterval: DefaultRefreshPodsInterval,
5061
RefreshMetricsInterval: DefaultRefreshMetricsInterval,
5162
RefreshPrometheusMetricsInterval: DefaultRefreshPrometheusMetricsInterval,
63+
SecureServing: DefaultSecureServing,
5264
// Datastore can be assigned later.
5365
}
5466
}
@@ -107,8 +119,29 @@ func (r *ExtProcServerRunner) AsRunnable(
107119
return err
108120
}
109121

110-
// Init the server.
111-
srv := grpc.NewServer()
122+
var srv *grpc.Server
123+
if r.SecureServing {
124+
var cert tls.Certificate
125+
var err error
126+
if r.CertPath != "" {
127+
cert, err = tls.LoadX509KeyPair(r.CertPath+"/tls.crt", r.CertPath+"/tls.key")
128+
} else {
129+
// Create tls based credential.
130+
cert, err = createSelfSignedTLSCertificate()
131+
}
132+
if err != nil {
133+
klog.ErrorS(err, "Failed to create self signed certificate")
134+
return err
135+
}
136+
137+
creds := credentials.NewTLS(&tls.Config{
138+
Certificates: []tls.Certificate{cert},
139+
})
140+
// Init the server.
141+
srv = grpc.NewServer(grpc.Creds(creds))
142+
} else {
143+
srv = grpc.NewServer()
144+
}
112145
extProcPb.RegisterExternalProcessorServer(
113146
srv,
114147
handlers.NewServer(pp, scheduling.NewScheduler(pp), r.TargetEndpointKey, r.Datastore),
@@ -118,3 +151,48 @@ func (r *ExtProcServerRunner) AsRunnable(
118151
return runnable.GRPCServer("ext-proc", srv, r.GrpcPort).Start(ctx)
119152
}))
120153
}
154+
155+
// createSelfSignedTLSCertificate generates a fresh RSA key and a matching
// self-signed X.509 certificate entirely in memory, and returns them as a
// tls.Certificate. A new key and serial number are produced on every call.
//
// On any failure the error is logged through klog and returned together with
// a zero-value tls.Certificate.
//
// NOTE(review): the template sets no DNSNames/IPAddresses, so a client that
// verifies the server name would presumably reject this certificate — confirm
// that peers are configured to skip verification.
func createSelfSignedTLSCertificate() (tls.Certificate, error) {
156+
// Draw a random serial number from the range [0, 2^128).
serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 128)
157+
serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
158+
if err != nil {
159+
klog.ErrorS(err, "Failed to create serial number for self-signed cert")
160+
return tls.Certificate{}, err
161+
}
162+
// Capture the current time once so NotBefore and NotAfter share one base.
now := time.Now()
163+
notBefore := now.UTC()
164+
// Certificate template: usable for TLS server authentication only.
template := x509.Certificate{
165+
SerialNumber: serialNumber,
166+
Subject: pkix.Name{
167+
Organization: []string{"Inference Ext"},
168+
},
169+
NotBefore: notBefore,
170+
NotAfter: now.Add(time.Hour * 24 * 365 * 10).UTC(), // 10 years
171+
KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
172+
ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
173+
BasicConstraintsValid: true,
174+
}
175+
176+
// Generate the 4096-bit RSA key pair that both owns and signs the certificate.
priv, err := rsa.GenerateKey(rand.Reader, 4096)
177+
if err != nil {
178+
klog.ErrorS(err, "Failed to generate key for self-signed cert")
179+
return tls.Certificate{}, err
180+
}
181+
182+
// The template is passed as both the certificate and its parent, which is
// what makes the result self-signed.
derBytes, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv)
183+
if err != nil {
184+
klog.ErrorS(err, "Failed to create self-signed certificate")
185+
return tls.Certificate{}, err
186+
}
187+
188+
// PEM-encode the DER certificate so it can be handed to tls.X509KeyPair.
certBytes := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: derBytes})
189+
190+
// Serialize the private key as PKCS#8 and PEM-encode it as well.
privBytes, err := x509.MarshalPKCS8PrivateKey(priv)
191+
if err != nil {
192+
klog.ErrorS(err, "Failed to marshal private key for self-signed certificate")
193+
return tls.Certificate{}, err
194+
}
195+
keyBytes := pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: privBytes})
196+
197+
// Parse the PEM pair back into a tls.Certificate; its error, if any, is
// returned to the caller without being logged here.
return tls.X509KeyPair(certBytes, keyBytes)
198+
}

Diff for: pkg/manifests/gateway/patch_policy.yaml

+14
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,20 @@ spec:
3535
max_pending_requests: 40000
3636
max_requests: 40000
3737

38+
# This ensures that envoy accepts untrusted certificates. We tried to explicitly
39+
# set TrustChainVerification to ACCEPT_UNTRUSTED, but that actually didn't work
40+
# and what worked is setting the common_tls_context to empty.
41+
- type: "type.googleapis.com/envoy.config.cluster.v3.Cluster"
42+
name: "envoyextensionpolicy/default/ext-proc-policy/extproc/0"
43+
operation:
44+
op: add
45+
path: "/transport_socket"
46+
value:
47+
name: "envoy.transport_sockets.tls"
48+
typed_config:
49+
"@type": "type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext"
50+
common_tls_context: {}
51+
3852
- type: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration"
3953
name: default/inference-gateway/llm-gw
4054
operation:

Diff for: test/integration/hermetic_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,7 @@ func BeforeSuit() {
478478
// Adjust from defaults
479479
serverRunner.PoolName = "vllm-llama2-7b-pool"
480480
serverRunner.Datastore = backend.NewK8sDataStore()
481+
serverRunner.SecureServing = false
481482

482483
if err := serverRunner.SetupWithManager(mgr); err != nil {
483484
logutil.Fatal(err, "Failed to setup server runner")

Diff for: test/testdata/envoy.yaml

+10-1
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,15 @@ data:
169169
max_pending_requests: 40000
170170
max_requests: 40000
171171
max_retries: 1024
172+
# This ensures that envoy accepts untrusted certificates. We tried to explicitly
173+
# set TrustChainVerification to ACCEPT_UNTRUSTED, but that actually didn't work
174+
# and what worked is leaving validation_context under common_tls_context empty.
175+
transport_socket:
176+
name: "envoy.transport_sockets.tls"
177+
typed_config:
178+
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
179+
common_tls_context:
180+
validation_context:
172181
typed_extension_protocol_options:
173182
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
174183
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
@@ -219,7 +228,7 @@ spec:
219228
- "--service-node"
220229
- "$(ENVOY_POD_NAME)"
221230
- "--log-level"
222-
- "debug"
231+
- "trace"
223232
- "--cpuset-threads"
224233
- "--drain-strategy"
225234
- "immediate"

0 commit comments

Comments
 (0)