Skip to content

Commit ea852e2

Browse files
committed
TEP-0124 implement opentelemetry and jaeger tracing
Adds OpenTelemetry instrumentation code to the pipelinerun and taskrun reconcilers. Also makes the required changes in main.go to include Jaeger as the tracing backend.
1 parent d310355 commit ea852e2

File tree

193 files changed

+37415
-6
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

193 files changed

+37415
-6
lines changed

cmd/controller/main.go

+79-2
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ limitations under the License.
1717
package main
1818

1919
import (
20+
"context"
2021
"flag"
2122
"log"
2223
"net/http"
2324
"os"
25+
"time"
2426

2527
"github.com/tektoncd/pipeline/pkg/apis/pipeline"
2628
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
@@ -37,11 +39,23 @@ import (
3739
"knative.dev/pkg/injection"
3840
"knative.dev/pkg/injection/sharedmain"
3941
"knative.dev/pkg/signals"
42+
43+
"go.opentelemetry.io/otel"
44+
"go.opentelemetry.io/otel/exporters/jaeger"
45+
"go.opentelemetry.io/otel/propagation"
46+
"go.opentelemetry.io/otel/sdk/resource"
47+
tracesdk "go.opentelemetry.io/otel/sdk/trace"
48+
semconv "go.opentelemetry.io/otel/semconv/v1.12.0"
49+
"go.opentelemetry.io/otel/trace"
4050
)
4151

4252
const (
4353
// ControllerLogKey is the name of the logger for the controller cmd
4454
ControllerLogKey = "tekton-pipelines-controller"
55+
// TracerProviderPipelineRun is the service name given to the tracer provider of the pipelinerun reconciler
56+
TracerProviderPipelineRun = "pipeline-reconciler"
57+
// TracerProviderTaskRun is the service name given to the tracer provider of the taskrun reconciler
58+
TracerProviderTaskRun = "taskrun-reconciler"
4559
)
4660

4761
func main() {
@@ -103,16 +117,79 @@ func main() {
103117
log.Fatal(http.ListenAndServe(":"+port, mux))
104118
}()
105119

120+
// initialize opentelemetry
121+
tpPipelineRun, err := tracerProvider(TracerProviderPipelineRun)
122+
if err != nil {
123+
log.Printf("failed to initialize tracerProvider for pipelinerun, falling back to no-op provider, %s", err.Error())
124+
tpPipelineRun = trace.NewNoopTracerProvider()
125+
}
126+
tpTaskrun, err := tracerProvider(TracerProviderTaskRun)
127+
if err != nil {
128+
log.Printf("failed to initialize tracerProvider for taskrun, falling back to no-op provider, %s", err.Error())
129+
tpTaskrun = trace.NewNoopTracerProvider()
130+
}
131+
otel.SetTextMapPropagator(propagation.TraceContext{})
132+
ctx, cancel := context.WithCancel(ctx)
133+
defer cancel()
134+
106135
ctx = filteredinformerfactory.WithSelectors(ctx, v1beta1.ManagedByLabelKey)
107136
sharedmain.MainWithConfig(ctx, ControllerLogKey, cfg,
108-
taskrun.NewController(opts, clock.RealClock{}),
109-
pipelinerun.NewController(opts, clock.RealClock{}),
137+
taskrun.NewController(opts, clock.RealClock{}, tpTaskrun),
138+
pipelinerun.NewController(opts, clock.RealClock{}, tpPipelineRun),
110139
run.NewController(),
111140
resolutionrequest.NewController(clock.RealClock{}),
112141
customrun.NewController(),
113142
)
143+
144+
// Cleanly shutdown and flush telemetry when the application exits.
145+
defer func(ctx context.Context) {
146+
// Do not make the application hang when it is shutdown.
147+
ctx, cancel = context.WithTimeout(ctx, time.Second*5)
148+
defer cancel()
149+
150+
// shutdown is only needed when tracerProvider is initialized with jaeger
151+
// not needed when tracerProvider is NewNoopTracerProvider
152+
if tp, ok := tpPipelineRun.(*tracesdk.TracerProvider); ok {
153+
tp.Shutdown(ctx)
154+
}
155+
if tp, ok := tpTaskrun.(*tracesdk.TracerProvider); ok {
156+
tp.Shutdown(ctx)
157+
}
158+
}(ctx)
114159
}
115160

116161
// handler replies to every request with an HTTP 200 (OK) status and writes no body.
func handler(w http.ResponseWriter, r *http.Request) {
117162
w.WriteHeader(http.StatusOK)
118163
}
164+
165+
// tracerProvider returns an OpenTelemetry TracerProvider for the given service
166+
// name. When OTEL_EXPORTER_JAEGER_ENDPOINT is set, spans are batched to a Jaeger
167+
// exporter and the service name is recorded in the provider's Resource; when it
168+
// is unset, a no-op TracerProvider is returned and tracing is disabled.
169+
func tracerProvider(service string) (trace.TracerProvider, error) {
170+
// Create the Jaeger exporter
171+
// The following env variables are used by the sdk for creating the exporter
172+
// - OTEL_EXPORTER_JAEGER_ENDPOINT is the HTTP endpoint for sending spans directly to a collector.
173+
// - OTEL_EXPORTER_JAEGER_USER is the username to be sent as authentication to the collector endpoint.
174+
// - OTEL_EXPORTER_JAEGER_PASSWORD is the password to be sent as authentication to the collector endpoint.
175+
176+
if _, e := os.LookupEnv("OTEL_EXPORTER_JAEGER_ENDPOINT"); !e {
177+
// The Jaeger endpoint is not defined: disable tracing by returning a no-op tracerProvider
178+
return trace.NewNoopTracerProvider(), nil
179+
}
180+
181+
exp, err := jaeger.New(jaeger.WithCollectorEndpoint())
182+
if err != nil {
183+
return nil, err
184+
}
185+
// Initialize the tracerProvider with the Jaeger exporter; spans are exported in batches
186+
tp := tracesdk.NewTracerProvider(
187+
tracesdk.WithBatcher(exp),
188+
// Record information about the service in a Resource.
189+
tracesdk.WithResource(resource.NewWithAttributes(
190+
semconv.SchemaURL,
191+
semconv.ServiceNameKey.String(service),
192+
)),
193+
)
194+
return tp, nil
195+
}

config/controller.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,13 @@ spec:
124124
value: /etc/ssl/certs
125125
- name: METRICS_DOMAIN
126126
value: tekton.dev/pipeline
127+
# The following variables can be uncommented with correct values to enable Jaeger tracing
128+
#- name: OTEL_EXPORTER_JAEGER_ENDPOINT
129+
# value: http://jaeger-collector.jaeger:14268/api/traces
130+
#- name: OTEL_EXPORTER_JAEGER_USER
131+
# value: username
132+
#- name: OTEL_EXPORTER_JAEGER_PASSWORD
133+
# value: password
127134
securityContext:
128135
allowPrivilegeEscalation: false
129136
capabilities:

docs/developers/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ channel for training and tutorials on Tekton!
1111
- Developing on Tekton:
1212
- [Local Setup](./local-setup.md): Getting your local environment set up to develop on Tekton.
1313
- [Testing](../../test/README.md): Running Tekton tests.
14+
- [Tracing](./tracing.md): Enabling Jaeger tracing
1415
- How Tekton is run on Kubernetes:
1516
- [Controller Logic](./controller-logic.md): How Tekton extends Kubernetes using Knative.
1617
- [TaskRun Logic](./taskruns.md): How TaskRuns are run in pods.

docs/developers/tracing.md

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Tracing setup
2+
3+
This section shows how to enable tracing for the Tekton reconcilers and
4+
capture traces in Jaeger.
5+
6+
## Prerequisites
7+
8+
Jaeger should be installed and accessible from the cluster. The easiest
9+
way to set it up is with Helm, as shown below.
10+
11+
The following commands install Jaeger in the `jaeger` namespace:
12+
13+
```
14+
helm repo add jaegertracing https://jaegertracing.github.io/helm-charts
15+
helm upgrade -i jaeger jaegertracing/jaeger -n jaeger --create-namespace
16+
```
17+
18+
Use port-forwarding to open the Jaeger query UI, or change the service
19+
type to `LoadBalancer` to access the service directly:
20+
21+
```
22+
kubectl port-forward svc/jaeger-query -n jaeger 8080:80
23+
```
24+
25+
Check the official [Jaeger docs](https://www.jaegertracing.io/docs/) for details on how to work with Jaeger.
26+
27+
## Enabling tracing
28+
29+
The Tekton Pipelines controller uses the following environment variables to connect to Jaeger:
30+
31+
* `OTEL_EXPORTER_JAEGER_ENDPOINT` is the HTTP endpoint for sending spans directly to a collector.
32+
* `OTEL_EXPORTER_JAEGER_USER` is the username to be sent as authentication to the collector endpoint.
33+
* `OTEL_EXPORTER_JAEGER_PASSWORD` is the password to be sent as authentication to the collector endpoint.
34+
35+
`OTEL_EXPORTER_JAEGER_ENDPOINT` is the only mandatory variable to enable tracing. You can find these variables in the controller manifest as well.

go.mod

+5
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ require (
4646
github.com/letsencrypt/boulder v0.0.0-20220929215747-76583552c2be
4747
github.com/titanous/rocacheck v0.0.0-20171023193734-afe73141d399
4848
k8s.io/utils v0.0.0-20221012122500-cfd413dd9e85
49+
go.opentelemetry.io/otel v1.11.1
50+
go.opentelemetry.io/otel/exporters/jaeger v1.11.1
51+
go.opentelemetry.io/otel/sdk v1.11.1
52+
go.opentelemetry.io/otel/trace v1.11.1
4953
)
5054

5155
require (
@@ -65,6 +69,7 @@ require (
6569
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 // indirect
6670
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
6771
github.com/google/gnostic v0.6.9 // indirect
72+
github.com/go-logr/stdr v1.2.2 // indirect
6873
github.com/kr/pretty v0.3.0 // indirect
6974
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
7075
github.com/rogpeppe/go-internal v1.8.0 // indirect

go.sum

+14
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/reconciler/pipelinerun/controller.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
cloudeventclient "github.com/tektoncd/pipeline/pkg/reconciler/events/cloudevent"
3535
"github.com/tektoncd/pipeline/pkg/reconciler/volumeclaim"
3636
resolution "github.com/tektoncd/pipeline/pkg/resolution/resource"
37+
"go.opentelemetry.io/otel/trace"
3738
"k8s.io/client-go/tools/cache"
3839
"k8s.io/utils/clock"
3940
kubeclient "knative.dev/pkg/client/injection/kube/client"
@@ -43,7 +44,7 @@ import (
4344
)
4445

4546
// NewController instantiates a new controller.Impl from knative.dev/pkg/controller
46-
func NewController(opts *pipeline.Options, clock clock.PassiveClock) func(context.Context, configmap.Watcher) *controller.Impl {
47+
func NewController(opts *pipeline.Options, clock clock.PassiveClock, tracerProvider trace.TracerProvider) func(context.Context, configmap.Watcher) *controller.Impl {
4748
return func(ctx context.Context, cmw configmap.Watcher) *controller.Impl {
4849
logger := logging.FromContext(ctx)
4950
kubeclientset := kubeclient.Get(ctx)
@@ -69,6 +70,7 @@ func NewController(opts *pipeline.Options, clock clock.PassiveClock) func(contex
6970
metrics: pipelinerunmetrics.Get(ctx),
7071
pvcHandler: volumeclaim.NewPVCHandler(kubeclientset, logger),
7172
resolutionRequester: resolution.NewCRDRequester(resolutionclient.Get(ctx), resolutionInformer.Lister()),
73+
tracerProvider: tracerProvider,
7274
}
7375
impl := pipelinerunreconciler.NewImpl(ctx, c, func(impl *controller.Impl) controller.Options {
7476
return controller.Options{

0 commit comments

Comments
 (0)