Skip to content

Commit d99fe3b

Browse files
committed
TEP-0124 implement opentelemetry and jaeger tracing
Adds opentelemetry instrumentation code to pipelinerun and taskrun reconcilers. Also made required changes in the main.go to include jaeger as tracing backend
1 parent 38c739a commit d99fe3b

File tree

193 files changed

+37383
-7
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

193 files changed

+37383
-7
lines changed

cmd/controller/main.go

+79-2
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ limitations under the License.
1717
package main
1818

1919
import (
20+
"context"
2021
"flag"
2122
"log"
2223
"net/http"
2324
"os"
25+
"time"
2426

2527
"github.com/tektoncd/pipeline/pkg/apis/pipeline"
2628
"github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1"
@@ -36,11 +38,23 @@ import (
3638
"knative.dev/pkg/injection"
3739
"knative.dev/pkg/injection/sharedmain"
3840
"knative.dev/pkg/signals"
41+
42+
"go.opentelemetry.io/otel"
43+
"go.opentelemetry.io/otel/exporters/jaeger"
44+
"go.opentelemetry.io/otel/propagation"
45+
"go.opentelemetry.io/otel/sdk/resource"
46+
tracesdk "go.opentelemetry.io/otel/sdk/trace"
47+
semconv "go.opentelemetry.io/otel/semconv/v1.12.0"
48+
"go.opentelemetry.io/otel/trace"
3949
)
4050

4151
const (
	// ControllerLogKey is the name of the logger for the controller cmd.
	ControllerLogKey = "tekton-pipelines-controller"
	// TracerProviderPipelineRun is the service name passed to tracerProvider
	// for the tracer provider handed to the pipelinerun reconciler.
	TracerProviderPipelineRun = "pipeline-reconciler"
	// TracerProviderTaskRun is the service name passed to tracerProvider for
	// the tracer provider handed to the taskrun reconciler.
	TracerProviderTaskRun = "taskrun-reconciler"
)
4559

4660
func main() {
@@ -102,17 +116,80 @@ func main() {
102116
log.Fatal(http.ListenAndServe(":"+port, mux))
103117
}()
104118

119+
// initialize opentelemetry
120+
tpPipelineRun, err := tracerProvider(TracerProviderPipelineRun)
121+
if err != nil {
122+
log.Printf("failed to initialize tracerProvider for pipelinerun, falling back to no-op provider, %s", err.Error())
123+
tpPipelineRun = trace.NewNoopTracerProvider()
124+
}
125+
tpTaskrun, err := tracerProvider(TracerProviderTaskRun)
126+
if err != nil {
127+
log.Printf("failed to initialize tracerProvider for taskrun, falling back to no-op provider, %s", err.Error())
128+
tpTaskrun = trace.NewNoopTracerProvider()
129+
}
130+
otel.SetTextMapPropagator(propagation.TraceContext{})
131+
ctx, cancel := context.WithCancel(ctx)
132+
defer cancel()
133+
105134
ctx = filteredinformerfactory.WithSelectors(ctx, v1beta1.ManagedByLabelKey)
106135
sharedmain.MainWithConfig(ctx, ControllerLogKey, cfg,
107-
taskrun.NewController(opts, clock.RealClock{}),
108-
pipelinerun.NewController(opts, clock.RealClock{}),
136+
taskrun.NewController(opts, clock.RealClock{}, tpTaskrun),
137+
pipelinerun.NewController(opts, clock.RealClock{}, tpPipelineRun),
109138
run.NewController(),
110139
resolutionrequest.NewController(clock.RealClock{}),
111140
// TODO(jerop, abayer) uncomment after we align on retries in customruns
112141
// customrun.NewController(),
113142
)
143+
144+
// Cleanly shutdown and flush telemetry when the application exits.
145+
defer func(ctx context.Context) {
146+
// Do not make the application hang when it is shutdown.
147+
ctx, cancel = context.WithTimeout(ctx, time.Second*5)
148+
defer cancel()
149+
150+
// shutdown is only needed when tracerProvider is initialized with jaeger
151+
// not needed when tracerProvider is NewNoopTracerProvider
152+
if tp, ok := tpPipelineRun.(*tracesdk.TracerProvider); ok {
153+
tp.Shutdown(ctx)
154+
}
155+
if tp, ok := tpTaskrun.(*tracesdk.TracerProvider); ok {
156+
tp.Shutdown(ctx)
157+
}
158+
}(ctx)
114159
}
115160

116161
// handler answers every request with 200 OK and an empty body. The request
// itself is never inspected.
func handler(w http.ResponseWriter, r *http.Request) {
	w.WriteHeader(http.StatusOK)
}
164+
165+
// tracerProvider returns an OpenTelemetry TracerProvider configured to use
166+
// the Jaeger exporter that will send spans to the provided url. The returned
167+
// TracerProvider will also use a Resource configured with all the information
168+
// about the application.
169+
func tracerProvider(service string) (trace.TracerProvider, error) {
170+
// Create the Jaeger exporter
171+
// The following env variables are used by the sdk for creating the exporter
172+
// - OTEL_EXPORTER_JAEGER_ENDPOINT is the HTTP endpoint for sending spans directly to a collector.
173+
// - OTEL_EXPORTER_JAEGER_USER is the username to be sent as authentication to the collector endpoint.
174+
// - OTEL_EXPORTER_JAEGER_PASSWORD is the password to be sent as authentication to the collector endpoint.
175+
176+
if _, e := os.LookupEnv("OTEL_EXPORTER_JAEGER_ENDPOINT"); !e {
177+
// jaeger endpoint is not defined, disable tracing and return no-op tracerProvider
178+
return trace.NewNoopTracerProvider(), nil
179+
}
180+
181+
exp, err := jaeger.New(jaeger.WithCollectorEndpoint())
182+
if err != nil {
183+
return nil, err
184+
}
185+
// Initialize tracerProvider with the jaeger exporter
186+
tp := tracesdk.NewTracerProvider(
187+
tracesdk.WithBatcher(exp),
188+
// Record information about the service in a Resource.
189+
tracesdk.WithResource(resource.NewWithAttributes(
190+
semconv.SchemaURL,
191+
semconv.ServiceNameKey.String(service),
192+
)),
193+
)
194+
return tp, nil
195+
}

config/controller.yaml

+7
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,13 @@ spec:
124124
value: /etc/ssl/certs
125125
- name: METRICS_DOMAIN
126126
value: tekton.dev/pipeline
127+
# The following variables can be uncommented with correct values to enable Jaeger tracing
128+
#- name: OTEL_EXPORTER_JAEGER_ENDPOINT
129+
# value: http://jaeger-collector.jaeger:14268/api/traces
130+
#- name: OTEL_EXPORTER_JAEGER_USER
131+
# value: username
132+
#- name: OTEL_EXPORTER_JAEGER_PASSWORD
133+
# value: password
127134
securityContext:
128135
allowPrivilegeEscalation: false
129136
capabilities:

docs/developers/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ channel for training and tutorials on Tekton!
1111
- Developing on Tekton:
1212
- [Local Setup](./local-setup.md): Getting your local environment set up to develop on Tekton.
1313
- [Testing](../../test/README.md): Running Tekton tests.
14+
- [Tracing](./tracing.md): Enabling Jaeger tracing.
1415
- How Tekton is run on Kubernetes:
1516
- [Controller Logic](./controller-logic.md): How Tekton extends Kubernetes using Knative.
1617
- [TaskRun Logic](./taskruns.md): How TaskRuns are run in pods.

docs/developers/tracing.md

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Tracing setup
2+
3+
This section shows how to enable tracing for Tekton reconcilers and
4+
capture traces in Jaeger
5+
6+
## Prerequisites
7+
8+
Jaeger should be installed and accessible from the cluster. The easiest
9+
way to set it up is using helm as below
10+
11+
The following command installs Jaeger in the `jaeger` namespace:
12+
13+
```
14+
helm repo add jaegertracing https://jaegertracing.github.io/helm-charts
15+
helm upgrade -i jaeger jaegertracing/jaeger -n jaeger --create-namespace
16+
```
17+
18+
Use port-forwarding to open the jaeger query UI or adjust the service
19+
type to `LoadBalancer` to access the service directly.
20+
21+
```
22+
kubectl port-forward svc/jaeger-query -n jaeger 8080:80
23+
```
24+
25+
Check the official [Jaeger docs](https://www.jaegertracing.io/docs/) on how to work with Jaeger
26+
27+
## Enabling tracing
28+
29+
The Tekton Pipelines controller expects the following environment variables to be able to connect to Jaeger:
30+
31+
* `OTEL_EXPORTER_JAEGER_ENDPOINT` is the HTTP endpoint for sending spans directly to a collector.
32+
* `OTEL_EXPORTER_JAEGER_USER` is the username to be sent as authentication to the collector endpoint.
33+
* `OTEL_EXPORTER_JAEGER_PASSWORD` is the password to be sent as authentication to the collector endpoint.
34+
35+
`OTEL_EXPORTER_JAEGER_ENDPOINT` is the only mandatory variable to enable tracing. You can find these variables in the controller manifest as well.

go.mod

+5
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ require (
4545
github.com/google/go-containerregistry/pkg/authn/k8schain v0.0.0-20221030203717-1711cefd7eec
4646
github.com/letsencrypt/boulder v0.0.0-20220929215747-76583552c2be
4747
github.com/titanous/rocacheck v0.0.0-20171023193734-afe73141d399
48+
go.opentelemetry.io/otel v1.11.1
49+
go.opentelemetry.io/otel/exporters/jaeger v1.11.1
50+
go.opentelemetry.io/otel/sdk v1.11.1
51+
go.opentelemetry.io/otel/trace v1.11.1
4852
k8s.io/utils v0.0.0-20221012122500-cfd413dd9e85
4953
)
5054

@@ -64,6 +68,7 @@ require (
6468
require (
6569
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.13.6 // indirect
6670
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
71+
github.com/go-logr/stdr v1.2.2 // indirect
6772
github.com/google/gnostic v0.6.9 // indirect
6873
github.com/kr/pretty v0.3.0 // indirect
6974
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect

go.sum

+12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/reconciler/pipelinerun/controller.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
cloudeventclient "github.com/tektoncd/pipeline/pkg/reconciler/events/cloudevent"
3535
"github.com/tektoncd/pipeline/pkg/reconciler/volumeclaim"
3636
resolution "github.com/tektoncd/pipeline/pkg/resolution/resource"
37+
"go.opentelemetry.io/otel/trace"
3738
"k8s.io/client-go/tools/cache"
3839
"k8s.io/utils/clock"
3940
kubeclient "knative.dev/pkg/client/injection/kube/client"
@@ -43,7 +44,7 @@ import (
4344
)
4445

4546
// NewController instantiates a new controller.Impl from knative.dev/pkg/controller
46-
func NewController(opts *pipeline.Options, clock clock.PassiveClock) func(context.Context, configmap.Watcher) *controller.Impl {
47+
func NewController(opts *pipeline.Options, clock clock.PassiveClock, tracerProvider trace.TracerProvider) func(context.Context, configmap.Watcher) *controller.Impl {
4748
return func(ctx context.Context, cmw configmap.Watcher) *controller.Impl {
4849
logger := logging.FromContext(ctx)
4950
kubeclientset := kubeclient.Get(ctx)
@@ -69,6 +70,7 @@ func NewController(opts *pipeline.Options, clock clock.PassiveClock) func(contex
6970
metrics: pipelinerunmetrics.Get(ctx),
7071
pvcHandler: volumeclaim.NewPVCHandler(kubeclientset, logger),
7172
resolutionRequester: resolution.NewCRDRequester(resolutionclient.Get(ctx), resolutionInformer.Lister()),
73+
tracerProvider: tracerProvider,
7274
}
7375
impl := pipelinerunreconciler.NewImpl(ctx, c, func(impl *controller.Impl) controller.Options {
7476
return controller.Options{

0 commit comments

Comments
 (0)