Skip to content

Commit 01c20d0

Browse files
author
OpenShift Bot
authoredOct 4, 2016
Merge pull request #10964 from jcantrill/207_agl_diagnostics
Merged by openshift-bot
2 parents 74154c1 + 1fbfe81 commit 01c20d0

27 files changed

+1799
-7
lines changed
 

‎.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
/.project
55
/.vagrant
66
/.vscode
7+
/.settings
78
/cpu.pprof
89
/assets/app/config.local.js
910
/assets/nbproject

‎docs/man/man1/oadm-diagnostics.1

+12-1
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
5353
are skipped.
5454
.PP
5555
Diagnostics may be individually run by passing diagnostic name as arguments.
56+
57+
.PP
58+
.RS
59+
60+
.nf
61+
oadm diagnostics <DiagnosticName>
62+
63+
.fi
64+
.RE
65+
66+
.PP
5667
The available diagnostic names are:
57-
AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68+
AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
5869

5970

6071
.SH OPTIONS

‎docs/man/man1/oc-adm-diagnostics.1

+12-1
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
5353
are skipped.
5454
.PP
5555
Diagnostics may be individually run by passing diagnostic name as arguments.
56+
57+
.PP
58+
.RS
59+
60+
.nf
61+
oc adm diagnostics <DiagnosticName>
62+
63+
.fi
64+
.RE
65+
66+
.PP
5667
The available diagnostic names are:
57-
AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68+
AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
5869

5970

6071
.SH OPTIONS

‎docs/man/man1/openshift-admin-diagnostics.1

+12-1
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
5353
are skipped.
5454
.PP
5555
Diagnostics may be individually run by passing diagnostic name as arguments.
56+
57+
.PP
58+
.RS
59+
60+
.nf
61+
openshift admin diagnostics <DiagnosticName>
62+
63+
.fi
64+
.RE
65+
66+
.PP
5667
The available diagnostic names are:
57-
AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68+
AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
5869

5970

6071
.SH OPTIONS

‎docs/man/man1/openshift-cli-adm-diagnostics.1

+12-1
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
5353
are skipped.
5454
.PP
5555
Diagnostics may be individually run by passing diagnostic name as arguments.
56+
57+
.PP
58+
.RS
59+
60+
.nf
61+
openshift cli adm diagnostics <DiagnosticName>
62+
63+
.fi
64+
.RE
65+
66+
.PP
5667
The available diagnostic names are:
57-
AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68+
AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
5869

5970

6071
.SH OPTIONS

‎docs/man/man1/openshift-ex-diagnostics.1

+12-1
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,19 @@ diagnostics to run which regular users cannot.
5353
are skipped.
5454
.PP
5555
Diagnostics may be individually run by passing diagnostic name as arguments.
56+
57+
.PP
58+
.RS
59+
60+
.nf
61+
openshift ex diagnostics <DiagnosticName>
62+
63+
.fi
64+
.RE
65+
66+
.PP
5667
The available diagnostic names are:
57-
AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
68+
AggregatedLogging AnalyzeLogs ClusterRegistry ClusterRoleBindings ClusterRoles ClusterRouter ConfigContexts DiagnosticPod MasterConfigCheck MasterNode MetricsApiProxy NodeConfigCheck NodeDefinitions ServiceExternalIPs UnitStatus
5869

5970

6071
.SH OPTIONS

‎pkg/cmd/admin/diagnostics/cluster.go

+14-1
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,24 @@ import (
1414
"github.com/openshift/origin/pkg/client"
1515
osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
1616
clustdiags "github.com/openshift/origin/pkg/diagnostics/cluster"
17+
agldiags "github.com/openshift/origin/pkg/diagnostics/cluster/aggregated_logging"
1718
"github.com/openshift/origin/pkg/diagnostics/types"
1819
)
1920

2021
var (
2122
// availableClusterDiagnostics contains the names of cluster diagnostics that can be executed
2223
// during a single run of diagnostics. Add more diagnostics to the list as they are defined.
23-
availableClusterDiagnostics = sets.NewString(clustdiags.NodeDefinitionsName, clustdiags.ClusterRegistryName, clustdiags.ClusterRouterName, clustdiags.ClusterRolesName, clustdiags.ClusterRoleBindingsName, clustdiags.MasterNodeName, clustdiags.MetricsApiProxyName, clustdiags.ServiceExternalIPsName)
24+
availableClusterDiagnostics = sets.NewString(
25+
agldiags.AggregatedLoggingName,
26+
clustdiags.ClusterRegistryName,
27+
clustdiags.ClusterRouterName,
28+
clustdiags.ClusterRolesName,
29+
clustdiags.ClusterRoleBindingsName,
30+
clustdiags.MasterNodeName,
31+
clustdiags.MetricsApiProxyName,
32+
clustdiags.NodeDefinitionsName,
33+
clustdiags.ServiceExternalIPsName,
34+
)
2435
)
2536

2637
// buildClusterDiagnostics builds cluster Diagnostic objects if a cluster-admin client can be extracted from the rawConfig passed in.
@@ -46,6 +57,8 @@ func (o DiagnosticsOptions) buildClusterDiagnostics(rawConfig *clientcmdapi.Conf
4657
for _, diagnosticName := range requestedDiagnostics {
4758
var d types.Diagnostic
4859
switch diagnosticName {
60+
case agldiags.AggregatedLoggingName:
61+
d = agldiags.NewAggregatedLogging(o.MasterConfigLocation, kclusterClient, clusterClient)
4962
case clustdiags.NodeDefinitionsName:
5063
d = &clustdiags.NodeDefinitions{KubeClient: kclusterClient, OsClient: clusterClient}
5164
case clustdiags.MasterNodeName:

‎pkg/cmd/admin/diagnostics/diagnostics.go

+3
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ you will receive an error if they are not found. For example:
8080
are skipped.
8181
8282
Diagnostics may be individually run by passing diagnostic name as arguments.
83+
84+
%[1]s <DiagnosticName>
85+
8386
The available diagnostic names are:
8487
%[2]s
8588
`

‎pkg/diagnostics/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ save may be your own.
9595

9696
A diagnostic is an object that conforms to the Diagnostic interface
9797
(see pkg/diagnostics/types/diagnostic.go). The diagnostic object should
98-
be built in one of the builders in the pkg/cmd/experimental/diagnostics
98+
be built in one of the builders in the pkg/cmd/admin/diagnostics
9999
package (based on whether it depends on client, cluster-admin, or host
100100
configuration). When executed, the diagnostic logs its findings into
101101
a result object. It should be assumed that they may run in parallel.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package aggregated_logging
2+
3+
import (
4+
"fmt"
5+
6+
"k8s.io/kubernetes/pkg/apis/rbac"
7+
"k8s.io/kubernetes/pkg/util/sets"
8+
)
9+
10+
const clusterReaderRoleBindingName = "cluster-readers"
11+
12+
var clusterReaderRoleBindingNames = sets.NewString(fluentdServiceAccountName)
13+
14+
const clusterReaderUnboundServiceAccount = `
15+
The ServiceAccount '%[1]s' is not a cluster-reader in the '%[2]s' project. This
16+
is required to enable Fluentd to look up pod metadata for the logs it gathers.
17+
As a user with a cluster-admin role, you can grant the permissions by running
18+
the following:
19+
20+
oadm policy add-cluster-role-to-user cluster-reader system:serviceaccount:%[2]s:%[1]s
21+
`
22+
23+
func checkClusterRoleBindings(r diagnosticReporter, adapter clusterRoleBindingsAdapter, project string) {
24+
r.Debug("AGL0600", "Checking ClusterRoleBindings...")
25+
crb, err := adapter.getClusterRoleBinding(clusterReaderRoleBindingName)
26+
if err != nil {
27+
r.Error("AGL0605", err, fmt.Sprintf("There was an error while trying to retrieve the ClusterRoleBindings for the logging stack: %s", err))
28+
return
29+
}
30+
boundServiceAccounts := sets.NewString()
31+
for _, subject := range crb.Subjects {
32+
if subject.Kind == rbac.ServiceAccountKind && subject.Namespace == project {
33+
boundServiceAccounts.Insert(subject.Name)
34+
}
35+
}
36+
for _, name := range clusterReaderRoleBindingNames.List() {
37+
if !boundServiceAccounts.Has(name) {
38+
r.Error("AGL0610", nil, fmt.Sprintf(clusterReaderUnboundServiceAccount, name, project))
39+
}
40+
}
41+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package aggregated_logging
2+
3+
import (
4+
"errors"
5+
"testing"
6+
7+
kapi "k8s.io/kubernetes/pkg/api"
8+
"k8s.io/kubernetes/pkg/apis/rbac"
9+
10+
authapi "github.com/openshift/origin/pkg/authorization/api"
11+
"github.com/openshift/origin/pkg/diagnostics/log"
12+
)
13+
14+
type fakeRoleBindingDiagnostic struct {
15+
fakeDiagnostic
16+
fakeClusterRoleBinding authapi.ClusterRoleBinding
17+
}
18+
19+
func newFakeRoleBindingDiagnostic(t *testing.T) *fakeRoleBindingDiagnostic {
20+
return &fakeRoleBindingDiagnostic{
21+
fakeDiagnostic: *newFakeDiagnostic(t),
22+
}
23+
}
24+
25+
func (f *fakeRoleBindingDiagnostic) getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error) {
26+
if f.err != nil {
27+
return nil, f.err
28+
}
29+
return &f.fakeClusterRoleBinding, nil
30+
}
31+
func (f *fakeRoleBindingDiagnostic) addBinding(name string, namespace string) {
32+
ref := kapi.ObjectReference{
33+
Name: name,
34+
Kind: rbac.ServiceAccountKind,
35+
Namespace: namespace,
36+
}
37+
f.fakeClusterRoleBinding.Subjects = append(f.fakeClusterRoleBinding.Subjects, ref)
38+
}
39+
40+
//test error when client error
41+
func TestCheckClusterRoleBindingsWhenErrorFromClientRetrievingRoles(t *testing.T) {
42+
d := newFakeRoleBindingDiagnostic(t)
43+
d.err = errors.New("client error")
44+
45+
checkClusterRoleBindings(d, d, fakeProject)
46+
47+
d.assertMessage("AGL0605", "Exp. an error message if client error retrieving ClusterRoleBindings", log.ErrorLevel)
48+
d.dumpMessages()
49+
}
50+
51+
func TestCheckClusterRoleBindingsWhenClusterReaderIsNotInProject(t *testing.T) {
52+
d := newFakeRoleBindingDiagnostic(t)
53+
d.addBinding("someName", "someRandomProject")
54+
d.addBinding(fluentdServiceAccountName, fakeProject)
55+
56+
checkClusterRoleBindings(d, d, fakeProject)
57+
58+
d.assertNoErrors()
59+
d.dumpMessages()
60+
}
61+
62+
func TestCheckClusterRoleBindingsWhenUnboundServiceAccounts(t *testing.T) {
63+
d := newFakeRoleBindingDiagnostic(t)
64+
d.addBinding(fluentdServiceAccountName, "someRandomProject")
65+
66+
checkClusterRoleBindings(d, d, fakeProject)
67+
68+
d.assertMessage("AGL0610", "Exp. an error when the exp service-accounts dont have cluster-reader access", log.ErrorLevel)
69+
d.dumpMessages()
70+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
package aggregated_logging
2+
3+
import (
4+
"fmt"
5+
6+
kapi "k8s.io/kubernetes/pkg/api"
7+
kapisext "k8s.io/kubernetes/pkg/apis/extensions"
8+
"k8s.io/kubernetes/pkg/labels"
9+
)
10+
11+
const daemonSetNoLabeledNodes = `
12+
There are no nodes that match the selector for DaemonSet '%[1]s'. This
13+
means Fluentd is not running and is not gathering logs from any nodes.
14+
An example of a command to target a specific node for this DaemonSet:
15+
16+
oc label node/node1.example.com %[2]s
17+
18+
or to label them all:
19+
20+
oc label node --all %[2]s
21+
`
22+
23+
const daemonSetPartialNodesLabeled = `
24+
There are some nodes that match the selector for DaemonSet '%s'.
25+
A list of matching nodes can be discovered by running:
26+
27+
oc get nodes -l %s
28+
`
29+
const daemonSetNoPodsFound = `
30+
There were no pods found that match DaemonSet '%s' with matchLabels '%s'
31+
`
32+
const daemonSetPodsNotRunning = `
33+
The Pod '%[1]s' matched by DaemonSet '%[2]s' is not in '%[3]s' status: %[4]s.
34+
35+
Depending upon the state, this could mean there is an error running the image
36+
for one or more pod containers, the node could be pulling images, etc. Try running
37+
the following commands to get additional information:
38+
39+
oc describe pod %[1]s -n %[5]s
40+
oc logs %[1]s -n %[5]s
41+
oc get events -n %[5]s
42+
`
43+
const daemonSetNotFound = `
44+
There were no DaemonSets in project '%s' that included label '%s'. This implies
45+
the Fluentd pods are not deployed or the logging stack needs to be upgraded. Try
46+
running the installer to upgrade the logging stack.
47+
`
48+
49+
var loggingInfraFluentdSelector = labels.Set{loggingInfraKey: "fluentd"}
50+
51+
func checkDaemonSets(r diagnosticReporter, adapter daemonsetAdapter, project string) {
52+
r.Debug("AGL0400", fmt.Sprintf("Checking DaemonSets in project '%s'...", project))
53+
dsList, err := adapter.daemonsets(project, kapi.ListOptions{LabelSelector: loggingInfraFluentdSelector.AsSelector()})
54+
if err != nil {
55+
r.Error("AGL0405", err, fmt.Sprintf("There was an error while trying to retrieve the logging DaemonSets in project '%s' which is most likely transient: %s", project, err))
56+
return
57+
}
58+
if len(dsList.Items) == 0 {
59+
r.Error("AGL0407", err, fmt.Sprintf(daemonSetNotFound, project, loggingInfraFluentdSelector.AsSelector()))
60+
return
61+
}
62+
nodeList, err := adapter.nodes(kapi.ListOptions{})
63+
if err != nil {
64+
r.Error("AGL0410", err, fmt.Sprintf("There was an error while trying to retrieve the list of Nodes which is most likely transient: %s", err))
65+
return
66+
}
67+
for _, ds := range dsList.Items {
68+
labeled := 0
69+
nodeSelector := labels.Set(ds.Spec.Template.Spec.NodeSelector).AsSelector()
70+
r.Debug("AGL0415", fmt.Sprintf("Checking DaemonSet '%s' nodeSelector '%s'", ds.ObjectMeta.Name, nodeSelector))
71+
for _, node := range nodeList.Items {
72+
if nodeSelector.Matches(labels.Set(node.Labels)) {
73+
labeled = labeled + 1
74+
}
75+
}
76+
switch {
77+
case labeled == 0:
78+
r.Error("AGL0420", nil, fmt.Sprintf(daemonSetNoLabeledNodes, ds.ObjectMeta.Name, nodeSelector))
79+
break
80+
case labeled < len(nodeList.Items):
81+
r.Warn("AGL0425", nil, fmt.Sprintf(daemonSetPartialNodesLabeled, ds.ObjectMeta.Name, nodeSelector))
82+
break
83+
default:
84+
r.Debug("AGL0430", fmt.Sprintf("DaemonSet '%s' matches all nodes", ds.ObjectMeta.Name))
85+
}
86+
if labeled > 0 {
87+
checkDaemonSetPods(r, adapter, ds, project, labeled)
88+
}
89+
}
90+
}
91+
92+
func checkDaemonSetPods(r diagnosticReporter, adapter daemonsetAdapter, ds kapisext.DaemonSet, project string, numLabeledNodes int) {
93+
if ds.Spec.Selector == nil {
94+
r.Debug("AGL0455", "DaemonSet selector is nil. Unable to verify a pod is running")
95+
return
96+
}
97+
podSelector := labels.Set(ds.Spec.Selector.MatchLabels).AsSelector()
98+
r.Debug("AGL0435", fmt.Sprintf("Checking for running pods for DaemonSet '%s' with matchLabels '%s'", ds.ObjectMeta.Name, podSelector))
99+
podList, err := adapter.pods(project, kapi.ListOptions{LabelSelector: podSelector})
100+
if err != nil {
101+
r.Error("AGL0438", err, fmt.Sprintf("There was an error retrieving pods matched to DaemonSet '%s' that is most likely transient: %s", ds.ObjectMeta.Name, err))
102+
return
103+
}
104+
if len(podList.Items) == 0 {
105+
r.Error("AGL0440", nil, fmt.Sprintf(daemonSetNoPodsFound, ds.ObjectMeta.Name, podSelector))
106+
return
107+
}
108+
if len(podList.Items) != numLabeledNodes {
109+
r.Error("AGL0443", nil, fmt.Sprintf("The number of deployed pods %s does not match the number of labeled nodes %d", len(podList.Items), numLabeledNodes))
110+
}
111+
for _, pod := range podList.Items {
112+
if pod.Status.Phase != kapi.PodRunning {
113+
podName := pod.ObjectMeta.Name
114+
r.Error("AGL0445", nil, fmt.Sprintf(daemonSetPodsNotRunning, podName, ds.ObjectMeta.Name, kapi.PodRunning, pod.Status.Phase, project))
115+
}
116+
117+
}
118+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
package aggregated_logging
2+
3+
import (
4+
"errors"
5+
"testing"
6+
7+
kapi "k8s.io/kubernetes/pkg/api"
8+
"k8s.io/kubernetes/pkg/api/unversioned"
9+
kapisext "k8s.io/kubernetes/pkg/apis/extensions"
10+
11+
"github.com/openshift/origin/pkg/diagnostics/log"
12+
)
13+
14+
const (
15+
testPodsKey = "pods"
16+
testNodesKey = "nodes"
17+
testDsKey = "daemonsets"
18+
)
19+
20+
type fakeDaemonSetDiagnostic struct {
21+
fakeDiagnostic
22+
fakePods kapi.PodList
23+
fakeNodes kapi.NodeList
24+
fakeDaemonsets kapisext.DaemonSetList
25+
clienterrors map[string]error
26+
}
27+
28+
func newFakeDaemonSetDiagnostic(t *testing.T) *fakeDaemonSetDiagnostic {
29+
return &fakeDaemonSetDiagnostic{
30+
fakeDiagnostic: *newFakeDiagnostic(t),
31+
clienterrors: map[string]error{},
32+
}
33+
}
34+
35+
func (f *fakeDaemonSetDiagnostic) addDsPodWithPhase(state kapi.PodPhase) {
36+
pod := kapi.Pod{
37+
Spec: kapi.PodSpec{},
38+
Status: kapi.PodStatus{
39+
Phase: state,
40+
},
41+
}
42+
f.fakePods.Items = append(f.fakePods.Items, pod)
43+
}
44+
45+
func (f *fakeDaemonSetDiagnostic) addDaemonSetWithSelector(key string, value string) {
46+
selector := map[string]string{key: value}
47+
ds := kapisext.DaemonSet{
48+
Spec: kapisext.DaemonSetSpec{
49+
Template: kapi.PodTemplateSpec{
50+
Spec: kapi.PodSpec{
51+
NodeSelector: selector,
52+
},
53+
},
54+
Selector: &unversioned.LabelSelector{MatchLabels: selector},
55+
},
56+
}
57+
f.fakeDaemonsets.Items = append(f.fakeDaemonsets.Items, ds)
58+
}
59+
60+
func (f *fakeDaemonSetDiagnostic) addNodeWithLabel(key string, value string) {
61+
labels := map[string]string{key: value}
62+
node := kapi.Node{
63+
ObjectMeta: kapi.ObjectMeta{
64+
Labels: labels,
65+
},
66+
}
67+
f.fakeNodes.Items = append(f.fakeNodes.Items, node)
68+
}
69+
70+
func (f *fakeDaemonSetDiagnostic) daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error) {
71+
value, ok := f.clienterrors[testDsKey]
72+
if ok {
73+
return nil, value
74+
}
75+
return &f.fakeDaemonsets, nil
76+
}
77+
78+
func (f *fakeDaemonSetDiagnostic) nodes(options kapi.ListOptions) (*kapi.NodeList, error) {
79+
value, ok := f.clienterrors[testNodesKey]
80+
if ok {
81+
return nil, value
82+
}
83+
return &f.fakeNodes, nil
84+
}
85+
86+
func (f *fakeDaemonSetDiagnostic) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
87+
value, ok := f.clienterrors[testPodsKey]
88+
if ok {
89+
return nil, value
90+
}
91+
return &f.fakePods, nil
92+
}
93+
94+
func TestCheckDaemonsetsWhenErrorResponseFromClientRetrievingDaemonsets(t *testing.T) {
95+
d := newFakeDaemonSetDiagnostic(t)
96+
d.clienterrors[testDsKey] = errors.New("someerror")
97+
98+
checkDaemonSets(d, d, fakeProject)
99+
100+
d.assertMessage("AGL0405", "Exp. error when client errors on retrieving DaemonSets", log.ErrorLevel)
101+
}
102+
103+
func TestCheckDaemonsetsWhenNoDaemonsetsFound(t *testing.T) {
104+
d := newFakeDaemonSetDiagnostic(t)
105+
106+
checkDaemonSets(d, d, fakeProject)
107+
108+
d.assertMessage("AGL0407", "Exp. error when client retrieves no DaemonSets", log.ErrorLevel)
109+
}
110+
111+
func TestCheckDaemonsetsWhenErrorResponseFromClientRetrievingNodes(t *testing.T) {
112+
d := newFakeDaemonSetDiagnostic(t)
113+
d.clienterrors[testNodesKey] = errors.New("someerror")
114+
d.addDaemonSetWithSelector("foo", "bar")
115+
116+
checkDaemonSets(d, d, fakeProject)
117+
118+
d.assertMessage("AGL0410", "Exp. error when client errors on retrieving Nodes", log.ErrorLevel)
119+
}
120+
121+
func TestCheckDaemonsetsWhenDaemonsetsMatchNoNodes(t *testing.T) {
122+
d := newFakeDaemonSetDiagnostic(t)
123+
d.addDaemonSetWithSelector("foo", "bar")
124+
d.addNodeWithLabel("foo", "xyz")
125+
126+
checkDaemonSets(d, d, fakeProject)
127+
128+
d.assertMessage("AGL0420", "Exp. error when daemonsets do not match any nodes", log.ErrorLevel)
129+
}
130+
131+
func TestCheckDaemonsetsWhenDaemonsetsMatchPartialNodes(t *testing.T) {
132+
d := newFakeDaemonSetDiagnostic(t)
133+
d.addDaemonSetWithSelector("foo", "bar")
134+
d.addNodeWithLabel("foo", "bar")
135+
d.addNodeWithLabel("foo", "xyz")
136+
137+
checkDaemonSets(d, d, fakeProject)
138+
139+
d.assertMessage("AGL0425", "Exp. warning when daemonsets matches less then all the nodes", log.WarnLevel)
140+
}
141+
142+
func TestCheckDaemonsetsWhenClientErrorsFetchingPods(t *testing.T) {
143+
d := newFakeDaemonSetDiagnostic(t)
144+
d.clienterrors[testPodsKey] = errors.New("some error")
145+
d.addDaemonSetWithSelector("foo", "bar")
146+
d.addNodeWithLabel("foo", "bar")
147+
148+
checkDaemonSets(d, d, fakeProject)
149+
150+
d.assertMessage("AGL0438", "Exp. error when there is an error retrieving pods for a daemonset", log.ErrorLevel)
151+
152+
d.dumpMessages()
153+
}
154+
155+
func TestCheckDaemonsetsWhenNoPodsMatchDaemonSet(t *testing.T) {
156+
d := newFakeDaemonSetDiagnostic(t)
157+
d.addDaemonSetWithSelector("foo", "bar")
158+
d.addNodeWithLabel("foo", "bar")
159+
160+
checkDaemonSets(d, d, fakeProject)
161+
162+
d.assertMessage("AGL0440", "Exp. error when there are no pods that match a daemonset", log.ErrorLevel)
163+
d.dumpMessages()
164+
}
165+
166+
func TestCheckDaemonsetsWhenNoPodsInRunningState(t *testing.T) {
167+
d := newFakeDaemonSetDiagnostic(t)
168+
d.addDaemonSetWithSelector("foo", "bar")
169+
d.addNodeWithLabel("foo", "bar")
170+
d.addDsPodWithPhase(kapi.PodPending)
171+
172+
checkDaemonSets(d, d, fakeProject)
173+
174+
d.assertMessage("AGL0445", "Exp. error when there are no pods in running state", log.ErrorLevel)
175+
d.dumpMessages()
176+
}
177+
178+
func TestCheckDaemonsetsWhenAllPodsInRunningState(t *testing.T) {
179+
d := newFakeDaemonSetDiagnostic(t)
180+
d.addDaemonSetWithSelector("foo", "bar")
181+
d.addNodeWithLabel("foo", "bar")
182+
d.addDsPodWithPhase(kapi.PodRunning)
183+
184+
checkDaemonSets(d, d, fakeProject)
185+
186+
d.assertNoErrors()
187+
d.dumpMessages()
188+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
package aggregated_logging
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
7+
kapi "k8s.io/kubernetes/pkg/api"
8+
"k8s.io/kubernetes/pkg/labels"
9+
"k8s.io/kubernetes/pkg/selection"
10+
"k8s.io/kubernetes/pkg/util/sets"
11+
12+
deployapi "github.com/openshift/origin/pkg/deploy/api"
13+
)
14+
15+
const (
16+
componentNameEs = "es"
17+
componentNameEsOps = "es-ops"
18+
componentNameKibana = "kibana"
19+
componentNameKibanaOps = "kibana-ops"
20+
componentNameCurator = "curator"
21+
)
22+
23+
// loggingComponents are those 'managed' by rep controllers (e.g. fluentd is deployed with a DaemonSet)
24+
var loggingComponents = sets.NewString(componentNameEs, componentNameEsOps, componentNameKibana, componentNameKibanaOps, componentNameCurator)
25+
26+
const deploymentConfigWarnMissingForOps = `
27+
Did not find a DeploymentConfig to support component '%s'. If you require
28+
a separate ElasticSearch cluster to aggregate operations logs, please re-install
29+
or update logging and specify the appropriate switch to enable the ops cluster.
30+
`
31+
32+
const deploymentConfigZeroPodsFound = `
33+
There were no Pods found that support logging. Try running
34+
the following commands for additional information:
35+
36+
oc describe dc -n %[1]s
37+
oc get events -n %[1]s
38+
`
39+
const deploymentConfigNoPodsFound = `
40+
There were no Pods found for DeploymentConfig '%[1]s'. Try running
41+
the following commands for additional information:
42+
43+
oc describe dc %[1]s -n %[2]s
44+
oc get events -n %[2]s
45+
`
46+
const deploymentConfigPodsNotRunning = `
47+
The Pod '%[1]s' matched by DeploymentConfig '%[2]s' is not in '%[3]s' status: %[4]s.
48+
49+
Depending upon the state, this could mean there is an error running the image
50+
for one or more pod containers, the node could be pulling images, etc. Try running
51+
the following commands for additional information:
52+
53+
oc describe pod %[1]s -n %[5]s
54+
oc logs %[1]s -n %[5]s
55+
oc get events -n %[5]s
56+
`
57+
58+
func checkDeploymentConfigs(r diagnosticReporter, adapter deploymentConfigAdapter, project string) {
59+
req, _ := labels.NewRequirement(loggingInfraKey, selection.Exists, nil)
60+
selector := labels.NewSelector().Add(*req)
61+
r.Debug("AGL0040", fmt.Sprintf("Checking for DeploymentConfigs in project '%s' with selector '%s'", project, selector))
62+
dcList, err := adapter.deploymentconfigs(project, kapi.ListOptions{LabelSelector: selector})
63+
if err != nil {
64+
r.Error("AGL0045", err, fmt.Sprintf("There was an error while trying to retrieve the DeploymentConfigs in project '%s': %s", project, err))
65+
return
66+
}
67+
if len(dcList.Items) == 0 {
68+
r.Error("AGL0047", nil, fmt.Sprintf("Did not find any matching DeploymentConfigs in project '%s' which means no logging components were deployed. Try running the installer.", project))
69+
return
70+
}
71+
found := sets.NewString()
72+
for _, entry := range dcList.Items {
73+
comp := labels.Set(entry.ObjectMeta.Labels).Get(componentKey)
74+
found.Insert(comp)
75+
r.Debug("AGL0050", fmt.Sprintf("Found DeploymentConfig '%s' for component '%s'", entry.ObjectMeta.Name, comp))
76+
}
77+
for _, entry := range loggingComponents.List() {
78+
exists := found.Has(entry)
79+
if !exists {
80+
if strings.HasSuffix(entry, "-ops") {
81+
r.Info("AGL0060", fmt.Sprintf(deploymentConfigWarnMissingForOps, entry))
82+
} else {
83+
r.Error("AGL0065", nil, fmt.Sprintf("Did not find a DeploymentConfig to support component '%s'", entry))
84+
}
85+
}
86+
}
87+
checkDeploymentConfigPods(r, adapter, *dcList, project)
88+
}
89+
90+
func checkDeploymentConfigPods(r diagnosticReporter, adapter deploymentConfigAdapter, dcs deployapi.DeploymentConfigList, project string) {
91+
compReq, _ := labels.NewRequirement(componentKey, selection.In, loggingComponents)
92+
provReq, _ := labels.NewRequirement(providerKey, selection.Equals, sets.NewString(openshiftValue))
93+
podSelector := labels.NewSelector().Add(*compReq, *provReq)
94+
r.Debug("AGL0070", fmt.Sprintf("Getting pods that match selector '%s'", podSelector))
95+
podList, err := adapter.pods(project, kapi.ListOptions{LabelSelector: podSelector})
96+
if err != nil {
97+
r.Error("AGL0075", err, fmt.Sprintf("There was an error while trying to retrieve the pods for the AggregatedLogging stack: %s", err))
98+
return
99+
}
100+
if len(podList.Items) == 0 {
101+
r.Error("AGL0080", nil, fmt.Sprintf(deploymentConfigZeroPodsFound, project))
102+
return
103+
}
104+
dcPodCount := make(map[string]int, len(dcs.Items))
105+
for _, dc := range dcs.Items {
106+
dcPodCount[dc.ObjectMeta.Name] = 0
107+
}
108+
109+
for _, pod := range podList.Items {
110+
r.Debug("AGL0082", fmt.Sprintf("Checking status of Pod '%s'...", pod.ObjectMeta.Name))
111+
dcName, hasDcName := pod.ObjectMeta.Annotations[deployapi.DeploymentConfigAnnotation]
112+
if !hasDcName {
113+
r.Warn("AGL0085", nil, fmt.Sprintf("Found Pod '%s' that that does not reference a logging deployment config which may be acceptable. Skipping check to see if its running.", pod.ObjectMeta.Name))
114+
continue
115+
}
116+
if pod.Status.Phase != kapi.PodRunning {
117+
podName := pod.ObjectMeta.Name
118+
r.Error("AGL0090", nil, fmt.Sprintf(deploymentConfigPodsNotRunning, podName, dcName, kapi.PodRunning, pod.Status.Phase, project))
119+
}
120+
count, _ := dcPodCount[dcName]
121+
dcPodCount[dcName] = count + 1
122+
}
123+
for name, count := range dcPodCount {
124+
if count == 0 {
125+
r.Error("AGL0095", nil, fmt.Sprintf(deploymentConfigNoPodsFound, name, project))
126+
}
127+
}
128+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
package aggregated_logging
2+
3+
import (
4+
"errors"
5+
"testing"
6+
7+
kapi "k8s.io/kubernetes/pkg/api"
8+
9+
deployapi "github.com/openshift/origin/pkg/deploy/api"
10+
"github.com/openshift/origin/pkg/diagnostics/log"
11+
)
12+
13+
const (
14+
testDcPodsKey = "pods"
15+
testDcKey = "deploymentconfigs"
16+
testSkipAnnotation = "skipAddAnnoation"
17+
)
18+
19+
type fakeDeploymentConfigsDiagnostic struct {
20+
fakeDiagnostic
21+
fakePods kapi.PodList
22+
fakeDcs deployapi.DeploymentConfigList
23+
clienterrors map[string]error
24+
}
25+
26+
func newFakeDeploymentConfigsDiagnostic(t *testing.T) *fakeDeploymentConfigsDiagnostic {
27+
return &fakeDeploymentConfigsDiagnostic{
28+
fakeDiagnostic: *newFakeDiagnostic(t),
29+
clienterrors: map[string]error{},
30+
}
31+
}
32+
func (f *fakeDeploymentConfigsDiagnostic) addDeployConfigFor(component string) {
33+
labels := map[string]string{componentKey: component}
34+
dc := deployapi.DeploymentConfig{
35+
ObjectMeta: kapi.ObjectMeta{
36+
Name: component + "Name",
37+
Labels: labels,
38+
},
39+
}
40+
f.fakeDcs.Items = append(f.fakeDcs.Items, dc)
41+
}
42+
43+
func (f *fakeDeploymentConfigsDiagnostic) addPodFor(comp string, state kapi.PodPhase) {
44+
annotations := map[string]string{}
45+
if comp != testSkipAnnotation {
46+
annotations[deployapi.DeploymentConfigAnnotation] = comp
47+
}
48+
pod := kapi.Pod{
49+
ObjectMeta: kapi.ObjectMeta{
50+
Name: comp,
51+
Annotations: annotations,
52+
},
53+
Spec: kapi.PodSpec{},
54+
Status: kapi.PodStatus{
55+
Phase: state,
56+
},
57+
}
58+
f.fakePods.Items = append(f.fakePods.Items, pod)
59+
}
60+
61+
func (f *fakeDeploymentConfigsDiagnostic) deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error) {
62+
f.test.Logf(">> calling deploymentconfigs: %s", f.clienterrors)
63+
value, ok := f.clienterrors[testDcKey]
64+
if ok {
65+
f.test.Logf(">> error key found..returning %s", value)
66+
return nil, value
67+
}
68+
f.test.Logf(">> error key not found..")
69+
return &f.fakeDcs, nil
70+
}
71+
72+
func (f *fakeDeploymentConfigsDiagnostic) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
73+
value, ok := f.clienterrors[testDcPodsKey]
74+
if ok {
75+
return nil, value
76+
}
77+
return &f.fakePods, nil
78+
}
79+
80+
//test client error listing dcs
81+
func TestCheckDcWhenErrorResponseFromClientRetrievingDc(t *testing.T) {
82+
d := newFakeDeploymentConfigsDiagnostic(t)
83+
d.clienterrors[testDcKey] = errors.New("error")
84+
85+
checkDeploymentConfigs(d, d, fakeProject)
86+
87+
d.assertMessage("AGL0045", "Exp. an error when client returns error retrieving dcs", log.ErrorLevel)
88+
d.dumpMessages()
89+
}
90+
91+
func TestCheckDcWhenNoDeployConfigsFound(t *testing.T) {
92+
d := newFakeDeploymentConfigsDiagnostic(t)
93+
94+
checkDeploymentConfigs(d, d, fakeProject)
95+
96+
d.assertMessage("AGL0047", "Exp. an error when no DeploymentConfigs are found", log.ErrorLevel)
97+
d.dumpMessages()
98+
}
99+
100+
func TestCheckDcWhenOpsOrOtherDeployConfigsMissing(t *testing.T) {
101+
d := newFakeDeploymentConfigsDiagnostic(t)
102+
d.addDeployConfigFor(componentNameEs)
103+
104+
checkDeploymentConfigs(d, d, fakeProject)
105+
106+
d.assertMessage("AGL0060", "Exp. a warning when ops DeploymentConfigs are missing", log.InfoLevel)
107+
d.assertMessage("AGL0065", "Exp. an error when non-ops DeploymentConfigs are missing", log.ErrorLevel)
108+
d.dumpMessages()
109+
}
110+
111+
func TestCheckDcWhenClientErrorListingPods(t *testing.T) {
112+
d := newFakeDeploymentConfigsDiagnostic(t)
113+
d.clienterrors[testDcPodsKey] = errors.New("New pod error")
114+
for _, comp := range loggingComponents.List() {
115+
d.addDeployConfigFor(comp)
116+
}
117+
118+
checkDeploymentConfigs(d, d, fakeProject)
119+
120+
d.assertMessage("AGL0075", "Exp. an error when retrieving pods errors", log.ErrorLevel)
121+
d.dumpMessages()
122+
}
123+
124+
func TestCheckDcWhenNoPodsFoundMatchingDeployConfig(t *testing.T) {
125+
d := newFakeDeploymentConfigsDiagnostic(t)
126+
for _, comp := range loggingComponents.List() {
127+
d.addDeployConfigFor(comp)
128+
}
129+
130+
checkDeploymentConfigs(d, d, fakeProject)
131+
132+
d.assertMessage("AGL0080", "Exp. an error when retrieving pods errors", log.ErrorLevel)
133+
d.dumpMessages()
134+
}
135+
136+
func TestCheckDcWhenInVariousStates(t *testing.T) {
137+
d := newFakeDeploymentConfigsDiagnostic(t)
138+
for _, comp := range loggingComponents.List() {
139+
d.addDeployConfigFor(comp)
140+
d.addPodFor(comp, kapi.PodRunning)
141+
}
142+
d.addPodFor(testSkipAnnotation, kapi.PodRunning)
143+
d.addPodFor("someothercomponent", kapi.PodPending)
144+
d.addDeployConfigFor("somerandom component")
145+
146+
checkDeploymentConfigs(d, d, fakeProject)
147+
148+
d.assertMessage("AGL0085", "Exp. a warning when pod is missing DeployConfig annotation", log.WarnLevel)
149+
d.assertMessage("AGL0090", "Exp. an error when pod is not in running state", log.ErrorLevel)
150+
d.assertMessage("AGL0095", "Exp. an error when pods not found for a DeployConfig", log.ErrorLevel)
151+
152+
d.dumpMessages()
153+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
package aggregated_logging
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"net/url"
7+
8+
kapi "k8s.io/kubernetes/pkg/api"
9+
kapisext "k8s.io/kubernetes/pkg/apis/extensions"
10+
kclient "k8s.io/kubernetes/pkg/client/unversioned"
11+
"k8s.io/kubernetes/pkg/labels"
12+
13+
authapi "github.com/openshift/origin/pkg/authorization/api"
14+
"github.com/openshift/origin/pkg/client"
15+
configapi "github.com/openshift/origin/pkg/cmd/server/api"
16+
deployapi "github.com/openshift/origin/pkg/deploy/api"
17+
hostdiag "github.com/openshift/origin/pkg/diagnostics/host"
18+
"github.com/openshift/origin/pkg/diagnostics/types"
19+
routesapi "github.com/openshift/origin/pkg/route/api"
20+
)
21+
22+
// AggregatedLogging is a Diagnostic to check the configurations
23+
// and general integration of the OpenShift stack
24+
// for aggregating container logs
25+
// https://github.com/openshift/origin-aggregated-logging
26+
type AggregatedLogging struct {
27+
masterConfig *configapi.MasterConfig
28+
MasterConfigFile string
29+
OsClient *client.Client
30+
KubeClient *kclient.Client
31+
result types.DiagnosticResult
32+
}
33+
34+
const (
35+
AggregatedLoggingName = "AggregatedLogging"
36+
37+
loggingInfraKey = "logging-infra"
38+
componentKey = "component"
39+
providerKey = "provider"
40+
openshiftValue = "openshift"
41+
42+
fluentdServiceAccountName = "aggregated-logging-fluentd"
43+
)
44+
45+
var loggingSelector = labels.Set{loggingInfraKey: "support"}
46+
47+
//NewAggregatedLogging returns the AggregatedLogging Diagnostic
48+
func NewAggregatedLogging(masterConfigFile string, kclient *kclient.Client, osclient *client.Client) *AggregatedLogging {
49+
return &AggregatedLogging{nil, masterConfigFile, osclient, kclient, types.NewDiagnosticResult(AggregatedLoggingName)}
50+
}
51+
52+
func (d *AggregatedLogging) getScc(name string) (*kapi.SecurityContextConstraints, error) {
53+
return d.KubeClient.SecurityContextConstraints().Get(name)
54+
}
55+
56+
func (d *AggregatedLogging) getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error) {
57+
return d.OsClient.ClusterRoleBindings().Get(name)
58+
}
59+
60+
func (d *AggregatedLogging) routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error) {
61+
return d.OsClient.Routes(project).List(options)
62+
}
63+
64+
func (d *AggregatedLogging) serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error) {
65+
return d.KubeClient.ServiceAccounts(project).List(options)
66+
}
67+
68+
func (d *AggregatedLogging) services(project string, options kapi.ListOptions) (*kapi.ServiceList, error) {
69+
return d.KubeClient.Services(project).List(options)
70+
}
71+
72+
func (d *AggregatedLogging) endpointsForService(project string, service string) (*kapi.Endpoints, error) {
73+
return d.KubeClient.Endpoints(project).Get(service)
74+
}
75+
76+
func (d *AggregatedLogging) daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error) {
77+
return d.KubeClient.DaemonSets(project).List(kapi.ListOptions{LabelSelector: loggingInfraFluentdSelector.AsSelector()})
78+
}
79+
80+
func (d *AggregatedLogging) nodes(options kapi.ListOptions) (*kapi.NodeList, error) {
81+
return d.KubeClient.Nodes().List(kapi.ListOptions{})
82+
}
83+
84+
func (d *AggregatedLogging) pods(project string, options kapi.ListOptions) (*kapi.PodList, error) {
85+
return d.KubeClient.Pods(project).List(options)
86+
}
87+
func (d *AggregatedLogging) deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error) {
88+
return d.OsClient.DeploymentConfigs(project).List(options)
89+
}
90+
91+
func (d *AggregatedLogging) Info(id string, message string) {
92+
d.result.Info(id, message)
93+
}
94+
95+
func (d *AggregatedLogging) Error(id string, err error, message string) {
96+
d.result.Error(id, err, message)
97+
}
98+
99+
func (d *AggregatedLogging) Debug(id string, message string) {
100+
d.result.Debug(id, message)
101+
}
102+
103+
func (d *AggregatedLogging) Warn(id string, err error, message string) {
104+
d.result.Warn(id, err, message)
105+
}
106+
107+
func (d *AggregatedLogging) Name() string {
108+
return AggregatedLoggingName
109+
}
110+
111+
func (d *AggregatedLogging) Description() string {
112+
return "Check aggregated logging integration for proper configuration"
113+
}
114+
115+
func (d *AggregatedLogging) CanRun() (bool, error) {
116+
if len(d.MasterConfigFile) == 0 {
117+
return false, errors.New("No master config file was provided")
118+
}
119+
if d.OsClient == nil {
120+
return false, errors.New("Config must include a cluster-admin context to run this diagnostic")
121+
}
122+
if d.KubeClient == nil {
123+
return false, errors.New("Config must include a cluster-admin context to run this diagnostic")
124+
}
125+
var err error
126+
d.masterConfig, err = hostdiag.GetMasterConfig(d.result, d.MasterConfigFile)
127+
if err != nil {
128+
return false, errors.New("Unreadable master config; skipping this diagnostic.")
129+
}
130+
return true, nil
131+
}
132+
133+
func (d *AggregatedLogging) Check() types.DiagnosticResult {
134+
project := retrieveLoggingProject(d.result, d.masterConfig, d.OsClient)
135+
if len(project) != 0 {
136+
checkServiceAccounts(d, d, project)
137+
checkClusterRoleBindings(d, d, project)
138+
checkSccs(d, d, project)
139+
checkDeploymentConfigs(d, d, project)
140+
checkDaemonSets(d, d, project)
141+
checkServices(d, d, project)
142+
checkRoutes(d, d, project)
143+
checkKibana(d.result, d.OsClient, d.KubeClient, project)
144+
}
145+
return d.result
146+
}
147+
148+
const projectNodeSelectorWarning = `
149+
The project '%[1]s' was found with a non-empty node selector annotation. This will keep
150+
Fluentd from running on certain nodes and collecting logs from the entire cluster. You
151+
can correct it by editing the project:
152+
153+
oc edit namespace %[1]s
154+
155+
and updating the annotation:
156+
157+
'openshift.io/node-selector' : ""
158+
159+
`
160+
161+
func retrieveLoggingProject(r types.DiagnosticResult, masterCfg *configapi.MasterConfig, osClient *client.Client) string {
162+
r.Debug("AGL0010", fmt.Sprintf("masterConfig.AssetConfig.LoggingPublicURL: '%s'", masterCfg.AssetConfig.LoggingPublicURL))
163+
projectName := ""
164+
if len(masterCfg.AssetConfig.LoggingPublicURL) == 0 {
165+
r.Debug("AGL0017", "masterConfig.AssetConfig.LoggingPublicURL is empty")
166+
return projectName
167+
}
168+
169+
loggingUrl, err := url.Parse(masterCfg.AssetConfig.LoggingPublicURL)
170+
if err != nil {
171+
r.Error("AGL0011", err, fmt.Sprintf("Unable to parse the loggingPublicURL from the masterConfig '%s'", masterCfg.AssetConfig.LoggingPublicURL))
172+
return projectName
173+
}
174+
175+
routeList, err := osClient.Routes(kapi.NamespaceAll).List(kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
176+
if err != nil {
177+
r.Error("AGL0012", err, fmt.Sprintf("There was an error while trying to find the route associated with '%s' which is probably transient: %s", loggingUrl, err))
178+
return projectName
179+
}
180+
181+
for _, route := range routeList.Items {
182+
r.Debug("AGL0013", fmt.Sprintf("Comparing URL to route.Spec.Host: %s", route.Spec.Host))
183+
if loggingUrl.Host == route.Spec.Host {
184+
if len(projectName) == 0 {
185+
projectName = route.ObjectMeta.Namespace
186+
r.Info("AGL0015", fmt.Sprintf("Found route '%s' matching logging URL '%s' in project: '%s'", route.ObjectMeta.Name, loggingUrl.Host, projectName))
187+
} else {
188+
r.Warn("AGL0019", nil, fmt.Sprintf("Found additional route '%s' matching logging URL '%s' in project: '%s'. This could mean you have multiple logging deployments.", route.ObjectMeta.Name, loggingUrl.Host, projectName))
189+
}
190+
}
191+
}
192+
if len(projectName) == 0 {
193+
message := fmt.Sprintf("Unable to find a route matching the loggingPublicURL defined in the master config '%s'. Check that the URL is correct and aggregated logging is deployed.", loggingUrl)
194+
r.Error("AGL0014", errors.New(message), message)
195+
return ""
196+
}
197+
project, err := osClient.Projects().Get(projectName)
198+
if err != nil {
199+
r.Error("AGL0018", err, fmt.Sprintf("There was an error retrieving project '%s' which is most likely a transient error: %s", projectName, err))
200+
return ""
201+
}
202+
nodeSelector, ok := project.ObjectMeta.Annotations["openshift.io/node-selector"]
203+
if ok && len(nodeSelector) != 0 {
204+
r.Warn("AGL0030", nil, fmt.Sprintf(projectNodeSelectorWarning, projectName))
205+
}
206+
return projectName
207+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package aggregated_logging
2+
3+
import (
4+
"github.com/openshift/origin/pkg/diagnostics/log"
5+
"testing"
6+
)
7+
8+
const fakeProject = "someProject"
9+
10+
type fakeLogMessage struct {
11+
id string
12+
logLevel log.Level
13+
message string
14+
}
15+
16+
type fakeDiagnostic struct {
17+
err error
18+
messages map[string]fakeLogMessage
19+
test *testing.T
20+
}
21+
22+
func newFakeDiagnostic(t *testing.T) *fakeDiagnostic {
23+
return &fakeDiagnostic{
24+
messages: map[string]fakeLogMessage{},
25+
test: t,
26+
}
27+
}
28+
29+
func (f *fakeDiagnostic) dumpMessages() {
30+
f.test.Log("<<<<<<<< Dumping test messages >>>>>>>>")
31+
for id, message := range f.messages {
32+
f.test.Logf("id: %s, logLevel: %s, message: %s", id, message.logLevel.Name, message.message)
33+
}
34+
}
35+
36+
func (f *fakeDiagnostic) Info(id string, message string) {
37+
f.messages[id] = fakeLogMessage{id, log.InfoLevel, message}
38+
}
39+
40+
func (f *fakeDiagnostic) Error(id string, err error, message string) {
41+
f.messages[id] = fakeLogMessage{id, log.ErrorLevel, message}
42+
}
43+
44+
func (f *fakeDiagnostic) Debug(id string, message string) {
45+
f.messages[id] = fakeLogMessage{id, log.DebugLevel, message}
46+
}
47+
48+
func (f *fakeDiagnostic) Warn(id string, err error, message string) {
49+
f.messages[id] = fakeLogMessage{id, log.WarnLevel, message}
50+
}
51+
52+
func (d *fakeDiagnostic) assertMessage(id string, missing string, level log.Level) {
53+
message, ok := d.messages[id]
54+
if !ok {
55+
d.test.Errorf("Unable to find message with id %s. %s", id, missing)
56+
return
57+
}
58+
if message.logLevel != level {
59+
d.test.Errorf("Exp logLevel %s for %s but got %s", level.Name, id, message.logLevel.Name)
60+
}
61+
}
62+
63+
func (d *fakeDiagnostic) assertNoWarnings() {
64+
for _, message := range d.messages {
65+
66+
if message.logLevel == log.WarnLevel {
67+
d.test.Errorf("Exp no WarnLevel log messages.")
68+
}
69+
}
70+
}
71+
func (d *fakeDiagnostic) assertNoErrors() {
72+
for _, message := range d.messages {
73+
74+
if message.logLevel == log.ErrorLevel {
75+
d.test.Errorf("Exp no ErrorLevel log messages.")
76+
}
77+
}
78+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package aggregated_logging
2+
3+
import (
4+
kapi "k8s.io/kubernetes/pkg/api"
5+
kapisext "k8s.io/kubernetes/pkg/apis/extensions"
6+
7+
authapi "github.com/openshift/origin/pkg/authorization/api"
8+
deployapi "github.com/openshift/origin/pkg/deploy/api"
9+
routesapi "github.com/openshift/origin/pkg/route/api"
10+
)
11+
12+
//diagnosticReporter provides diagnostic messages
13+
type diagnosticReporter interface {
14+
Info(id string, message string)
15+
Debug(id string, message string)
16+
Error(id string, err error, message string)
17+
Warn(id string, err error, message string)
18+
}
19+
20+
type routesAdapter interface {
21+
routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error)
22+
}
23+
24+
type sccAdapter interface {
25+
getScc(name string) (*kapi.SecurityContextConstraints, error)
26+
}
27+
28+
type clusterRoleBindingsAdapter interface {
29+
getClusterRoleBinding(name string) (*authapi.ClusterRoleBinding, error)
30+
}
31+
32+
//deploymentConfigAdapter is an abstraction to retrieve resource for validating dcs
33+
//for aggregated logging diagnostics
34+
type deploymentConfigAdapter interface {
35+
deploymentconfigs(project string, options kapi.ListOptions) (*deployapi.DeploymentConfigList, error)
36+
podsAdapter
37+
}
38+
39+
//daemonsetAdapter is an abstraction to retrieve resources for validating daemonsets
40+
//for aggregated logging diagnostics
41+
type daemonsetAdapter interface {
42+
daemonsets(project string, options kapi.ListOptions) (*kapisext.DaemonSetList, error)
43+
nodes(options kapi.ListOptions) (*kapi.NodeList, error)
44+
podsAdapter
45+
}
46+
47+
type podsAdapter interface {
48+
pods(project string, options kapi.ListOptions) (*kapi.PodList, error)
49+
}
50+
51+
//saAdapter abstractions to retrieve service accounts
52+
type saAdapter interface {
53+
serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error)
54+
}
55+
56+
//servicesAdapter abstracts retrieving services
57+
type servicesAdapter interface {
58+
services(project string, options kapi.ListOptions) (*kapi.ServiceList, error)
59+
endpointsForService(project string, serviceName string) (*kapi.Endpoints, error)
60+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package aggregated_logging
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"net/url"
7+
"strings"
8+
9+
kapi "k8s.io/kubernetes/pkg/api"
10+
kclient "k8s.io/kubernetes/pkg/client/unversioned"
11+
"k8s.io/kubernetes/pkg/util/sets"
12+
13+
"github.com/openshift/origin/pkg/client"
14+
"github.com/openshift/origin/pkg/diagnostics/types"
15+
oauthapi "github.com/openshift/origin/pkg/oauth/api"
16+
)
17+
18+
const (
19+
kibanaProxyOauthClientName = "kibana-proxy"
20+
kibanaProxySecretName = "logging-kibana-proxy"
21+
oauthSecretKeyName = "oauth-secret"
22+
)
23+
24+
//checkKibana verifies the various integration points between Kibana and logging
25+
func checkKibana(r types.DiagnosticResult, osClient *client.Client, kClient *kclient.Client, project string) {
26+
oauthclient, err := osClient.OAuthClients().Get(kibanaProxyOauthClientName)
27+
if err != nil {
28+
r.Error("AGL0115", err, fmt.Sprintf("Error retrieving the OauthClient '%s'. Unable to check Kibana", kibanaProxyOauthClientName))
29+
return
30+
}
31+
checkKibanaSecret(r, osClient, kClient, project, oauthclient)
32+
checkKibanaRoutesInOauthClient(r, osClient, project, oauthclient)
33+
}
34+
35+
//checkKibanaSecret confirms the secret used by kibana matches that configured in the oauth client
36+
func checkKibanaSecret(r types.DiagnosticResult, osClient *client.Client, kClient *kclient.Client, project string, oauthclient *oauthapi.OAuthClient) {
37+
r.Debug("AGL0100", "Checking oauthclient secrets...")
38+
secret, err := kClient.Secrets(project).Get(kibanaProxySecretName)
39+
if err != nil {
40+
r.Error("AGL0105", err, fmt.Sprintf("Error retrieving the secret '%s'", kibanaProxySecretName))
41+
return
42+
}
43+
decoded, err := decodeSecret(secret, oauthSecretKeyName)
44+
if err != nil {
45+
r.Error("AGL0110", err, fmt.Sprintf("Unable to decode Kibana Secret"))
46+
return
47+
}
48+
if decoded != oauthclient.Secret {
49+
r.Debug("AGL0120", fmt.Sprintf("OauthClient Secret: '%s'", oauthclient.Secret))
50+
r.Debug("AGL0125", fmt.Sprintf("Decoded Kibana Secret: '%s'", decoded))
51+
message := fmt.Sprintf("The %s OauthClient.Secret does not match the decoded oauth secret in '%s'", kibanaProxyOauthClientName, kibanaProxySecretName)
52+
r.Error("AGL0130", errors.New(message), message)
53+
}
54+
}
55+
56+
//checkKibanaRoutesInOauthClient verifies the client contains the correct redirect uris
57+
func checkKibanaRoutesInOauthClient(r types.DiagnosticResult, osClient *client.Client, project string, oauthclient *oauthapi.OAuthClient) {
58+
r.Debug("AGL0141", "Checking oauthclient redirectURIs for the logging routes...")
59+
routeList, err := osClient.Routes(project).List(kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
60+
if err != nil {
61+
r.Error("AGL0143", err, "Error retrieving the logging routes.")
62+
return
63+
}
64+
redirectUris, err := parseRedirectUris(oauthclient.RedirectURIs)
65+
if err != nil {
66+
r.Error("AGL0145", err, "Error parsing the OAuthClient.RedirectURIs")
67+
return
68+
}
69+
for _, route := range routeList.Items {
70+
if !redirectUris.Has(route.Spec.Host) {
71+
message := fmt.Sprintf("OauthClient '%s' does not include a redirectURI for route '%s' which is '%s'", oauthclient.ObjectMeta.Name, route.ObjectMeta.Name, route.Spec.Host)
72+
r.Error("AGL0147", errors.New(message), message)
73+
}
74+
}
75+
76+
return
77+
}
78+
79+
func parseRedirectUris(uris []string) (sets.String, error) {
80+
urls := sets.String{}
81+
for _, uri := range uris {
82+
url, err := url.Parse(uri)
83+
if err != nil {
84+
return urls, err
85+
}
86+
urls.Insert(url.Host)
87+
}
88+
return urls, nil
89+
}
90+
91+
// decodeSecret decodes a base64 encoded entry in a secret and returns the value as decoded string
92+
func decodeSecret(secret *kapi.Secret, key string) (string, error) {
93+
value, ok := secret.Data[key]
94+
if !ok {
95+
return "", errors.New(fmt.Sprintf("The %s secret did not have a data entry for %s", secret.ObjectMeta.Name, key))
96+
}
97+
return strings.TrimSpace(string(value)), nil
98+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
package aggregated_logging
2+
3+
import (
4+
"crypto/tls"
5+
"crypto/x509"
6+
"encoding/pem"
7+
"errors"
8+
"fmt"
9+
10+
kapi "k8s.io/kubernetes/pkg/api"
11+
12+
routes "github.com/openshift/origin/pkg/route/api"
13+
)
14+
15+
const routeUnaccepted = `
16+
An unaccepted route is most likely due to one of the following reasons:
17+
18+
* No router has been deployed to serve the route.
19+
* Another route with the same host already exists.
20+
21+
If a router has been deployed, look for duplicate matching routes by
22+
running the following:
23+
24+
oc get --all-namespaces routes --template='{{range .items}}{{if eq .spec.host "%[2]s"}}{{println .metadata.name "in" .metadata.namespace}}{{end}}{{end}}'
25+
26+
`
27+
const routeCertMissingHostName = `
28+
Try updating the route certificate to include its host as either the CommonName (CN) or one of the alternate names.
29+
`
30+
31+
//checkRoutes looks through the logging infra routes to see if they have been accepted, and ...
32+
func checkRoutes(r diagnosticReporter, adapter routesAdapter, project string) {
33+
r.Debug("AGL0300", "Checking routes...")
34+
routeList, err := adapter.routes(project, kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
35+
if err != nil {
36+
r.Error("AGL0305", err, fmt.Sprintf("There was an error retrieving routes in the project '%s' with selector '%s'", project, loggingSelector.AsSelector()))
37+
return
38+
}
39+
if len(routeList.Items) == 0 {
40+
r.Error("AGL0310", nil, fmt.Sprintf("There were no routes found to support logging in project '%s'", project))
41+
return
42+
}
43+
for _, route := range routeList.Items {
44+
if !wasAccepted(r, route) {
45+
r.Error("AGL0325", nil, fmt.Sprintf("Route '%s' has not been accepted by any routers."+routeUnaccepted, route.ObjectMeta.Name, route.Spec.Host))
46+
}
47+
if route.Spec.TLS != nil && len(route.Spec.TLS.Certificate) != 0 && len(route.Spec.TLS.Key) != 0 {
48+
checkRouteCertificate(r, route)
49+
} else {
50+
r.Debug("AGL0331", fmt.Sprintf("Skipping key and certificate checks on route '%s'. Either of them may be missing.", route.ObjectMeta.Name))
51+
}
52+
}
53+
}
54+
55+
func checkRouteCertificate(r diagnosticReporter, route routes.Route) {
56+
r.Debug("AGL0330", fmt.Sprintf("Checking certificate for route '%s'...", route.ObjectMeta.Name))
57+
block, _ := pem.Decode([]byte(route.Spec.TLS.Certificate))
58+
//verify hostname
59+
if block != nil {
60+
cert, err := x509.ParseCertificate(block.Bytes)
61+
if err != nil {
62+
r.Error("AGL0335", err, fmt.Sprintf("Unable to parse the certificate for route '%s'", route.ObjectMeta.Name))
63+
return
64+
}
65+
r.Debug("AGL0340", fmt.Sprintf("Cert CommonName: '%s' Cert DNSNames: '%s'", cert.Subject.CommonName, cert.DNSNames))
66+
if err := cert.VerifyHostname(route.Spec.Host); err != nil {
67+
r.Error("AGL0345", err, fmt.Sprintf("Route '%[1]s' certficate does not include route host '%[2]s'"+routeCertMissingHostName, route.ObjectMeta.Name, route.Spec.Host))
68+
}
69+
} else {
70+
r.Error("AGL0350", errors.New("Unable to decode the TLS Certificate"), "Unable to decode the TLS Certificate")
71+
}
72+
73+
//verify key matches cert
74+
r.Debug("AGL0355", fmt.Sprintf("Checking certificate matches key for route '%s'", route.ObjectMeta.Name))
75+
_, err := tls.X509KeyPair([]byte(route.Spec.TLS.Certificate), []byte(route.Spec.TLS.Key))
76+
if err != nil {
77+
r.Error("AGL0365", err, fmt.Sprintf("Route '%s' key and certficate do not match: %s. The router will be unable to pass traffic using this route.", route.ObjectMeta.Name, err))
78+
}
79+
}
80+
81+
func wasAccepted(r diagnosticReporter, route routes.Route) bool {
82+
r.Debug("AGL0310", fmt.Sprintf("Checking if route '%s' was accepted...", route.ObjectMeta.Name))
83+
accepted := 0
84+
for _, status := range route.Status.Ingress {
85+
r.Debug("AGL0315", fmt.Sprintf("Status for router: '%s', host: '%s'", status.RouterName, status.Host))
86+
for _, condition := range status.Conditions {
87+
r.Debug("AGL0320", fmt.Sprintf("Condition type: '%s' status: '%s'", condition.Type, condition.Status))
88+
if condition.Type == routes.RouteAdmitted && condition.Status == kapi.ConditionTrue {
89+
accepted = accepted + 1
90+
}
91+
}
92+
}
93+
//Add check to compare acceptance to the number of available routers?
94+
return accepted > 0
95+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
package aggregated_logging
2+
3+
import (
4+
"errors"
5+
"testing"
6+
7+
kapi "k8s.io/kubernetes/pkg/api"
8+
9+
"github.com/openshift/origin/pkg/diagnostics/log"
10+
routesapi "github.com/openshift/origin/pkg/route/api"
11+
)
12+
13+
const (
14+
testRoutesKey = "routes"
15+
)
16+
17+
type fakeRoutesDiagnostic struct {
18+
fakeDiagnostic
19+
fakeRoutes routesapi.RouteList
20+
clienterrors map[string]error
21+
}
22+
23+
func newFakeRoutesDiagnostic(t *testing.T) *fakeRoutesDiagnostic {
24+
return &fakeRoutesDiagnostic{
25+
fakeDiagnostic: *newFakeDiagnostic(t),
26+
clienterrors: map[string]error{},
27+
}
28+
}
29+
30+
func (f *fakeRoutesDiagnostic) addRouteWith(condType routesapi.RouteIngressConditionType, condStatus kapi.ConditionStatus, cert string, key string) {
31+
ingress := routesapi.RouteIngress{
32+
Conditions: []routesapi.RouteIngressCondition{
33+
{
34+
Type: condType,
35+
Status: condStatus,
36+
},
37+
},
38+
}
39+
route := routesapi.Route{
40+
ObjectMeta: kapi.ObjectMeta{Name: "aname"},
41+
Status: routesapi.RouteStatus{
42+
Ingress: []routesapi.RouteIngress{ingress},
43+
},
44+
}
45+
if len(cert) != 0 && len(key) != 0 {
46+
tls := routesapi.TLSConfig{
47+
Certificate: cert,
48+
Key: key,
49+
}
50+
route.Spec.TLS = &tls
51+
}
52+
f.fakeRoutes.Items = append(f.fakeRoutes.Items, route)
53+
}
54+
55+
func (f *fakeRoutesDiagnostic) routes(project string, options kapi.ListOptions) (*routesapi.RouteList, error) {
56+
value, ok := f.clienterrors[testRoutesKey]
57+
if ok {
58+
return nil, value
59+
}
60+
return &f.fakeRoutes, nil
61+
}
62+
63+
func TestRouteWhenErrorFromClient(t *testing.T) {
64+
d := newFakeRoutesDiagnostic(t)
65+
d.clienterrors[testRoutesKey] = errors.New("some client error")
66+
67+
checkRoutes(d, d, fakeProject)
68+
d.assertMessage("AGL0305", "Exp an error when there is a client error retrieving routes", log.ErrorLevel)
69+
d.dumpMessages()
70+
}
71+
72+
func TestRouteWhenZeroRoutesAvailable(t *testing.T) {
73+
d := newFakeRoutesDiagnostic(t)
74+
75+
checkRoutes(d, d, fakeProject)
76+
d.assertMessage("AGL0310", "Exp an error when there are no routes to support logging", log.ErrorLevel)
77+
d.dumpMessages()
78+
}
79+
80+
//test error route != accepted
81+
func TestRouteWhenRouteNotAccepted(t *testing.T) {
82+
d := newFakeRoutesDiagnostic(t)
83+
d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "", "")
84+
85+
checkRoutes(d, d, fakeProject)
86+
d.assertMessage("AGL0325", "Exp an error when a route was not accepted", log.ErrorLevel)
87+
d.assertMessage("AGL0331", "Exp to skip the cert check since none specified", log.DebugLevel)
88+
d.dumpMessages()
89+
}
90+
func TestRouteWhenRouteAccepted(t *testing.T) {
91+
d := newFakeRoutesDiagnostic(t)
92+
d.addRouteWith(routesapi.RouteAdmitted, kapi.ConditionTrue, "", "")
93+
94+
checkRoutes(d, d, fakeProject)
95+
d.assertNoErrors()
96+
d.dumpMessages()
97+
}
98+
99+
func TestRouteWhenErrorDecodingCert(t *testing.T) {
100+
d := newFakeRoutesDiagnostic(t)
101+
d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "cert", "key")
102+
103+
checkRoutes(d, d, fakeProject)
104+
d.assertMessage("AGL0350", "Exp an error when unable to decode cert", log.ErrorLevel)
105+
d.dumpMessages()
106+
}
107+
108+
func TestRouteWhenErrorParsingCert(t *testing.T) {
109+
d := newFakeRoutesDiagnostic(t)
110+
d.addRouteWith(routesapi.RouteExtendedValidationFailed, kapi.ConditionTrue, "cert", "key")
111+
112+
checkRoutes(d, d, fakeProject)
113+
d.assertMessage("AGL0350", "Exp an error when unable to decode cert", log.ErrorLevel)
114+
d.dumpMessages()
115+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package aggregated_logging
2+
3+
import (
4+
"fmt"
5+
6+
"k8s.io/kubernetes/pkg/util/sets"
7+
)
8+
9+
const sccPrivilegedName = "privileged"
10+
11+
var sccPrivilegedNames = sets.NewString(fluentdServiceAccountName)
12+
13+
const sccPrivilegedUnboundServiceAccount = `
14+
The ServiceAccount '%[1]s' does not have a privileged SecurityContextConstraint for project '%[2]s'. As a
15+
user with a cluster-admin role, you can grant the permissions by running
16+
the following:
17+
18+
oadm policy add-scc-to-user privileged system:serviceaccount:%[2]s:%[1]s
19+
`
20+
21+
func checkSccs(r diagnosticReporter, adapter sccAdapter, project string) {
22+
r.Debug("AGL0700", "Checking SecurityContextConstraints...")
23+
scc, err := adapter.getScc(sccPrivilegedName)
24+
if err != nil {
25+
r.Error("AGL0705", err, fmt.Sprintf("There was an error while trying to retrieve the SecurityContextConstraints for the logging stack: %s", err))
26+
return
27+
}
28+
privilegedUsers := sets.NewString()
29+
for _, user := range scc.Users {
30+
privilegedUsers.Insert(user)
31+
}
32+
for _, name := range sccPrivilegedNames.List() {
33+
if !privilegedUsers.Has(fmt.Sprintf("system:serviceaccount:%s:%s", project, name)) {
34+
r.Error("AGL0710", nil, fmt.Sprintf(sccPrivilegedUnboundServiceAccount, name, project))
35+
}
36+
}
37+
}
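A side note on the %[1]s and %[2]s verbs used in sccPrivilegedUnboundServiceAccount: these are explicit argument indexes, which is what lets the template place the project before the service-account name in the suggested command even though the arguments are passed in the opposite order. A minimal sketch with placeholder names (not taken from this commit):

// Sketch: expanding a template that uses indexed fmt verbs.
// "my-serviceaccount" and "my-project" are placeholder values.
package main

import "fmt"

const hint = "oadm policy add-scc-to-user privileged system:serviceaccount:%[2]s:%[1]s"

func main() {
	// %[1]s refers to the first argument, %[2]s to the second, regardless of
	// where they appear in the format string.
	fmt.Printf(hint+"\n", "my-serviceaccount", "my-project")
	// Prints: oadm policy add-scc-to-user privileged system:serviceaccount:my-project:my-serviceaccount
}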
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
package aggregated_logging
2+
3+
import (
4+
"encoding/json"
5+
"errors"
6+
"fmt"
7+
"testing"
8+
9+
kapi "k8s.io/kubernetes/pkg/api"
10+
11+
"github.com/openshift/origin/pkg/diagnostics/log"
12+
)
13+
14+
type fakeSccDiagnostic struct {
15+
fakeDiagnostic
16+
fakeScc kapi.SecurityContextConstraints
17+
}
18+
19+
func newFakeSccDiagnostic(t *testing.T) *fakeSccDiagnostic {
20+
return &fakeSccDiagnostic{
21+
fakeDiagnostic: *newFakeDiagnostic(t),
22+
}
23+
}
24+
25+
func (f *fakeSccDiagnostic) getScc(name string) (*kapi.SecurityContextConstraints, error) {
26+
json, _ := json.Marshal(f.fakeScc)
27+
f.test.Logf(">> test#getScc(%s), err: %s, scc:%s", name, f.err, string(json))
28+
if f.err != nil {
29+
return nil, f.err
30+
}
31+
return &f.fakeScc, nil
32+
}
33+
34+
func (f *fakeSccDiagnostic) addSccFor(name string, project string) {
35+
f.fakeScc.Users = append(f.fakeScc.Users, fmt.Sprintf("system:serviceaccount:%s:%s", project, name))
36+
}
37+
38+
func TestCheckSccWhenClientReturnsError(t *testing.T) {
39+
d := newFakeSccDiagnostic(t)
40+
d.err = errors.New("client error")
41+
42+
checkSccs(d, d, fakeProject)
43+
44+
d.assertMessage("AGL0705", "Exp error when client returns error getting SCC", log.ErrorLevel)
45+
d.dumpMessages()
46+
}
47+
48+
func TestCheckSccWhenMissingPrivilegedUsers(t *testing.T) {
49+
d := newFakeSccDiagnostic(t)
50+
51+
checkSccs(d, d, fakeProject)
52+
53+
d.assertMessage("AGL0710", "Exp error when SCC is missing exp service acount", log.ErrorLevel)
54+
d.dumpMessages()
55+
}
56+
57+
func TestCheckSccWhenEverythingExists(t *testing.T) {
58+
d := newFakeSccDiagnostic(t)
59+
d.addSccFor(fluentdServiceAccountName, fakeProject)
60+
61+
checkSccs(d, d, fakeProject)
62+
63+
d.assertNoErrors()
64+
d.dumpMessages()
65+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package aggregated_logging
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
7+
kapi "k8s.io/kubernetes/pkg/api"
8+
"k8s.io/kubernetes/pkg/util/sets"
9+
)
10+
11+
var serviceAccountNames = sets.NewString("logging-deployer", "aggregated-logging-kibana", "aggregated-logging-curator", "aggregated-logging-elasticsearch", fluentdServiceAccountName)
12+
13+
const serviceAccountsMissing = `
14+
Did not find ServiceAccounts: %s. The logging infrastructure will not function
15+
properly without them. You may need to re-run the installer.
16+
`
17+
18+
func checkServiceAccounts(d diagnosticReporter, f saAdapter, project string) {
19+
d.Debug("AGL0500", fmt.Sprintf("Checking ServiceAccounts in project '%s'...", project))
20+
saList, err := f.serviceAccounts(project, kapi.ListOptions{})
21+
if err != nil {
22+
d.Error("AGL0505", err, fmt.Sprintf("There was an error while trying to retrieve the pods for the AggregatedLogging stack: %s", err))
23+
return
24+
}
25+
foundNames := sets.NewString()
26+
for _, sa := range saList.Items {
27+
foundNames.Insert(sa.ObjectMeta.Name)
28+
}
29+
missing := sets.NewString()
30+
for _, name := range serviceAccountNames.List() {
31+
if !foundNames.Has(name) {
32+
missing.Insert(name)
33+
}
34+
}
35+
if missing.Len() != 0 {
36+
d.Error("AGL0515", nil, fmt.Sprintf(serviceAccountsMissing, strings.Join(missing.List(), ",")))
37+
}
38+
}
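The missing-name bookkeeping above is a plain set difference; k8s.io/kubernetes/pkg/util/sets also exposes a Difference helper that could express the same thing more compactly. A minimal sketch under that assumption (the account names below are placeholders):

// Sketch: computing missing ServiceAccount names with sets.String.Difference
// instead of the explicit loop used above.
package main

import (
	"fmt"
	"strings"

	"k8s.io/kubernetes/pkg/util/sets"
)

func main() {
	expected := sets.NewString("logging-deployer", "aggregated-logging-kibana")
	found := sets.NewString("logging-deployer")
	missing := expected.Difference(found) // elements of expected not in found
	if missing.Len() != 0 {
		fmt.Println("missing:", strings.Join(missing.List(), ","))
	}
}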
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
package aggregated_logging
2+
3+
import (
4+
"errors"
5+
"testing"
6+
7+
kapi "k8s.io/kubernetes/pkg/api"
8+
9+
"github.com/openshift/origin/pkg/diagnostics/log"
10+
)
11+
12+
type mockServiceAccountDiagnostic struct {
13+
accounts kapi.ServiceAccountList
14+
fakeDiagnostic
15+
}
16+
17+
func newMockServiceAccountDiagnostic(t *testing.T) *mockServiceAccountDiagnostic {
18+
return &mockServiceAccountDiagnostic{
19+
accounts: kapi.ServiceAccountList{},
20+
fakeDiagnostic: *newFakeDiagnostic(t),
21+
}
22+
}
23+
24+
func (m *mockServiceAccountDiagnostic) serviceAccounts(project string, options kapi.ListOptions) (*kapi.ServiceAccountList, error) {
25+
if m.err != nil {
26+
return &m.accounts, m.err
27+
}
28+
return &m.accounts, nil
29+
}
30+
31+
func (d *mockServiceAccountDiagnostic) addServiceAccountNamed(name string) {
32+
meta := kapi.ObjectMeta{Name: name}
33+
d.accounts.Items = append(d.accounts.Items, kapi.ServiceAccount{ObjectMeta: meta})
34+
}
35+
36+
func TestCheckingServiceAccountsWhenFailedResponseFromClient(t *testing.T) {
37+
d := newMockServiceAccountDiagnostic(t)
38+
d.err = errors.New("Some Error")
39+
checkServiceAccounts(d, d, fakeProject)
40+
d.assertMessage("AGL0505",
41+
"Exp an error when unable to retrieve service accounts because of a client error",
42+
log.ErrorLevel)
43+
}
44+
45+
func TestCheckingServiceAccountsWhenMissingExpectedServiceAccount(t *testing.T) {
46+
d := newMockServiceAccountDiagnostic(t)
47+
d.addServiceAccountNamed("foobar")
48+
49+
checkServiceAccounts(d, d, fakeProject)
50+
d.assertMessage("AGL0515",
51+
"Exp an error when an expected service account was not found.",
52+
log.ErrorLevel)
53+
}
54+
55+
func TestCheckingServiceAccountsIsOk(t *testing.T) {
56+
d := newMockServiceAccountDiagnostic(t)
57+
58+
for _, name := range serviceAccountNames.List() {
59+
d.addServiceAccountNamed(name)
60+
}
61+
62+
checkServiceAccounts(d, d, fakeProject)
63+
d.assertNoErrors()
64+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package aggregated_logging
2+
3+
import (
4+
"fmt"
5+
"strings"
6+
7+
kapi "k8s.io/kubernetes/pkg/api"
8+
"k8s.io/kubernetes/pkg/util/sets"
9+
)
10+
11+
var loggingServices = sets.NewString("logging-es", "logging-es-cluster", "logging-es-ops", "logging-es-ops-cluster", "logging-kibana", "logging-kibana-ops")
12+
13+
const serviceNotFound = `
14+
Expected to find '%s' among the logging services for the project but did not.
15+
`
16+
const serviceOpsNotFound = `
17+
Expected to find '%s' among the logging services for the project but did not. This
18+
may not matter if you chose not to install a separate logging stack to support operations.
19+
`
20+
21+
// checkServices looks to see if the aggregated logging services exist
22+
func checkServices(r diagnosticReporter, adapter servicesAdapter, project string) {
23+
r.Debug("AGL0200", fmt.Sprintf("Checking for services in project '%s' with selector '%s'", project, loggingSelector.AsSelector()))
24+
serviceList, err := adapter.services(project, kapi.ListOptions{LabelSelector: loggingSelector.AsSelector()})
25+
if err != nil {
26+
r.Error("AGL0205", err, fmt.Sprintf("There was an error while trying to retrieve the logging services: %s", err))
27+
return
28+
}
29+
foundServices := sets.NewString()
30+
for _, service := range serviceList.Items {
31+
foundServices.Insert(service.ObjectMeta.Name)
32+
r.Debug("AGL0210", fmt.Sprintf("Retrieved service '%s'", service.ObjectMeta.Name))
33+
}
34+
for _, service := range loggingServices.List() {
35+
if foundServices.Has(service) {
36+
checkServiceEndpoints(r, adapter, project, service)
37+
} else {
38+
if strings.Contains(service, "-ops") {
39+
r.Warn("AGL0215", nil, fmt.Sprintf(serviceOpsNotFound, service))
40+
} else {
41+
r.Error("AGL0217", nil, fmt.Sprintf(serviceNotFound, service))
42+
}
43+
}
44+
}
45+
}
46+
47+
// checkServiceEndpoints validates if there is an available endpoint for the service.
48+
func checkServiceEndpoints(r diagnosticReporter, adapter servicesAdapter, project string, service string) {
49+
endpoints, err := adapter.endpointsForService(project, service)
50+
if err != nil {
51+
r.Warn("AGL0220", err, fmt.Sprintf("Unable to retrieve endpoints for service '%s': %s", service, err))
52+
return
53+
}
54+
if len(endpoints.Subsets) == 0 {
55+
if strings.Contains(service, "-ops") {
56+
r.Info("AGL0223", fmt.Sprintf("There are no endpoints found for service '%s'. This could mean you choose not to install a separate operations cluster during installation.", service))
57+
} else {
58+
r.Warn("AGL0225", nil, fmt.Sprintf("There are no endpoints found for service '%s'. This means there are no pods serviced by this service and this component is not functioning", service))
59+
}
60+
}
61+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
package aggregated_logging
2+
3+
import (
4+
"errors"
5+
"testing"
6+
7+
"github.com/openshift/origin/pkg/diagnostics/log"
8+
kapi "k8s.io/kubernetes/pkg/api"
9+
)
10+
11+
type fakeServicesDiagnostic struct {
12+
list kapi.ServiceList
13+
fakeDiagnostic
14+
endpoints map[string]kapi.Endpoints
15+
endpointErr error
16+
}
17+
18+
func newFakeServicesDiagnostic(t *testing.T) *fakeServicesDiagnostic {
19+
return &fakeServicesDiagnostic{
20+
list: kapi.ServiceList{},
21+
fakeDiagnostic: *newFakeDiagnostic(t),
22+
endpoints: map[string]kapi.Endpoints{},
23+
}
24+
}
25+
26+
func (f *fakeServicesDiagnostic) services(project string, options kapi.ListOptions) (*kapi.ServiceList, error) {
27+
if f.err != nil {
28+
return &f.list, f.err
29+
}
30+
return &f.list, nil
31+
}
32+
func (f *fakeServicesDiagnostic) endpointsForService(project string, service string) (*kapi.Endpoints, error) {
33+
if f.endpointErr != nil {
34+
return nil, f.endpointErr
35+
}
36+
endpoints, _ := f.endpoints[service]
37+
return &endpoints, nil
38+
}
39+
40+
func (f *fakeServicesDiagnostic) addEndpointSubsetTo(service string) {
41+
endpoints := kapi.Endpoints{}
42+
endpoints.Subsets = []kapi.EndpointSubset{{}}
43+
f.endpoints[service] = endpoints
44+
}
45+
46+
func (f *fakeServicesDiagnostic) addServiceNamed(name string) {
47+
meta := kapi.ObjectMeta{Name: name}
48+
f.list.Items = append(f.list.Items, kapi.Service{ObjectMeta: meta})
49+
}
50+
51+
// test error from client
52+
func TestCheckingServicesWhenFailedResponseFromClient(t *testing.T) {
53+
d := newFakeServicesDiagnostic(t)
54+
d.err = errors.New("an error")
55+
checkServices(d, d, fakeProject)
56+
d.assertMessage("AGL0205",
57+
"Exp an error when unable to retrieve services because of a client error",
58+
log.ErrorLevel)
59+
}
60+
61+
func TestCheckingServicesWhenMissingServices(t *testing.T) {
62+
d := newFakeServicesDiagnostic(t)
63+
d.addServiceNamed("logging-es")
64+
65+
checkServices(d, d, fakeProject)
66+
d.assertMessage("AGL0215",
67+
"Exp an warning when an expected sercies is not found",
68+
log.WarnLevel)
69+
}
70+
71+
func TestCheckingServicesWarnsWhenRetrievingEndpointsErrors(t *testing.T) {
72+
d := newFakeServicesDiagnostic(t)
73+
d.addServiceNamed("logging-es")
74+
d.endpointErr = errors.New("an endpoint error")
75+
76+
checkServices(d, d, fakeProject)
77+
d.assertMessage("AGL0220",
78+
"Exp a warning when there is an error retrieving endpoints for a service",
79+
log.WarnLevel)
80+
}
81+
82+
func TestCheckingServicesWarnsWhenServiceHasNoEndpoints(t *testing.T) {
83+
d := newFakeServicesDiagnostic(t)
84+
for _, service := range loggingServices.List() {
85+
d.addServiceNamed(service)
86+
}
87+
88+
checkServices(d, d, fakeProject)
89+
d.assertMessage("AGL0225",
90+
"Exp a warning when an expected service has no endpoints",
91+
log.WarnLevel)
92+
}
93+
94+
func TestCheckingServicesHasNoErrorsOrWarningsForExpServices(t *testing.T) {
95+
d := newFakeServicesDiagnostic(t)
96+
for _, service := range loggingServices.List() {
97+
d.addServiceNamed(service)
98+
d.addEndpointSubsetTo(service)
99+
}
100+
101+
checkServices(d, d, fakeProject)
102+
d.assertNoErrors()
103+
d.assertNoWarnings()
104+
}
