Skip to content

Prevent leaking seclist ingress and egress rules when updating a service #238

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,15 +43,15 @@ all: check test build

.PHONY: gofmt
gofmt:
@./hack/check-gofmt.sh ${SRC_DIRS}
@./hack/check-gofmt.sh $(SRC_DIRS)

.PHONY: golint
golint:
@./hack/check-golint.sh ${SRC_DIRS}
@./hack/check-golint.sh $(SRC_DIRS)

.PHONY: govet
govet:
@./hack/check-govet.sh ${SRC_DIRS}
@./hack/check-govet.sh $(SRC_DIRS)

.PHONY: check
check: gofmt govet golint
Expand All @@ -62,17 +62,17 @@ build-dirs:

.PHONY: build
build: build-dirs manifests
@GOOS=${GOOS} GOARCH=${ARCH} go build \
@GOOS=$(GOOS) GOARCH=$(ARCH) go build \
-i \
-o dist/oci-cloud-controller-manager \
-installsuffix "static" \
-ldflags "-X main.version=${VERSION} -X main.build=${BUILD}" \
-ldflags "-X main.version=$(VERSION) -X main.build=$(BUILD)" \
./cmd/oci-cloud-controller-manager

.PHONY: manifests
manifests: build-dirs
@cp -a manifests/* dist
@sed ${SED_INPLACE} \
@sed $(SED_INPLACE) \
's#${IMAGE}:[0-9]\+.[0-9]\+.[0-9]\+#${IMAGE}:${VERSION}#g' \
dist/oci-cloud-controller-manager.yaml

Expand Down Expand Up @@ -114,18 +114,18 @@ clean:

.PHONY: deploy
deploy:
kubectl -n kube-system set image ds/${BIN} ${BIN}=${IMAGE}:${VERSION}
kubectl -n kube-system set image ds/$(BIN) $(BIN)=$(IMAGE):$(VERSION)

.PHONY: run-dev
run-dev: build
@dist/oci-cloud-controller-manager \
--kubeconfig=${KUBECONFIG} \
--cloud-config=${CLOUD_PROVIDER_CFG} \
--kubeconfig=$(KUBECONFIG) \
--cloud-config=$(CLOUD_PROVIDER_CFG) \
--cluster-cidr=10.244.0.0/16 \
--leader-elect-resource-lock=configmaps \
--cloud-provider=oci \
-v=4

.PHONY: version
version:
@echo ${VERSION}
@echo $(VERSION)
5 changes: 3 additions & 2 deletions hack/boilerplate/boilerplate.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def file_extension(filename):
".wercker",
"vendor",

# Imported from Kubernetes maintaining origional copyright header
# Imported from Kubernetes maintaining original copyright header
"hack/boilerplate/boilerplate.py",
"hack/boilerplate/boilerplate_test.py",
"hack/boilerplate/test",
Expand All @@ -146,9 +146,10 @@ def file_extension(filename):
"test/e2e/load_balancer.go",
"test/e2e/framework/cleanup.go",
"test/e2e/framework/framework.go",
"test/e2e/framework/seclist_util.go",
"test/e2e/framework/service_util.go",
"test/e2e/framework/util.go",
"test/e2e/framework/networking_utils.go",
"test/e2e/framework/networking_util.go",
"test/e2e/framework/ginkgowrapper/wrapper.go",
]

Expand Down
5 changes: 1 addition & 4 deletions hack/test-e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,7 @@ function run_e2e_tests() {
ginkgo -v -progress \
-focus "\[Canary\]" \
test/e2e \
-- \
--kubeconfig="${KUBECONFIG}" \
--cloud-config="${CLOUDCONFIG}" \
--delete-namespace=false
-- --kubeconfig=${KUBECONFIG} --cloud-config=${CLOUDCONFIG} --delete-namespace=true
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agh, the change in the Makefile above might have been to match this usage in the tests. I can still roll it back.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems irrelevant to this PR so would prefer it to be reversed to simplify review.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I rolled this back, but reckon we should have kept it. No-one will fix it in a single PR and in any other PR the same argument can be made! Anyway, should be reverted now!

}

# Main ************************************************************************
Expand Down
6 changes: 3 additions & 3 deletions pkg/oci/ccm.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func NewCloudProvider(config *Config) (cloudprovider.Interface, error) {
return nil, err
}

rateLimiter := newRateLimiter(logger.Sugar(), config.RateLimiter)
rateLimiter := NewRateLimiter(logger.Sugar(), config.RateLimiter)

c, err := client.New(logger.Sugar(), cp, &rateLimiter)
if err != nil {
Expand Down Expand Up @@ -235,9 +235,9 @@ func buildConfigurationProvider(logger *zap.Logger, config *Config) (common.Conf
return cp, nil
}

// newRateLimiter builds and returns a struct containing read and write
// NewRateLimiter builds and returns a struct containing read and write
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I needed this when creating the new OCI client for the e2e tests. It seemed wrong to cut and paste the code. This function seems to be a 'constructor', so I made it public.

// rate limiters. Defaults are used where no (0) value is provided.
func newRateLimiter(logger *zap.SugaredLogger, config *RateLimiterConfig) client.RateLimiter {
func NewRateLimiter(logger *zap.SugaredLogger, config *RateLimiterConfig) client.RateLimiter {
if config == nil {
config = &RateLimiterConfig{}
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/oci/ccm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func TestBuildRateLimiterWithConfig(t *testing.T) {
RateLimitBucketWrite: bucketWrite,
}

rateLimiter := newRateLimiter(zap.S(), rateLimiterConfig)
rateLimiter := NewRateLimiter(zap.S(), rateLimiterConfig)

if rateLimiter.Reader.QPS() != qpsRead {
t.Errorf("unexpected QPS (read) value: expected %f but found %f", qpsRead, rateLimiter.Reader.QPS())
Expand All @@ -47,7 +47,7 @@ func TestBuildRateLimiterWithConfig(t *testing.T) {
func TestBuildRateLimiterWithDefaults(t *testing.T) {
rateLimiterConfig := &RateLimiterConfig{}

rateLimiter := newRateLimiter(zap.S(), rateLimiterConfig)
rateLimiter := NewRateLimiter(zap.S(), rateLimiterConfig)

if rateLimiter.Reader.QPS() != rateLimitQPSDefault {
t.Errorf("unexpected QPS (read) value: expected %f but found %f", rateLimitQPSDefault, rateLimiter.Reader.QPS())
Expand Down
19 changes: 12 additions & 7 deletions pkg/oci/load_balancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,12 +256,6 @@ func (cp *CloudProvider) createLoadBalancer(ctx context.Context, spec *LBSpec) (
return nil, errors.Wrap(err, "getting subnets for nodes")
}

for _, ports := range spec.Ports {
if err = spec.securityListManager.Update(ctx, lbSubnets, nodeSubnets, spec.SourceCIDRs, nil, ports); err != nil {
return nil, err
}
}

// Then we create the load balancer and wait for it to be online.
certs, err := spec.Certificates()
if err != nil {
Expand Down Expand Up @@ -293,7 +287,18 @@ func (cp *CloudProvider) createLoadBalancer(ctx context.Context, spec *LBSpec) (
}

logger.With("loadBalancerID", *lb.Id).Info("Load balancer created")
return loadBalancerToStatus(lb)
status, err := loadBalancerToStatus(lb)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved adding the seclist rules to after we have created the load balancer. If creation fails for some reason, the rules will not be added. Could there be unforeseen issues with this?

if status != nil && len(status.Ingress) > 0 {
// If the LB is successfully provisioned then open lb/node subnet seclists egress/ingress.
for _, ports := range spec.Ports {
if err = spec.securityListManager.Update(ctx, lbSubnets, nodeSubnets, spec.SourceCIDRs, nil, ports); err != nil {
return nil, err
}
}
}

return status, err

}

// EnsureLoadBalancer creates a new load balancer or updates the existing one.
Expand Down
76 changes: 61 additions & 15 deletions pkg/oci/load_balancer_security_lists.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ func (s *baseSecurityListManager) updateBackendRules(ctx context.Context, lbSubn

// updateLoadBalancerRules handles updating the ingress and egress rules for the load balance subnets.
// If the listener is nil, then only egress rules from the load balancer to the backend subnets will be checked.
func (s *baseSecurityListManager) updateLoadBalancerRules(ctx context.Context, lbSubnets []*core.Subnet, nodeSubnets []*core.Subnet, sourceCIDRs []string, ports portSpec) error {
func (s *baseSecurityListManager) updateLoadBalancerRules(ctx context.Context, lbSubnets []*core.Subnet, nodeSubnets []*core.Subnet, sourceCIDRs []string, actualPorts *portSpec, desiredPorts portSpec) error {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have actualPorts and desiredPorts here but one is a pointer, the other isn't. Is there any reason?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeagh, I agree - darn fuggly init?

To cut a long story short, I think it derives from another bit of fuggly + typo on master ([here](https://github.com/oracle/oci-cloud-controller-manager/blob/e11429bc410ec62e52ba0aa504b777677bd3f908/pkg/oci/load_balancer_security_lists.go#L75)).

I didn't fix it up on this run because I was trying to keep the changes small and close to the problem I am attempting to fix - as requested. It is really just cosmetic. I am happy to fix it up in this commit or as a subsequent one. Up to you. Please advise.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logically makes sense. Update is used both for creation and modification and therefore there are call sites that don't have any "actual" (existing) ports associated.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was just curious in case it was a typo. If there are call sites that require nil values then we can leave it as is.

for _, lbSubnet := range lbSubnets {
secList, etag, err := s.getSecurityList(ctx, lbSubnet)
if err != nil {
Expand All @@ -152,12 +152,20 @@ func (s *baseSecurityListManager) updateLoadBalancerRules(ctx context.Context, l

logger := s.logger.With("securityListID", *secList.Id)

lbEgressRules := getLoadBalancerEgressRules(logger, secList.EgressSecurityRules, nodeSubnets, ports.BackendPort, s.serviceLister)
lbEgressRules = getLoadBalancerEgressRules(logger, lbEgressRules, nodeSubnets, ports.HealthCheckerPort, s.serviceLister)
// 0 denotes nil ports.
var currentBackEndPort = 0
var currentHealthCheck = 0
if actualPorts != nil {
currentBackEndPort = actualPorts.BackendPort
currentHealthCheck = actualPorts.HealthCheckerPort
}

lbEgressRules := getLoadBalancerEgressRules(logger, secList.EgressSecurityRules, nodeSubnets, currentBackEndPort, desiredPorts.BackendPort, s.serviceLister)
lbEgressRules = getLoadBalancerEgressRules(logger, lbEgressRules, nodeSubnets, currentHealthCheck, desiredPorts.HealthCheckerPort, s.serviceLister)

lbIngressRules := secList.IngressSecurityRules
if ports.ListenerPort != 0 {
lbIngressRules = getLoadBalancerIngressRules(logger, lbIngressRules, sourceCIDRs, ports.ListenerPort, s.serviceLister)
if desiredPorts.ListenerPort != 0 {
lbIngressRules = getLoadBalancerIngressRules(logger, lbIngressRules, sourceCIDRs, desiredPorts.ListenerPort, s.serviceLister)
}

if !securityListRulesChanged(secList, lbIngressRules, lbEgressRules) {
Expand Down Expand Up @@ -222,7 +230,7 @@ type defaultSecurityListManager struct {
// Egress rules added:
// from LB subnets to backend subnets on the backend port
func (s *defaultSecurityListManager) Update(ctx context.Context, lbSubnets []*core.Subnet, backendSubnets []*core.Subnet, sourceCIDRs []string, actualPorts *portSpec, desiredPorts portSpec) error {
if err := s.updateLoadBalancerRules(ctx, lbSubnets, backendSubnets, sourceCIDRs, desiredPorts); err != nil {
if err := s.updateLoadBalancerRules(ctx, lbSubnets, backendSubnets, sourceCIDRs, actualPorts, desiredPorts); err != nil {
return err
}

Expand All @@ -238,7 +246,7 @@ func (s *defaultSecurityListManager) Delete(ctx context.Context, lbSubnets []*co
noSubnets := []*core.Subnet{}
noSourceCIDRs := []string{}

err := s.updateLoadBalancerRules(ctx, lbSubnets, noSubnets, noSourceCIDRs, ports)
err := s.updateLoadBalancerRules(ctx, lbSubnets, noSubnets, noSourceCIDRs, &ports, ports)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I chose to pass `ports` twice (as both the actual and the desired ports) rather than passing nil for the actual ports. I think this makes sense.

if err != nil {
return err
}
Expand All @@ -258,14 +266,14 @@ type frontendSecurityListManager struct {
// from source cidrs to lb subnets on the listener port
func (s *frontendSecurityListManager) Update(ctx context.Context, lbSubnets []*core.Subnet, _ []*core.Subnet, sourceCIDRs []string, actualPorts *portSpec, desiredPorts portSpec) error {
noSubnets := []*core.Subnet{}
return s.updateLoadBalancerRules(ctx, lbSubnets, noSubnets, sourceCIDRs, desiredPorts)
return s.updateLoadBalancerRules(ctx, lbSubnets, noSubnets, sourceCIDRs, actualPorts, desiredPorts)
}

// Delete the ingress security list rules associated with the listener.
func (s *frontendSecurityListManager) Delete(ctx context.Context, lbSubnets []*core.Subnet, backendSubnets []*core.Subnet, ports portSpec) error {
noSubnets := []*core.Subnet{}
noSourceCIDRs := []string{}
return s.updateLoadBalancerRules(ctx, lbSubnets, noSubnets, noSourceCIDRs, ports)
return s.updateLoadBalancerRules(ctx, lbSubnets, noSubnets, noSourceCIDRs, &ports, ports)
}

// securityListManagerNOOP implements the securityListManager interface but does
Expand Down Expand Up @@ -352,6 +360,14 @@ func getNodeIngressRules(
desiredPorts portSpec,
serviceLister listersv1.ServiceLister,
) []core.IngressSecurityRule {
// 0 denotes nil ports.
var currentBackEndPort = 0
var currentHealthCheckPort = 0
if actualPorts != nil {
currentBackEndPort = actualPorts.BackendPort
currentHealthCheckPort = actualPorts.HealthCheckerPort
}

desiredBackend := sets.NewString()
desiredHealthChecker := sets.NewString()
for _, lbSubnet := range lbSubnets {
Expand All @@ -362,6 +378,23 @@ func getNodeIngressRules(
ingressRules := []core.IngressSecurityRule{}

for _, rule := range rules {
// Remove (do not re-add) any rule that represents the old case when
// mutating a single ranged backend port or health check port.
if rule.TcpOptions != nil && rule.TcpOptions.DestinationPortRange != nil &&
Copy link
Contributor Author

@templecloud templecloud Aug 21, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the first part of the fix. I wanted to be on the safe side so made it very restrictive. It should only target the 'single port rule mutation' described.

*rule.TcpOptions.DestinationPortRange.Min == *rule.TcpOptions.DestinationPortRange.Max &&
*rule.TcpOptions.DestinationPortRange.Min != desiredPorts.BackendPort && *rule.TcpOptions.DestinationPortRange.Max != desiredPorts.BackendPort &&
Copy link
Member

@owainlewis owainlewis Aug 23, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This large logical conjunction looks quite prone to subtle mistakes and I think is duplicated again below. Would be worth factoring this into a smaller helper function and unit testing.

Copy link
Contributor Author

@templecloud templecloud Aug 23, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I mentioned this in the JIRA and I started something like this on the 'other branch'. However, I thought we were meant to be making this fix with as few changes as possible and then iterating to improve other bits? I am happy to have a go at improving this now, but I was trying to avoid a 'large commit'. Do you want me to refactor this code in this branch along with the fix attempt?

*rule.TcpOptions.DestinationPortRange.Min != desiredPorts.HealthCheckerPort && *rule.TcpOptions.DestinationPortRange.Max != desiredPorts.HealthCheckerPort {
var rulePort = *rule.TcpOptions.DestinationPortRange.Min
if rulePort == currentBackEndPort || rulePort == currentHealthCheckPort {
logger.With(
"source", *rule.Source,
"destinationPortRangeMin", *rule.TcpOptions.DestinationPortRange.Min,
"destinationPortRangeMax", *rule.TcpOptions.DestinationPortRange.Max,
).Debug("Deleting load balancer ingres security rule")
continue
}
}

if rule.TcpOptions == nil || rule.TcpOptions.SourcePortRange != nil || rule.TcpOptions.DestinationPortRange == nil {
// this rule doesn't apply to this service so nothing to do but keep it
ingressRules = append(ingressRules, rule)
Expand Down Expand Up @@ -521,7 +554,7 @@ func getLoadBalancerEgressRules(
logger *zap.SugaredLogger,
rules []core.EgressSecurityRule,
nodeSubnets []*core.Subnet,
port int,
actualPort, desiredPort int,
serviceLister listersv1.ServiceLister,
) []core.EgressSecurityRule {
nodeCIDRs := sets.NewString()
Expand All @@ -531,8 +564,21 @@ func getLoadBalancerEgressRules(

egressRules := []core.EgressSecurityRule{}
for _, rule := range rules {
// Remove (do not re-add) any rule that represents the old case when mutating a single ranged port.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the second part of the fix. I wanted to be on the safe side so made it very restrictive. It should only target the 'single port rule mutation' described.

if rule.TcpOptions != nil && rule.TcpOptions.DestinationPortRange != nil &&
*rule.TcpOptions.DestinationPortRange.Min == *rule.TcpOptions.DestinationPortRange.Max &&
*rule.TcpOptions.DestinationPortRange.Min != desiredPort && *rule.TcpOptions.DestinationPortRange.Max != desiredPort &&
*rule.TcpOptions.DestinationPortRange.Min == actualPort && *rule.TcpOptions.DestinationPortRange.Max == actualPort {
logger.With(
"destination", *rule.Destination,
"destinationPortRangeMin", *rule.TcpOptions.DestinationPortRange.Min,
"destinationPortRangeMax", *rule.TcpOptions.DestinationPortRange.Max,
).Debug("Deleting load balancer ingres security rule")
continue
}

if rule.TcpOptions == nil || rule.TcpOptions.SourcePortRange != nil || rule.TcpOptions.DestinationPortRange == nil ||
*rule.TcpOptions.DestinationPortRange.Min != port || *rule.TcpOptions.DestinationPortRange.Max != port {
*rule.TcpOptions.DestinationPortRange.Min != desiredPort || *rule.TcpOptions.DestinationPortRange.Max != desiredPort {
// this rule doesn't apply to this service so nothing to do but keep it
egressRules = append(egressRules, rule)
continue
Expand All @@ -545,19 +591,19 @@ func getLoadBalancerEgressRules(
continue
}

inUse, err := healthCheckPortInUse(serviceLister, int32(port))
inUse, err := healthCheckPortInUse(serviceLister, int32(desiredPort))
if err != nil {
// Unable to determine if this port is in use by another service, so I guess
// we better err on the safe side and keep the rule.
logger.With(zap.Error(err), "port", port).Error("Failed to determine if port is still in use")
logger.With(zap.Error(err), "port", desiredPort).Error("Failed to determine if port is still in use")
egressRules = append(egressRules, rule)
continue
}

if inUse {
// This rule is no longer needed for this service, but is still used
// by another service, so we must still keep it.
logger.With("port", port).Debug("Port still in use by another service.")
logger.With("port", desiredPort).Debug("Port still in use by another service.")
egressRules = append(egressRules, rule)
continue
}
Expand All @@ -579,7 +625,7 @@ func getLoadBalancerEgressRules(
// All the remaining node cidr's are new and don't have a corresponding rule
// so we need to create one for each.
for _, desired := range nodeCIDRs.List() {
rule := makeEgressSecurityRule(desired, port)
rule := makeEgressSecurityRule(desired, desiredPort)
logger.With(
"destination", *rule.Destination,
"destinationPortRangeMin", *rule.TcpOptions.DestinationPortRange.Min,
Expand Down
Loading