Skip to content

Support configuration of security list management modes as LB service annotation #225

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Aug 2, 2018
66 changes: 40 additions & 26 deletions hack/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ CCM_NAME="oci-cloud-controller-manager"
# downgrade operations.
CCM_LOCK_LABEL="ccm-deployment-lock"

function ts() {
  # Emit the current local time as "YYYY-MM-DD HH:MM:SS" (used as a log prefix).
  date '+%Y-%m-%d %H:%M:%S'
}

# Kubernetes Cluster CCM Functions ********************************************
#
Expand All @@ -50,7 +54,7 @@ function get-k8s-master() {
}

# Print the image (repo:tag) of the first container in the CCM daemon-set.
# kubectl errors (e.g. daemon-set absent) are suppressed so callers get an
# empty string rather than noise on stderr.
function get-ccm-ds-image-version() {
  # Declare and assign separately so a kubectl failure is not masked by 'local'.
  local res
  res=$(kubectl -n kube-system get ds "${CCM_NAME}" -o=jsonpath="{.spec.template.spec.containers[0].image}" 2>/dev/null)
  echo "${res}"
}

Expand All @@ -70,13 +74,13 @@ function get-ccm-ds-json() {
}

# Print the name of the oci-cloud-controller-manager pod in kube-system.
# kubectl errors are suppressed; an empty string means "no pod found".
function get-ccm-pod-name() {
  # Declare and assign separately so a kubectl failure is not masked by 'local'.
  local name
  name=$(kubectl -n kube-system get pods 2>/dev/null | grep oci-cloud-controller-manager | awk '{print $1}')
  echo "${name}"
}

# Print the image (repo:tag) the running CCM pod's first container reports.
# Returns an empty string when the pod does not exist.
function get-ccm-pod-image-version() {
  local name ready
  name=$(get-ccm-pod-name)
  # Quote ${name}: an empty/odd pod name must not word-split the kubectl args.
  ready=$(kubectl -n kube-system get pod "${name}" -o=jsonpath='{.status.containerStatuses[0].image}' 2>/dev/null)
  echo "${ready}"
}

Expand Down Expand Up @@ -105,12 +109,13 @@ function is-ccm-pod-version-ready() {
function wait-for-ccm-pod-version-ready() {
local version=$1
local duration=${2:-60}
local sleep=${3:-10}
local sleep=${3:-1}
local timeout=$(($(date +%s) + $duration))
while [ $(date +%s) -lt $timeout ]; do
if [ $(is-ccm-pod-version-ready ${version}) = 'true' ]; then
return 0
fi
echo "$(ts) : Waiting for ccm pod to be ready."
sleep ${sleep}
done
echo "Failed to wait for pod version '${version}' to be ready."
Expand Down Expand Up @@ -144,10 +149,12 @@ function get-ccm-manifest-version() {
# NB: The date is used to help auto-release a lock that has been placed.
function lock-ccm-deployment() {
  # Take the deployment lock by stamping the daemon-set with the lock
  # annotation set to the current epoch time.
  kubectl -n kube-system annotate ds "${CCM_NAME}" "${CCM_LOCK_LABEL}=$(date +%s)"
  echo "$(ts) : Locked ccm deployment."
}

function unlock-ccm-deployment() {
  # Release the deployment lock; the trailing '-' tells kubectl to remove
  # the annotation from the daemon-set.
  kubectl -n kube-system annotate ds "${CCM_NAME}" "${CCM_LOCK_LABEL}-"
  echo "$(ts) : Unlocked ccm deployment."
}

function get-ccm-deployment-lock() {
Expand Down Expand Up @@ -179,16 +186,17 @@ function auto-release-lock() {
# Wait for the CCM to have no lock present.
# Wait for the CCM to have no lock present.
# $1 - max seconds to wait (default 3600); $2 - poll interval (default 10).
# Exits the script (status 1) if the lock is still held when time runs out.
function wait-for-ccm-deployment-permitted() {
  local duration=${1:-3600}
  local sleep=${2:-10}
  local timeout=$(($(date +%s) + duration))
  while [ "$(date +%s)" -lt "${timeout}" ]; do
    # Reclaim locks whose holder appears to have died before re-checking.
    auto-release-lock
    if [ "$(is-ccm-deployment-locked)" = 'false' ]; then
      return 0
    fi
    echo "$(ts) : Waiting for ccm deployment lock."
    sleep "${sleep}"
  done
  echo "$(ts) : Failed to wait for ccm to finish running existing ci pipeline tests."
  exit 1
}

Expand All @@ -205,13 +213,21 @@ function release-ccm-deployment-lock() {
unlock-ccm-deployment
}

# Get the latest release number of the CCM from github.
# Prints the "tag_name" of the latest release, or nothing on failure.
function get_latest_ccm_release() {
  local repo="oracle/oci-cloud-controller-manager"
  local url="https://api.github.com/repos/${repo}/releases/latest"
  # Print the pipeline output directly: the previous 'echo $(...)' form
  # re-expanded the result unquoted, subjecting it to word-splitting/globbing.
  # NOTE(review): grep/sed JSON parsing is fragile — jq would be safer if available.
  curl -s "${url}" | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/'
}

# Test clean-up Functions *****************************************************
#

# Delete any left-over e2e test namespaces (those matching 'cm-e2e-tests-').
function ensure-clean-e2e-test-namespace() {
  echo "ensuring all 'ccm-e2e-tests' namespaces are terminated."
  # Declare and assign separately so a kubectl failure is not masked by 'local'.
  local res
  res=$(kubectl get ns | grep 'cm-e2e-tests-' | awk '{print $1}')
  # Quote the expansion: '[ ! -z ' with an unquoted multi-word value would
  # produce a "binary operator expected" error.
  if [ -n "${res}" ]; then
    # Namespaces may already be terminating; ignore delete errors.
    echo "${res}" | xargs kubectl delete ns 2> /dev/null
  fi
}

Expand All @@ -223,23 +239,19 @@ function deploy-build-version-ccm() {
local hack_dir=$(cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd)
local dist_dir=$(dirname "${hack_dir}")/dist
local build_version_manifest="${dist_dir}/oci-cloud-controller-manager.yaml"
local rollback_manifest="${dist_dir}/oci-cloud-controller-manager-rollback.yaml"

local version=$(cat ${dist_dir}/VERSION.txt)
if [ ! -f ${build_version_manifest} ]; then
echo "Error: The CCM deployment manifest '${build_version_manifest}' did not exist."
exit 1
fi
local build_version_image=$(get-ccm-manifest-image ${build_version_manifest})
local build_version=$(get-ccm-manifest-version ${build_version_manifest})
local rollback_image=$(get-ccm-ds-image)
local rollback_version=$(get-ccm-ds-version)

# Wait for there to be no lock on CCM deployment; then take the lock.
# NB: Not threadsafe, but, better than nothing...
obtain-ccm-deployment-lock

# Generate a rollback CCM daemon-set manifest.
sed s#${rollback_image}:.*#${rollback_image}:${rollback_version}#g < ${build_version_manifest} > ${rollback_manifest}

# Apply the build daemon-set manifest.
echo "deploying test '${version}' CCM '${build_version_image}:${build_version}' to cluster '$(get-k8s-master)'."
echo "deploying test build '${build_version}' CCM '${build_version_image}:${build_version}' to cluster '$(get-k8s-master)'."
kubectl apply -f ${build_version_manifest}

# Wait for CCM to be ready...
Expand All @@ -249,6 +261,8 @@ function deploy-build-version-ccm() {
echo "currently deployed CCM daemon-set version: $(get-ccm-ds-image-version)"
echo "currently deployed CCM pod version : $(get-ccm-pod-image-version)"
echo "currently deployed CCM pod ready state : $(get-ccm-pod-ready)"
echo "CCM locked? : $(is-ccm-deployment-locked)"

}

# Rollback to the CCM version the cluster originally used before it was upgraded.
Expand All @@ -257,13 +271,12 @@ function rollback-original-ccm() {
local dist_dir=$(dirname "${hack_dir}")/dist
local build_version_manifest="${dist_dir}/oci-cloud-controller-manager.yaml"
local rollback_manifest="${dist_dir}/oci-cloud-controller-manager-rollback.yaml"
local rollback_image=$(get-ccm-manifest-image ${rollback_manifest})
local rollback_version=$(get-ccm-manifest-version ${rollback_manifest})
local rollback_image=$(get-ccm-ds-image)
local rollback_version=$(get_latest_ccm_release)

# Check the rollback manifest exists.
# Generate a roll-back manifest based on the latest CCM release.
if [ ! -f ${rollback_manifest} ]; then
echo "the rollback manifest '${rollback_manifest}' did not exist."
exit 1
sed s#${rollback_image}:.*#${rollback_image}:${rollback_version}#g < ${build_version_manifest} > ${rollback_manifest}
fi

# Apply original CCM daemon-set manifest.
Expand All @@ -273,13 +286,14 @@ function rollback-original-ccm() {
# Wait for CCM to be ready after rollback...
wait-for-ccm-pod-version-ready "${rollback_version}"

# Release the lock on the CCM deployment mechanism.
release-ccm-deployment-lock

# Display Info
echo "currently deployed CCM daemon-set version: $(get-ccm-ds-image-version)"
echo "currently deployed CCM pod version : $(get-ccm-pod-image-version)"
echo "currently deployed CCM pod ready state : $(get-ccm-pod-ready)"

# Release the lock on the CCM deployment mechanism.
release-ccm-deployment-lock
echo "CCM locked? : $(is-ccm-deployment-locked)"
}


Expand Down
4 changes: 2 additions & 2 deletions hack/test-canary.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ EOF

# A set of test_matcher strings that must match the appropriate gingko test
# descriptions. These are used to extract the required test results.
CREATE_LB_TEST="\[It\] \[Canary\] should be possible to create and mutate a Service type:LoadBalancer"
CREATE_LB_TEST="\[It\] should be possible to create and mutate a Service type:LoadBalancer \[Canary\]"
# Creates a JSON result file for the specified [Canary] tests to be extracted.
function create_results() {
local metrics_dir="$(dirname ${METRICS_FILE})"
Expand All @@ -76,7 +76,7 @@ function create_results() {
cat > "${METRICS_FILE}" <<EOF
{
"start_time": "${START}"
"create_lb": "$(_extract_result ${CREATE_LB_TEST})"
"create_lb": "$(extract_result ${CREATE_LB_TEST})"
"end_time": "$(now)"
}
EOF
Expand Down
30 changes: 14 additions & 16 deletions pkg/oci/ccm.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
wait "k8s.io/apimachinery/pkg/util/wait"
informers "k8s.io/client-go/informers"
informersv1 "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
listersv1 "k8s.io/client-go/listers/core/v1"
cache "k8s.io/client-go/tools/cache"
Expand Down Expand Up @@ -59,8 +58,8 @@ type CloudProvider struct {
client client.Interface
kubeclient clientset.Interface

securityListManager securityListManager
config *Config
securityListManagerFactory securityListManagerFactory
config *Config
}

// Compile time check that CloudProvider implements the cloudprovider.Interface
Expand Down Expand Up @@ -130,24 +129,23 @@ func (cp *CloudProvider) Initialize(clientBuilder controller.ControllerClientBui

nodeInformer := factory.Core().V1().Nodes()
go nodeInformer.Informer().Run(wait.NeverStop)
serviceInformer := factory.Core().V1().Services()
go serviceInformer.Informer().Run(wait.NeverStop)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might want to stick a \n under here to separate out the setup vs the cache sync


glog.Info("Waiting for node informer cache to sync")
if !cache.WaitForCacheSync(wait.NeverStop, nodeInformer.Informer().HasSynced) {
utilruntime.HandleError(fmt.Errorf("Timed out waiting for node informer to sync"))
if !cache.WaitForCacheSync(wait.NeverStop, nodeInformer.Informer().HasSynced, serviceInformer.Informer().HasSynced) {
utilruntime.HandleError(fmt.Errorf("Timed out waiting for informers to sync"))
}
cp.NodeLister = nodeInformer.Lister()

if !cp.config.LoadBalancer.Disabled {
var serviceInformer informersv1.ServiceInformer
if cp.config.LoadBalancer.SecurityListManagementMode != ManagementModeNone {
serviceInformer = factory.Core().V1().Services()
go serviceInformer.Informer().Run(wait.NeverStop)
glog.Info("Waiting for service informer cache to sync")
if !cache.WaitForCacheSync(wait.NeverStop, serviceInformer.Informer().HasSynced) {
utilruntime.HandleError(fmt.Errorf("Timed out waiting for service informer to sync"))
}
cp.securityListManagerFactory = func(mode string) securityListManager {
if cp.config.LoadBalancer.Disabled {
return newSecurityListManagerNOOP()
}

cp.securityListManager = newSecurityListManager(cp.client, serviceInformer, cp.config.LoadBalancer.SecurityLists, cp.config.LoadBalancer.SecurityListManagementMode)
if len(mode) == 0 {
mode = cp.config.LoadBalancer.SecurityListManagementMode
}
return newSecurityListManager(cp.client, serviceInformer, cp.config.LoadBalancer.SecurityLists, mode)
}
}

Expand Down
35 changes: 21 additions & 14 deletions pkg/oci/load_balancer.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,18 @@ const (
ServiceAnnotationLoadBalancerSSLPorts = "service.beta.kubernetes.io/oci-load-balancer-ssl-ports"

// ServiceAnnotationLoadBalancerTLSSecret is a Service annotation for
// specifying the TLS secret ti install on the load balancer listeners which
// specifying the TLS secret to install on the load balancer listeners which
// have SSL enabled.
// See: https://kubernetes.io/docs/concepts/services-networking/ingress/#tls
ServiceAnnotationLoadBalancerTLSSecret = "service.beta.kubernetes.io/oci-load-balancer-tls-secret"

// ServiceAnnotationLoadBalancerConnectionIdleTimeout is the annotation used
// on the service to specify the idle connection timeout.
ServiceAnnotationLoadBalancerConnectionIdleTimeout = "service.beta.kubernetes.io/oci-load-balancer-connection-idle-timeout"

// ServiceAnnotaionLoadBalancerSecurityListManagementMode is a Service annotation for
// specifying the security list management mode ("All", "Frontend", "None") that configures how security lists are managed by the CCM
ServiceAnnotaionLoadBalancerSecurityListManagementMode = "service.beta.kubernetes.io/oci-load-balancer-security-list-management-mode"
)

// DefaultLoadBalancerPolicy defines the default traffic policy for load
Expand Down Expand Up @@ -242,7 +246,7 @@ func (cp *CloudProvider) createLoadBalancer(ctx context.Context, spec *LBSpec) (
}

for _, ports := range spec.Ports {
if err = cp.securityListManager.Update(ctx, lbSubnets, nodeSubnets, spec.SourceCIDRs, nil, ports); err != nil {
if err = spec.securityListManager.Update(ctx, lbSubnets, nodeSubnets, spec.SourceCIDRs, nil, ports); err != nil {
return nil, err
}
}
Expand Down Expand Up @@ -305,7 +309,7 @@ func (cp *CloudProvider) EnsureLoadBalancer(ctx context.Context, clusterName str
ssl = NewSSLConfig(lbName, ports, cp)
}
subnets := []string{cp.config.LoadBalancer.Subnet1, cp.config.LoadBalancer.Subnet2}
spec, err := NewLBSpec(service, nodes, subnets, ssl)
spec, err := NewLBSpec(service, nodes, subnets, ssl, cp.securityListManagerFactory)
if err != nil {
glog.Errorf("Failed to derive LBSpec: %+v", err)
return nil, err
Expand Down Expand Up @@ -364,7 +368,7 @@ func (cp *CloudProvider) updateLoadBalancer(ctx context.Context, lb *loadbalance
for _, action := range actions {
switch a := action.(type) {
case *BackendSetAction:
err := cp.updateBackendSet(ctx, lbID, a, lbSubnets, nodeSubnets)
err := cp.updateBackendSet(ctx, lbID, a, lbSubnets, nodeSubnets, spec.securityListManager)
if err != nil {
return errors.Wrap(err, "updating BackendSet")
}
Expand All @@ -381,7 +385,7 @@ func (cp *CloudProvider) updateLoadBalancer(ctx context.Context, lb *loadbalance
ports = spec.Ports[backendSetName]
}

err := cp.updateListener(ctx, lbID, a, ports, lbSubnets, nodeSubnets, spec.SourceCIDRs)
err := cp.updateListener(ctx, lbID, a, ports, lbSubnets, nodeSubnets, spec.SourceCIDRs, spec.securityListManager)
if err != nil {
return errors.Wrap(err, "updating listener")
}
Expand All @@ -390,7 +394,7 @@ func (cp *CloudProvider) updateLoadBalancer(ctx context.Context, lb *loadbalance
return nil
}

func (cp *CloudProvider) updateBackendSet(ctx context.Context, lbID string, action *BackendSetAction, lbSubnets, nodeSubnets []*core.Subnet) error {
func (cp *CloudProvider) updateBackendSet(ctx context.Context, lbID string, action *BackendSetAction, lbSubnets, nodeSubnets []*core.Subnet, secListManager securityListManager) error {
var (
sourceCIDRs = []string{}
workRequestID string
Expand All @@ -403,19 +407,19 @@ func (cp *CloudProvider) updateBackendSet(ctx context.Context, lbID string, acti

switch action.Type() {
case Create:
err = cp.securityListManager.Update(ctx, lbSubnets, nodeSubnets, sourceCIDRs, nil, ports)
err = secListManager.Update(ctx, lbSubnets, nodeSubnets, sourceCIDRs, nil, ports)
if err != nil {
return err
}

workRequestID, err = cp.client.LoadBalancer().CreateBackendSet(ctx, lbID, action.Name(), bs)
case Update:
if err = cp.securityListManager.Update(ctx, lbSubnets, nodeSubnets, sourceCIDRs, action.OldPorts, ports); err != nil {
if err = secListManager.Update(ctx, lbSubnets, nodeSubnets, sourceCIDRs, action.OldPorts, ports); err != nil {
return err
}
workRequestID, err = cp.client.LoadBalancer().UpdateBackendSet(ctx, lbID, action.Name(), bs)
case Delete:
err = cp.securityListManager.Delete(ctx, lbSubnets, nodeSubnets, ports)
err = secListManager.Delete(ctx, lbSubnets, nodeSubnets, ports)
if err != nil {
return err
}
Expand All @@ -435,7 +439,7 @@ func (cp *CloudProvider) updateBackendSet(ctx context.Context, lbID string, acti
return nil
}

func (cp *CloudProvider) updateListener(ctx context.Context, lbID string, action *ListenerAction, ports portSpec, lbSubnets, nodeSubnets []*core.Subnet, sourceCIDRs []string) error {
func (cp *CloudProvider) updateListener(ctx context.Context, lbID string, action *ListenerAction, ports portSpec, lbSubnets, nodeSubnets []*core.Subnet, sourceCIDRs []string, secListManager securityListManager) error {
var workRequestID string
var err error
listener := action.Listener
Expand All @@ -445,21 +449,21 @@ func (cp *CloudProvider) updateListener(ctx context.Context, lbID string, action

switch action.Type() {
case Create:
err = cp.securityListManager.Update(ctx, lbSubnets, nodeSubnets, sourceCIDRs, nil, ports)
err = secListManager.Update(ctx, lbSubnets, nodeSubnets, sourceCIDRs, nil, ports)
if err != nil {
return err
}

workRequestID, err = cp.client.LoadBalancer().CreateListener(ctx, lbID, action.Name(), listener)
case Update:
err = cp.securityListManager.Update(ctx, lbSubnets, nodeSubnets, sourceCIDRs, nil, ports)
err = secListManager.Update(ctx, lbSubnets, nodeSubnets, sourceCIDRs, nil, ports)
if err != nil {
return err
}

workRequestID, err = cp.client.LoadBalancer().UpdateListener(ctx, lbID, action.Name(), listener)
case Delete:
err = cp.securityListManager.Delete(ctx, lbSubnets, nodeSubnets, ports)
err = secListManager.Delete(ctx, lbSubnets, nodeSubnets, ports)
if err != nil {
return err
}
Expand Down Expand Up @@ -547,6 +551,9 @@ func (cp *CloudProvider) EnsureLoadBalancerDeleted(ctx context.Context, clusterN
return errors.Wrap(err, "getting subnets for load balancers")
}

securityListManager := cp.securityListManagerFactory(
service.Annotations[ServiceAnnotaionLoadBalancerSecurityListManagementMode])

for listenerName, listener := range lb.Listeners {
backendSetName := *listener.DefaultBackendSetName
bs, ok := lb.BackendSets[backendSetName]
Expand All @@ -559,7 +566,7 @@ func (cp *CloudProvider) EnsureLoadBalancerDeleted(ctx context.Context, clusterN

glog.V(4).Infof("Deleting security rules for listener %q for load balancer %q ports=%+v", listenerName, id, ports)

if err := cp.securityListManager.Delete(ctx, lbSubnets, nodeSubnets, ports); err != nil {
if err := securityListManager.Delete(ctx, lbSubnets, nodeSubnets, ports); err != nil {
return errors.Wrapf(err, "delete security rules for listener %q on load balancer %q", listenerName, name)
}
}
Expand Down
Loading