pkg/daemon/daemon.go

package daemon

import (
	"bufio"
	"context"
	"crypto/tls"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"net/http"
	"os"
	"os/exec"
	"os/signal"
	"path/filepath"
	"slices"
	"strings"
	"sync"
	"syscall"
	"time"

	mcfgclientset "github.com/openshift/client-go/machineconfiguration/clientset/versioned"
	mcopclientset "github.com/openshift/client-go/operator/clientset/versioned"
	"github.com/openshift/library-go/pkg/operator/configobserver/featuregates"

	ign3types "github.com/coreos/ignition/v2/config/v3_4/types"
	"github.com/google/go-cmp/cmp"
	"github.com/google/renameio"
	"golang.org/x/time/rate"
	corev1 "k8s.io/api/core/v1"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/wait"
	coreinformersv1 "k8s.io/client-go/informers/core/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/klog/v2"

	corev1lister "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/util/workqueue"

	configv1 "github.com/openshift/api/config/v1"
	mcfgv1 "github.com/openshift/api/machineconfiguration/v1"
	mcfgalphav1 "github.com/openshift/api/machineconfiguration/v1alpha1"
	mcfginformersv1 "github.com/openshift/client-go/machineconfiguration/informers/externalversions/machineconfiguration/v1"
	mcfglistersv1 "github.com/openshift/client-go/machineconfiguration/listers/machineconfiguration/v1"

	mcoResourceRead "github.com/openshift/machine-config-operator/lib/resourceread"
	ctrlcommon "github.com/openshift/machine-config-operator/pkg/controller/common"
	"github.com/openshift/machine-config-operator/pkg/daemon/constants"
	"github.com/openshift/machine-config-operator/pkg/daemon/osrelease"
	"github.com/openshift/machine-config-operator/pkg/helpers"
	"github.com/openshift/machine-config-operator/pkg/upgrademonitor"
)

// Daemon is the dispatch point for the functions of the agent on the
// machine. It keeps track of connections and the current state of the update
// process.
//
// A Daemon is created by New, which fills in the host-derived fields, and is
// then wired to a cluster by either ClusterConnect or HypershiftConnect.
type Daemon struct {
	// name is the node name.
	name string

	// os is the operating system the MCD is running on.
	os osrelease.OperatingSystem

	// mock is set if we're running as non-root, probably under unit tests.
	mock bool

	// NodeUpdaterClient wraps rpm-ostree and will eventually be replaced with a direct rpmostreeclient value.
	// New only populates it on CoreOS variants; otherwise it stays nil.
	NodeUpdaterClient *RpmOstreeClient

	// bootID is a unique value per boot (generated by the kernel).
	bootID string

	// bootedOSImageURL is the currently booted URL of the operating system.
	bootedOSImageURL string

	// bootedOSCommit is the commit hash of the currently booted operating system.
	bootedOSCommit string

	// previousFinalizationFailure caches a failure of ostree-finalize-staged.service
	// we may have seen from the previous boot.
	previousFinalizationFailure string

	// kubeClient allows interaction with Kubernetes, including the node we are running on.
	kubeClient kubernetes.Interface

	// mcfgClient is the machineconfiguration clientset; set by ClusterConnect only.
	mcfgClient mcfgclientset.Interface

	// mcopClient allows interaction with OpenShift operator level objects, such as MachineConfiguration.
	mcopClient mcopclientset.Interface

	// nodeLister is used to watch for updates via the informer.
	nodeLister       corev1lister.NodeLister
	nodeListerSynced cache.InformerSynced

	// mcLister reads MachineConfigs from the informer cache.
	mcLister       mcfglistersv1.MachineConfigLister
	mcListerSynced cache.InformerSynced

	// mcpLister reads MachineConfigPools from the informer cache.
	mcpLister       mcfglistersv1.MachineConfigPoolLister
	mcpListerSynced cache.InformerSynced

	// ccLister reads ControllerConfigs from the informer cache.
	ccLister       mcfglistersv1.ControllerConfigLister
	ccListerSynced cache.InformerSynced

	// skipReboot skips the reboot after a sync, only valid with onceFrom != "".
	skipReboot bool

	// kubeletHealthzEnabled and kubeletHealthzEndpoint are copied verbatim from
	// the ClusterConnect arguments of the same name.
	kubeletHealthzEnabled  bool
	kubeletHealthzEndpoint string

	// NOTE(review): presumably updateActiveLock guards updateActive — confirm
	// against the code that reads and writes them.
	updateActive     bool
	updateActiveLock sync.Mutex

	// nodeWriter is used to set node annotations/state and to emit events.
	// It is created by ClusterConnect and HypershiftConnect.
	nodeWriter NodeWriter

	// featureGatesAccessor is handed to the upgrademonitor helpers when
	// MachineConfigNode conditions are generated.
	featureGatesAccessor featuregates.FeatureGateAccess

	// exitCh is the channel used by callbacks to signal Run() of an error.
	exitCh chan<- error

	// stopCh is the channel used to ensure all spawned goroutines exit when we exit.
	stopCh <-chan struct{}

	// node is the current instance of the node being processed through handleNodeEvent
	// or the very first instance grabbed when the daemon starts.
	node *corev1.Node

	// queue holds node keys waiting to be handed to syncHandler; ccQueue is
	// created in ClusterConnect alongside the ControllerConfig event handlers.
	// enqueueNode and syncHandler are assigned by ClusterConnect /
	// HypershiftConnect, which lets each mode plug in its own sync function.
	queue       workqueue.TypedRateLimitingInterface[string]
	ccQueue     workqueue.TypedRateLimitingInterface[string]
	cmQueue     workqueue.TypedRateLimitingInterface[string]
	enqueueNode func(*corev1.Node)
	syncHandler func(node string) error

	// isControlPlane is true if this node is a control plane (master).
	// The machine may also be a worker (with schedulable masters).
	isControlPlane bool
	// nodeInitialized is true when we've performed one-time initialization
	// after having updated the node object.
	nodeInitialized bool
	// booting starts out true and is cleared by syncNode once the initial
	// synchronization to the target machineconfig is done.
	booting bool
	// rebootQueued is true when the node is waiting for graceful shutdown.
	rebootQueued bool

	// currentConfigPath and currentImagePath are initialized by New from the
	// package-level constants of the same name.
	currentConfigPath string
	currentImagePath  string

	// Config Drift Monitor
	configDriftMonitor ConfigDriftMonitor

	// Used for Hypershift; set from the configMap argument of HypershiftConnect.
	hypershiftConfigMap string

	// initializeHealthServer starts out true and is cleared by syncNode after
	// it has launched the health listener, so the listener is started once.
	initializeHealthServer bool

	deferKubeletRestart bool

	// Ensures that only a single syncOSImagePullSecrets call can run at a time.
	osImageMux *sync.Mutex
}

// CoreOSDaemon protects the methods that should only be called on CoreOS variants.
//
// Ideally New() would return a Daemon interface that could either be a base Daemon or a
// CoreOSDaemon. Besides adding some type-checking and clarity, that would allow moving fields like
// bootedOSImageURL or functions like checkOS() to CoreOSDaemon. Both Daemon and CoreOSDaemon,
// however, have to share the update() method, and update() requires access to many fields from
// Daemon. That eliminates the possibility of update() being defined on an interface, so we have to
// cast Daemon to CoreOSDaemon manually in update().
type CoreOSDaemon struct {
	// Daemon is embedded so that every Daemon field and method is reachable
	// through a CoreOSDaemon.
	*Daemon
}

// ErrAuxiliary is a sentinel error value for failures that originate in
// auxiliary packages; compare against it with errors.Is.
var ErrAuxiliary = errors.New("Error from auxiliary packages")

const (
	// pathSystemd is the path where systemd modifiable units, services, etc. reside.
	pathSystemd = "/etc/systemd/system"
	// pathDevNull is the system's path to an endless blackhole.
	pathDevNull = "/dev/null"
	// currentConfigPath is where we store the current config on disk to validate
	// against annotation changes.
	currentConfigPath = "/etc/machine-config-daemon/currentconfig"
	// bootstrapConfigDiffPath is the on-disk location of the bootstrap config diff.
	// NOTE(review): the previous comment was a copy of the currentConfigPath one;
	// confirm what exactly is written here.
	bootstrapConfigDiffPath = "/etc/machine-config-daemon/bootstrapconfigdiff"

	// currentImagePath is where we store the current image on disk to validate
	// against annotation changes.
	currentImagePath = "/etc/machine-config-daemon/currentimage"

	// originalContainerBin is the path at which we've stashed the MCD container's /usr/bin
	// in the host namespace.  We use this for executing any extra binaries we have in our
	// container image.
	originalContainerBin = "/run/machine-config-daemon-bin"

	// Polling interval and timeout for the kubelet healthz check.
	kubeletHealthzPollingInterval = 30 * time.Second
	kubeletHealthzTimeout         = 30 * time.Second

	// updateDelay is the baseline speed at which we react to changes.  We don't
	// need to react in milliseconds as any change would involve rebooting the node.
	// Having this be relatively high limits the number of times we retry before
	// the MCC/MCO will time out.  We don't want to spam our logs with the same
	// error.
	updateDelay = 5 * time.Second

	// maxUpdateBackoff is the maximum time to react to a change as we back off
	// in the face of errors.
	maxUpdateBackoff = 60 * time.Second

	// used for Hypershift daemon
	mcsServedConfigPath         = "/etc/mcs-machine-config-content.json"
	hypershiftCurrentConfigPath = "/etc/mcd-currentconfig.json"
	configMapConfigKey          = "config"
	configMapHashKey            = "hash"

	// imageCAFilePath is the directory holding per-registry CA certificates.
	imageCAFilePath = "/etc/docker/certs.d"

	// used for certificate syncing
	caBundleFilePath      = "/etc/kubernetes/kubelet-ca.crt"
	cloudCABundleFilePath = "/etc/kubernetes/static-pod-resources/configmaps/cloud-config/ca-bundle.pem"
	userCABundleFilePath  = "/etc/pki/ca-trust/source/anchors/openshift-config-user-ca-bundle.crt"
	kubeConfigPath        = "/etc/kubernetes/kubeconfig"

	// Where nmstate writes the link files if it persisted ifnames.
	// https://github.com/nmstate/nmstate/blob/03c7b03bd4c9b0067d3811dbbf72635201519356/rust/src/cli/persist_nic.rs#L32-L36
	// Note that, unlike the other paths in this block, this one is relative
	// (no leading slash) — presumably so it can be joined onto a root
	// directory; verify against the callers.
	systemdNetworkDir = "etc/systemd/network"
)

// onceFromOrigin enumerates the possible origins of a "once-from"
// configuration.
type onceFromOrigin int

const (
	// onceFromUnknownConfig is the zero value of onceFromOrigin.
	onceFromUnknownConfig onceFromOrigin = iota
	// onceFromLocalConfig — presumably a config read from the local
	// filesystem; verify against the code that assigns it.
	onceFromLocalConfig
	// onceFromRemoteConfig — presumably a config fetched from a remote
	// location; verify against the code that assigns it.
	onceFromRemoteConfig
)

// defaultRebootTimeout is 24 hours. It is declared as a variable rather than
// a constant, so it can be reassigned at runtime.
var defaultRebootTimeout = 24 * time.Hour

// ErrMissingMachineConfig is an error type that records the name of a
// MachineConfig that is missing.
type ErrMissingMachineConfig struct {
	missingMC string
}

// newErrMissingMachineConfig wraps the given MachineConfig name in an
// *ErrMissingMachineConfig and returns it as a plain error.
func newErrMissingMachineConfig(missingMC string) error {
	missing := new(ErrMissingMachineConfig)
	missing.missingMC = missingMC
	return missing
}

// Error satisfies the error interface; the message names the missing
// MachineConfig.
func (e *ErrMissingMachineConfig) Error() string {
	const format = "missing MachineConfig %s"
	return fmt.Sprintf(format, e.missingMC)
}

// MissingMachineConfig returns the name of the missing MachineConfig. Handy
// when a caller wants to feed the name into a metric in a single expression.
func (e *ErrMissingMachineConfig) MissingMachineConfig() string {
	name := e.missingMC
	return name
}

// rebootCommand builds (but does not start) a systemd-run invocation that
// creates the transient unit "machine-config-daemon-reboot", which in turn
// runs `systemctl reboot`. The rationale is embedded in the unit description.
//
// The kubelet is intentionally not stopped here: with the upstream kubelet
// graceful shutdown implementation, the kubelet takes systemd inhibitor locks
// to delay node shutdown and terminate pods itself when the
// `GracefulNodeShutdown` feature gate is enabled.
// https://kubernetes.io/docs/concepts/architecture/nodes/#graceful-node-shutdown
func rebootCommand(rationale string) *exec.Cmd {
	description := fmt.Sprintf("machine-config-daemon: %s", rationale)
	systemdRunArgs := []string{
		"--unit", "machine-config-daemon-reboot",
		"--description", description,
		"/bin/sh", "-c", "systemctl reboot",
	}
	return exec.Command("systemd-run", systemdRunArgs...)
}

// getBootID returns the kernel-generated, per-boot unique identifier read
// from /proc/sys/kernel/random/boot_id, with surrounding whitespace removed.
// The read error, if any, is returned unchanged.
func getBootID() (string, error) {
	const bootIDPath = "/proc/sys/kernel/random/boot_id"

	raw, err := os.ReadFile(bootIDPath)
	if err != nil {
		return "", err
	}

	bootID := strings.TrimSpace(string(raw))
	return bootID, nil
}

// New builds a Daemon from the state of the local host. It does not talk to
// the cluster; ClusterConnect or HypershiftConnect do that afterwards.
//
// When the process is not running as root the daemon is put into mock mode
// and host OS detection and the boot ID lookup are skipped. On CoreOS variants
// the rpm-ostree client is initialized and queried for the booted OS image.
// The detected OS (and version, if known) is reported to the hostOS metric.
//
// exitCh is stored on the Daemon for callbacks to report errors on.
func New(
	exitCh chan<- error,
) (*Daemon, error) {
	// A non-root process is treated as a mock run.
	mock := os.Getuid() != 0

	var (
		osImageURL string
		osVersion  string
		osCommit   string
		bootID     string
		err        error
	)

	runningOS := osrelease.OperatingSystem{}
	if !mock {
		if runningOS, err = osrelease.GetHostRunningOS(); err != nil {
			hostOS.WithLabelValues("unsupported", "").Set(1)
			return nil, fmt.Errorf("checking operating system: %w", err)
		}
	}

	// Only pull the osImageURL from OSTree when we are on RHCOS or FCOS
	var updaterClient *RpmOstreeClient
	if runningOS.IsCoreOSVariant() {
		client := NewNodeUpdaterClient()
		updaterClient = &client
		if initErr := updaterClient.Initialize(); initErr != nil {
			return nil, fmt.Errorf("error initializing rpm-ostree: %w", initErr)
		}
		if osImageURL, osVersion, osCommit, err = updaterClient.GetBootedOSImageURL(); err != nil {
			return nil, fmt.Errorf("error reading osImageURL from rpm-ostree: %w", err)
		}
		klog.Infof("Booted osImageURL: %s (%s) %s", osImageURL, osVersion, osCommit)
	}

	if !mock {
		if bootID, err = getBootID(); err != nil {
			return nil, fmt.Errorf("failed to read boot ID: %w", err)
		}
	}

	// report OS & version (if RHCOS or FCOS) to prometheus
	hostOS.WithLabelValues(runningOS.ToPrometheusLabel(), osVersion).Set(1)

	dn := &Daemon{
		mock:                   mock,
		booting:                true,
		initializeHealthServer: true,
		rebootQueued:           false,
		os:                     runningOS,
		NodeUpdaterClient:      updaterClient,
		bootedOSImageURL:       osImageURL,
		bootedOSCommit:         osCommit,
		bootID:                 bootID,
		exitCh:                 exitCh,
		currentConfigPath:      currentConfigPath,
		currentImagePath:       currentImagePath,
		configDriftMonitor:     NewConfigDriftMonitor(),
		osImageMux:             &sync.Mutex{},
	}
	return dn, nil
}

// ClusterConnect wires the Daemon to a regular (non-Hypershift) cluster.
//
// It stores the supplied clients, creates the node and ControllerConfig work
// queues, registers event handlers on the node and ControllerConfig
// informers, records the listers and their HasSynced functions, creates the
// node writer and starts it in a goroutine, and finally installs the default
// enqueue and sync functions.
//
// The only error returned is the one from newNodeWriter.
func (dn *Daemon) ClusterConnect(
	name string,
	kubeClient kubernetes.Interface,
	mcfgClient mcfgclientset.Interface,
	mcInformer mcfginformersv1.MachineConfigInformer,
	nodeInformer coreinformersv1.NodeInformer,
	ccInformer mcfginformersv1.ControllerConfigInformer,
	mcpInformer mcfginformersv1.MachineConfigPoolInformer,
	mcopClient mcopclientset.Interface,
	kubeletHealthzEnabled bool,
	kubeletHealthzEndpoint string,
	featureGatesAccessor featuregates.FeatureGateAccess,
) error {
	dn.name = name
	dn.kubeClient = kubeClient
	dn.mcfgClient = mcfgClient
	dn.mcopClient = mcopClient
	// Other controllers start out with the default controller limiter which retries
	// in milliseconds; since any change here will involve rebooting the node
	// we don't need to react in milliseconds.  See also updateDelay above.
	//
	// NOTE(review): rate.Limit is a frequency in events per second, so
	// rate.Limit(updateDelay) converts the duration's nanosecond count into a
	// very large rate rather than "one event per updateDelay". If the latter
	// was intended, rate.Every(updateDelay) is the usual spelling — confirm
	// before changing, since it would alter how quickly node events are
	// processed. The same expression is used in HypershiftConnect.
	dn.queue = workqueue.NewTypedRateLimitingQueueWithConfig[string](workqueue.NewTypedMaxOfRateLimiter[string](
		&workqueue.TypedBucketRateLimiter[string]{Limiter: rate.NewLimiter(rate.Limit(updateDelay), 1)},
		workqueue.NewTypedItemExponentialFailureRateLimiter[string](1*time.Second, maxUpdateBackoff)),
		workqueue.TypedRateLimitingQueueConfig[string]{Name: "machineconfigdaemon"})

	// Node adds and updates both funnel into handleNodeEvent; deletes are not handled.
	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dn.handleNodeEvent,
		UpdateFunc: func(_, newObj interface{}) { dn.handleNodeEvent(newObj) },
	})
	dn.nodeLister = nodeInformer.Lister()
	dn.nodeListerSynced = nodeInformer.Informer().HasSynced
	dn.mcLister = mcInformer.Lister()
	dn.mcListerSynced = mcInformer.Informer().HasSynced
	// The ControllerConfig queue uses the default controller rate limiter.
	dn.ccQueue = workqueue.NewTypedRateLimitingQueue(workqueue.DefaultTypedControllerRateLimiter[string]())
	ccInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dn.handleControllerConfigEvent,
		UpdateFunc: func(_, newObj interface{}) { dn.handleControllerConfigEvent(newObj) },
		// In theory the ControllerConfig we care about shouldn't get deleted
		DeleteFunc: dn.handleControllerConfigEvent,
	})
	dn.ccLister = ccInformer.Lister()
	dn.ccListerSynced = ccInformer.Informer().HasSynced
	dn.mcpLister = mcpInformer.Lister()
	dn.mcpListerSynced = mcpInformer.Informer().HasSynced

	// The node writer runs in its own goroutine until dn.stopCh is closed.
	nw, err := newNodeWriter(dn.name, dn.stopCh)
	if err != nil {
		return err
	}
	dn.nodeWriter = nw
	go dn.nodeWriter.Run(dn.stopCh)

	dn.enqueueNode = dn.enqueueDefault
	dn.syncHandler = dn.syncNode

	dn.kubeletHealthzEnabled = kubeletHealthzEnabled
	dn.kubeletHealthzEndpoint = kubeletHealthzEndpoint

	dn.featureGatesAccessor = featureGatesAccessor

	return nil
}

// HypershiftConnect sets up a simplified daemon for Hypershift updates.
//
// It stores the client and ConfigMap name, fetches the node object named
// name directly from the API server, registers handleNodeEvent for node adds
// and updates, creates the node work queue, installs the Hypershift sync
// handler, and creates and starts the node writer.
//
// An error is returned when the node cannot be fetched or the node writer
// cannot be created. The node fetch failure is reported through the error
// return (rather than terminating the process from inside this function) so
// that it is handled the same way as the node writer failure and the caller
// decides how to exit.
func (dn *Daemon) HypershiftConnect(
	name string,
	kubeClient kubernetes.Interface,
	nodeInformer coreinformersv1.NodeInformer,
	configMap string,
) error {
	dn.name = name
	dn.kubeClient = kubeClient
	dn.hypershiftConfigMap = configMap

	node, err := dn.kubeClient.CoreV1().Nodes().Get(context.TODO(), name, metav1.GetOptions{})
	if err != nil {
		return fmt.Errorf("cannot fetch node object %q: %w", name, err)
	}
	dn.node = node

	nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc:    dn.handleNodeEvent,
		UpdateFunc: func(_, newObj interface{}) { dn.handleNodeEvent(newObj) },
	})

	// Same queue configuration as the one built in ClusterConnect.
	dn.queue = workqueue.NewTypedRateLimitingQueueWithConfig[string](workqueue.NewTypedMaxOfRateLimiter[string](
		&workqueue.TypedBucketRateLimiter[string]{Limiter: rate.NewLimiter(rate.Limit(updateDelay), 1)},
		workqueue.NewTypedItemExponentialFailureRateLimiter[string](1*time.Second, maxUpdateBackoff)),
		workqueue.TypedRateLimitingQueueConfig[string]{Name: "machineconfigdaemon"})

	dn.enqueueNode = dn.enqueueDefault
	dn.syncHandler = dn.syncNodeHypershift

	// The node writer runs in its own goroutine until dn.stopCh is closed.
	nw, err := newNodeWriter(dn.name, dn.stopCh)
	if err != nil {
		return err
	}
	dn.nodeWriter = nw
	go dn.nodeWriter.Run(dn.stopCh)

	return nil
}

// PrepareNamespace is invoked before chrooting into the target root. It
// recursively bind-mounts two directories of the current root into target:
//
//   - /run/secrets, which holds the service account tokens we still need;
//   - /usr/bin, mounted at originalContainerBin so binaries shipped in the
//     container image stay reachable after the chroot.
//
// When target is "/" nothing is done and nil is returned. Any failure to
// create a mount point or to mount is returned wrapped with the path(s)
// involved.
func PrepareNamespace(target string) error {
	// With ReexecuteForTargetRoot, we have already chroot into rootfs.
	// So, we should already have necessary MCD pod content mounted inside the host.
	// This also avoids overriding previously mounted content.
	if target == "/" {
		return nil
	}

	// This contains the /run/secrets/kubernetes.io service account tokens that we still need
	secretsMount := "/run/secrets"
	targetSecrets := filepath.Join(target, secretsMount)
	if err := os.MkdirAll(targetSecrets, 0o755); err != nil {
		return fmt.Errorf("failed to create %s: %w", targetSecrets, err)
	}
	// This will only affect our mount namespace, not the host
	if err := runCmdSync("mount", "--rbind", secretsMount, targetSecrets); err != nil {
		return fmt.Errorf("failed to mount %s to %s: %w", secretsMount, targetSecrets, err)
	}

	targetSavedBin := filepath.Join(target, originalContainerBin)
	if err := os.MkdirAll(targetSavedBin, 0o755); err != nil {
		return fmt.Errorf("failed to create %s: %w", targetSavedBin, err)
	}

	usrbin := "/usr/bin"
	if err := runCmdSync("mount", "--rbind", usrbin, targetSavedBin); err != nil {
		return fmt.Errorf("failed to mount %s to %s: %w", usrbin, targetSavedBin, err)
	}

	return nil
}

// ReexecuteForTargetRoot detects the OS in the host root filesystem, then
// copies the appropriate binary into `/run/bin/` there, then does a `chroot`
// and re-executes the new binary.
//
// It returns nil without doing anything when target is "/" or when the
// _MCD_DID_REEXEC environment variable is already set (i.e. we are the
// re-executed process). On success it does not return: the current process
// image is replaced via exec. Every failure along the way is returned with
// context describing the step that failed.
func ReexecuteForTargetRoot(target string) error {
	// Nothing to do in this case
	if target == "/" {
		return nil
	}
	// Extra check to avoid recursion
	reexecEnv := "_MCD_DID_REEXEC"
	if _, ok := os.LookupEnv(reexecEnv); ok {
		return nil
	}

	sourceOsVersion, err := osrelease.GetHostRunningOSFromRoot("/")
	if err != nil {
		return fmt.Errorf("failed to get source OS: %w", err)
	}

	targetOsVersion, err := osrelease.GetHostRunningOSFromRoot(target)
	if err != nil {
		return fmt.Errorf("failed to get target OS: %w", err)
	}

	// The only special case is a RHEL 9 container running on a RHEL 8 host,
	// for which the ".rhel8" flavour of the binary is selected.
	var sourceBinarySuffix string
	if sourceOsVersion.IsLikeRHEL() && targetOsVersion.IsLikeRHEL() {
		sourceMajor := sourceOsVersion.BaseVersionMajor()
		targetMajor := targetOsVersion.BaseVersionMajor()
		if sourceMajor == "9" && targetMajor == "8" {
			sourceBinarySuffix = ".rhel8"
			klog.Info("container is rhel9, target is rhel8")
		} else {
			klog.Infof("using appropriate binary for source=rhel-%s target=rhel-%s", sourceMajor, targetMajor)
		}
	} else {
		klog.Info("assuming we can use container binary chroot() to host")
	}
	sourceBinary := "/usr/bin/machine-config-daemon" + sourceBinarySuffix
	src, err := os.Open(sourceBinary)
	if err != nil {
		return fmt.Errorf("opening %s: %w", sourceBinary, err)
	}
	defer src.Close()

	targetBinBase := "run/bin/machine-config-daemon"
	targetBin := filepath.Join(target, targetBinBase)
	targetBinDir := filepath.Dir(targetBin)
	if err := os.MkdirAll(targetBinDir, 0o755); err != nil {
		return fmt.Errorf("mkdir %s: %w", targetBinDir, err)
	}

	// Write to a temporary file and rename it into place so that a partially
	// written binary is never visible at targetBin.
	f, err := renameio.TempFile(targetBinDir, targetBin)
	if err != nil {
		return fmt.Errorf("writing %s: %w", targetBin, err)
	}
	defer f.Cleanup()
	if _, err := io.Copy(f, src); err != nil {
		f.Close()
		return fmt.Errorf("writing %s: %w", targetBin, err)
	}
	if err := f.Chmod(0o755); err != nil {
		return fmt.Errorf("making %s executable: %w", targetBin, err)
	}
	// Must close our writable fd
	if err := f.CloseAtomicallyReplace(); err != nil {
		return fmt.Errorf("replacing %s: %w", targetBin, err)
	}

	if err := syscall.Chroot(target); err != nil {
		return fmt.Errorf("failed to chroot to %s: %w", target, err)
	}

	if err := os.Chdir("/"); err != nil {
		return fmt.Errorf("failed to change directory to /: %w", err)
	}

	// Now we will see the binary in the target root
	targetBin = "/" + targetBinBase
	// We have a "belt and suspenders" approach for detecting the case where
	// we're running in the target root.  First we inject --root-mount=/, and
	// we also set an environment variable to be really sure.
	newArgv := []string{targetBin}
	newArgv = append(newArgv, os.Args[1:]...)
	newArgv = append(newArgv, "--root-mount=/")
	newEnv := append(os.Environ(), fmt.Sprintf("%s=1", reexecEnv))
	klog.Infof("Invoking re-exec %s", targetBin)
	return syscall.Exec(targetBin, newArgv, newEnv)
}

// worker is the body of a worker goroutine: it keeps pulling items off the
// queue and processing them until processNextWorkItem reports that the queue
// has been shut down.
// It enforces that the syncHandler is never invoked concurrently with the same key.
func (dn *Daemon) worker() {
	for {
		if more := dn.processNextWorkItem(); !more {
			return
		}
	}
}

// processNextWorkItem takes one key from the queue, runs the sync handler on
// it and passes the outcome to handleErr. The key is marked done when the
// function returns. It reports false only when the queue has been shut down.
func (dn *Daemon) processNextWorkItem() bool {
	key, shutdown := dn.queue.Get()
	if shutdown {
		return false
	}
	defer dn.queue.Done(key)

	dn.handleErr(dn.syncHandler(key), key)
	return true
}

// handleErr deals with the result of syncing key.
//
// A nil err clears the key's rate-limiting history. Otherwise the error is
// recorded on the node via updateErrorState and the key is re-queued with
// rate limiting. If no node writer is set, the error is reported through
// updateErrorStateHypershift and the process is terminated.
//
// NOTE(review): HypershiftConnect assigns dn.nodeWriter as well, so confirm
// whether the nil-nodeWriter branch is still reachable in practice.
func (dn *Daemon) handleErr(err error, key string) {
	if err == nil {
		dn.queue.Forget(key)
		return
	}

	// Exit if nodewriter is not initialized, used for Hypershift
	if dn.nodeWriter == nil {
		dn.updateErrorStateHypershift(err)
		klog.Fatalf("Error handling node sync: %v", err)
	}

	if annoErr := dn.updateErrorState(err); annoErr != nil {
		klog.Errorf("Could not update annotation: %v", annoErr)
	}

	// This is at V(2) since the updateErrorState() call above ends up logging too
	retries := dn.queue.NumRequeues(key)
	klog.V(2).Infof("Error syncing node %v (retries %d): %v", key, retries, err)
	dn.queue.AddRateLimited(key)
}

// unreconcilableErr wraps an error by embedding it. updateErrorState uses
// errors.As to detect it and, when found, marks the node unreconcilable
// instead of degraded.
type unreconcilableErr struct {
	error
}

// updateErrorState records err on the node through the node writer. Errors
// whose chain contains an *unreconcilableErr mark the node unreconcilable;
// every other error marks it degraded. Only a failure of SetDegraded is
// returned to the caller; any result of SetUnreconcilable is not inspected.
func (dn *Daemon) updateErrorState(err error) error {
	var unreconcilable *unreconcilableErr
	if errors.As(err, &unreconcilable) {
		dn.nodeWriter.SetUnreconcilable(err)
		return nil
	}

	if degradeErr := dn.nodeWriter.SetDegraded(err); degradeErr != nil {
		return degradeErr
	}
	return nil
}

// updateErrorStateHypershift marks the node as degraded by setting the state
// annotation to Degraded and the reason annotation to the (truncated) text of
// err.
//
// If no node writer is available the annotations cannot be written; this is
// logged together with the original error and the function returns, so that
// the caller can still report the original error instead of crashing on a
// nil node writer. If the node writer is present but setting the annotations
// fails, the process is terminated.
func (dn *Daemon) updateErrorStateHypershift(err error) {
	if dn.nodeWriter == nil {
		klog.Errorf("Cannot set degraded annotation: node writer is not initialized, original error %v", err)
		return
	}

	// truncatedErr caps error message at a reasonable length to limit the risk of hitting the total
	// annotation size limit (256 kb) at any point
	truncatedErr := fmt.Sprintf("%.2000s", err.Error())
	annos := map[string]string{
		constants.MachineConfigDaemonStateAnnotationKey:  constants.MachineConfigDaemonStateDegraded,
		constants.MachineConfigDaemonReasonAnnotationKey: truncatedErr,
	}
	if _, annoErr := dn.nodeWriter.SetAnnotations(annos); annoErr != nil {
		klog.Fatalf("Error setting degraded annotation %v, original error %v", annoErr, err)
	}
}

// initializeNode performs the one-time setup that needs the node object. It
// is a no-op once it has succeeded; because it may be retried after a
// failure, everything called from here should be idempotent.
//
// Nodes carrying the master label get the control plane initialization and
// are flagged as control plane nodes.
func (dn *Daemon) initializeNode() error {
	if dn.nodeInitialized {
		return nil
	}

	// Some parts of the MCO dispatch on whether or not we're managing a control plane node
	_, onControlPlane := dn.node.Labels[ctrlcommon.MasterLabel]
	if !onControlPlane {
		klog.Infof("Node %s is not labeled %s", dn.node.Name, ctrlcommon.MasterLabel)
	} else {
		klog.Infof("Node %s is part of the control plane", dn.node.Name)
		if err := dn.initializeControlPlane(); err != nil {
			return err
		}
		dn.isControlPlane = true
	}

	dn.nodeInitialized = true
	return nil
}

// syncNode is the sync handler installed by ClusterConnect. key is a
// namespace/name work queue key; keys that do not name this daemon's own node
// are ignored.
//
// In order, it:
//  1. looks the node up in the lister, returning early if it is gone, being
//     deleted, or a reboot is already queued;
//  2. reports the "rebooted" MachineConfigNode conditions and clears the
//     post-config-action annotation if the node carries the rebooting marker;
//  3. stores a deep copy of the node in dn.node (running initializeNode the
//     first time) and logs state/reason annotation transitions;
//  4. syncs the internal registry pull secrets;
//  5. on the first sync after start-up (dn.booting), runs the first-run checks,
//     starts the config drift monitor and returns;
//  6. otherwise waits for any outstanding drain, then asks
//     prepUpdateFromCluster whether an update is needed and triggers it;
//  7. once, after booting has finished, starts the local health listener.
//
// Failures to publish MachineConfigNode conditions are only logged; every
// other error is returned so that the key is retried.
//
//nolint:gocyclo
func (dn *Daemon) syncNode(key string) error {
	startTime := time.Now()
	klog.V(4).Infof("Started syncing node %q (%v)", key, startTime)
	defer func() {
		klog.V(4).Infof("Finished syncing node %q (%v)", key, time.Since(startTime))
	}()

	_, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return err
	}
	// If this isn't our node, nothing to do.  The node controller
	// handles other nodes.
	if name != dn.name {
		return nil
	}

	node, err := dn.nodeLister.Get(name)
	if apierrors.IsNotFound(err) {
		klog.V(2).Infof("node %v has been deleted", key)
		return nil
	}
	if err != nil {
		return err
	}
	// Check for Deleted Node
	if node.DeletionTimestamp != nil {
		klog.Infof("Node %s was deleted!", node.Name)
		return nil
	}

	// Check for queued reboot. If we attempt to sync while waiting for a reboot,
	// it will cause the update to start again, so we skip the sync.
	if dn.rebootQueued {
		klog.Infof("Node %s is queued for a reboot, skipping sync.", node.Name)
		return nil
	}

	// Get MCP associated with node
	pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, node)
	if err != nil {
		return err
	}

	// The rebooting marker on the node means the post-config action (a reboot)
	// has happened: publish the corresponding conditions and clear the marker.
	// Both steps are best-effort; failures are logged and the sync continues.
	if node.Annotations[constants.MachineConfigDaemonPostConfigAction] == constants.MachineConfigDaemonStateRebooting {
		klog.Info("Detected Rebooting Annotation, applying MCN.")
		err := upgrademonitor.GenerateAndApplyMachineConfigNodes(
			&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdatePostActionComplete, Reason: string(mcfgalphav1.MachineConfigNodeUpdateRebooted), Message: "Node has rebooted"},
			&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdateRebooted, Reason: fmt.Sprintf("%s%s", string(mcfgalphav1.MachineConfigNodeUpdatePostActionComplete), string(mcfgalphav1.MachineConfigNodeUpdateRebooted)), Message: "Upgrade required a reboot. Completed this as the post update action."},
			metav1.ConditionTrue,
			metav1.ConditionTrue,
			node,
			dn.mcfgClient,
			dn.featureGatesAccessor,
			pool,
		)
		if err != nil {
			klog.Errorf("Error making MCN for Rebooted: %v", err)
		}
		removeRebooting := make(map[string]string)
		removeRebooting[constants.MachineConfigDaemonPostConfigAction] = ""
		_, err = dn.nodeWriter.SetAnnotations(removeRebooting)
		if err != nil {
			klog.Errorf("Could not unset rebooting Anno: %v", err)
		}
	}

	// Deep-copy otherwise we are mutating our cache.
	node = node.DeepCopy()

	if dn.node == nil {
		// First node object we have seen: run the one-time initialization.
		dn.node = node
		if err := dn.initializeNode(); err != nil {
			return err
		}
	} else {
		// Log state transitions here
		oldState := dn.node.Annotations[constants.MachineConfigDaemonStateAnnotationKey]
		newState := node.Annotations[constants.MachineConfigDaemonStateAnnotationKey]
		oldReason := dn.node.Annotations[constants.MachineConfigDaemonReasonAnnotationKey]
		newReason := node.Annotations[constants.MachineConfigDaemonReasonAnnotationKey]
		if oldState != newState {
			klog.Infof("Transitioned from state: %v -> %v", oldState, newState)
		}
		if oldReason != newReason {
			klog.Infof("Transitioned from degraded/unreconcilable reason %v -> %v", oldReason, newReason)
		}
		dn.node = node
	}

	// Sync our OS image pull secrets here. This will account for any changes to
	// the ControllerConfig.
	//
	// I'm not sure if this needs to be done right here or as frequently as this,
	// but it shouldn't cause too much impact.
	if err := dn.syncInternalRegistryPullSecrets(nil); err != nil {
		return err
	}

	// Take care of the very first sync of the MCD on a node.
	// This loads the node annotation from the bootstrap (if we're really bootstrapping)
	// and then proceeds to check the state of the node, which includes
	// finalizing an update and/or reconciling the current and desired machine configs.
	if dn.booting {
		// Be sure only the MCD is running now, disable -firstboot.service
		if err := upgradeHackFor44AndBelow(); err != nil {
			return err
		}
		if err := removeIgnitionArtifacts(); err != nil {
			return err
		}
		if err := PersistNetworkInterfaces("/"); err != nil {
			return err
		}
		if err := dn.checkStateOnFirstRun(); err != nil {
			return err
		}
		// finished syncing node for the first time;
		// currently we return immediately here, although
		// I think we should change this to continue.
		dn.booting = false

		err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
			&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeResumed, Reason: string(mcfgalphav1.MachineConfigNodeResumed), Message: fmt.Sprintf("In desired config %s. Resumed normal operations.", node.Annotations[constants.CurrentMachineConfigAnnotationKey])},
			nil,
			metav1.ConditionTrue,
			metav1.ConditionFalse,
			node,
			dn.mcfgClient,
			dn.featureGatesAccessor,
			pool,
		)
		if err != nil {
			klog.Errorf("Error making MCN for Resumed true: %v", err)
		}
		// NOTE(review): node is the deep copy that is also stored in dn.node.
		// SetAnnotations replaces that object's whole annotation map in memory
		// with this single entry, and nothing here writes it to the API
		// server — confirm this is the intended effect.
		removeRebooting := make(map[string]string)
		removeRebooting[constants.MachineConfigDaemonReasonAnnotationKey] = ""
		node.SetAnnotations(removeRebooting)
		// Start the Config Drift Monitor since we're booted up.
		dn.startConfigDriftMonitor()

		return nil
	}

	// Check if a previous drain caused us to degrade. If the drain
	// has yet to complete and we are in a degrade state, continue
	// to stay in this state
	if dn.node.Annotations[constants.DesiredDrainerAnnotationKey] != "" &&
		dn.node.Annotations[constants.DesiredDrainerAnnotationKey] != dn.node.Annotations[constants.LastAppliedDrainerAnnotationKey] {
		klog.Infof("A previously requested drain has not yet completed. Waiting for machine-config-controller to finish draining node.")
		return nil
	}

	// Pass to the shared update prep method
	ufc, err := dn.prepUpdateFromCluster()
	if err != nil {
		maybeReportOnMissingMC(err)
		return err
	}

	// A non-nil ufc means an update is required; nil means the node is
	// already where it should be.
	if ufc != nil {
		err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
			&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdated, Reason: string(mcfgalphav1.MachineConfigNodeUpdated), Message: fmt.Sprintf("Node %s needs an update", dn.node.GetName())},
			nil,
			metav1.ConditionFalse,
			metav1.ConditionFalse,
			dn.node,
			dn.mcfgClient,
			dn.featureGatesAccessor,
			pool,
		)
		if err != nil {
			klog.Errorf("Error making MCN for Updated false: %v", err)
		}

		// Only check for config drift if we need to update.
		if err := dn.runPreflightConfigDriftCheck(); err != nil {
			return err
		}

		if err := dn.triggerUpdate(ufc.currentConfig, ufc.desiredConfig, ufc.currentImage, ufc.desiredImage); err != nil {
			// if MC was not found, let user know where they can find more info on this.
			maybeReportOnMissingMC(err)
			return err
		}
	} else {
		err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
			&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeUpdated, Reason: string(mcfgalphav1.MachineConfigNodeUpdated), Message: fmt.Sprintf("Node %s Updated", dn.node.GetName())},
			nil,
			metav1.ConditionTrue,
			metav1.ConditionFalse,
			dn.node,
			dn.mcfgClient,
			dn.featureGatesAccessor,
			pool,
		)
		if err != nil {
			klog.Errorf("Error making MCN for Updated: %v", err)
		}
	}
	// Reached both when no update was needed and when triggerUpdate returned
	// without an error.
	klog.V(2).Infof("Node %s is already synced", node.Name)
	if !dn.booting && dn.initializeHealthServer {
		// we want to wait until we are done booting AND we only want to do this once
		// we also want to give ourselves a little extra buffer. The corner case here is sometimes we get thru the first sync, and then the errors
		// begin ~1 minute later. So, list some api items until then. if we get to here, then we must be safe.
		//
		// The condition below never reports completion, so the poll keeps
		// listing every 10 seconds until the one-minute timeout expires; that
		// timeout is filtered out via wait.Interrupted and only a List failure
		// is treated as an error.
		if err := wait.PollUntilContextTimeout(context.TODO(), 10*time.Second, 1*time.Minute, false, func(_ context.Context) (bool, error) {
			_, err := dn.ccLister.List(labels.Everything())
			if err != nil {
				return false, err
			}
			return false, nil
		}); err != nil {
			if !wait.Interrupted(err) {
				return fmt.Errorf("could not list API items: %v", err)
			}
		}
		// The health listener lives in its own goroutine and is shut down
		// when dn.stopCh is closed.
		go func() {
			klog.Infof("Starting health listener on 127.0.0.1:8798")
			mux := http.NewServeMux()
			mux.Handle("/health", &healthHandler{})
			// NOTE(review): a TLSConfig is supplied, but the server is started
			// with ListenAndServe below, which serves plain HTTP — confirm
			// whether TLS was intended for this loopback listener.
			s := http.Server{
				TLSConfig: &tls.Config{
					MinVersion:   tls.VersionTLS12,
					NextProtos:   []string{"http/1.1"},
					CipherSuites: cipherOrder(),
				},
				TLSNextProto: make(map[string]func(*http.Server, *tls.Conn, http.Handler)),
				Addr:         "127.0.0.1:8798",
				Handler:      mux}

			go func() {
				if err := s.ListenAndServe(); err != nil && err != http.ErrServerClosed {
					klog.Errorf("health listener exited with error: %v", err)
				}
			}()
			<-dn.stopCh
			if err := s.Shutdown(context.Background()); err != nil {
				if err != http.ErrServerClosed {
					klog.Errorf("error stopping health listener: %v", err)
				}
			} else {
				klog.Infof("health listener successfully stopped")
			}

		}()
		dn.initializeHealthServer = false
	}
	return nil
}

// runPreflightConfigDriftCheck validates that the on-disk state matches the
// currently applied machineconfig before an update occurs.
//
// The check is skipped entirely when the force file is present. The current
// config is read from disk; if that yields nothing (for example because the
// file does not exist) it is taken from the node instead. When validation
// fails, a warning event is emitted, the failure is logged, and the error is
// returned wrapped in a *configDriftErr.
func (dn *Daemon) runPreflightConfigDriftCheck() error {
	// This allows skip behavior based upon the presence of
	// the forcefile: /run/machine-config-daemon-force.
	if forceFileExists() {
		klog.Infof("Skipping preflight config drift check; %s present", constants.MachineConfigDaemonForceFile)
		return nil
	}

	currentOnDisk, err := dn.getCurrentConfigOnDisk()
	if err != nil && !os.IsNotExist(err) {
		return err
	}

	if currentOnDisk == nil {
		currentOnDisk, err = dn.getCurrentConfigFromNode()
		if err != nil {
			return err
		}
	}

	start := time.Now()

	if err := dn.validateOnDiskStateOrImage(currentOnDisk.currentConfig, currentOnDisk.currentImage); err != nil {
		// Pass the message as an argument instead of as the format string:
		// the error text may contain '%' (e.g. in paths or URLs), which would
		// otherwise be interpreted as formatting verbs.
		dn.nodeWriter.Eventf(corev1.EventTypeWarning, "PreflightConfigDriftCheckFailed", "%s", err.Error())
		klog.Errorf("Preflight config drift check failed: %v", err)
		return &configDriftErr{err}
	}

	klog.Infof("Preflight config drift check successful (took %s)", time.Since(start))

	return nil
}

// enqueueDefault adds the given node to the daemon's work queue, subject to
// the queue's rate limiting. If no cache key can be derived for the node, the
// failure is reported through utilruntime.HandleError and nothing is queued.
func (dn *Daemon) enqueueDefault(node *corev1.Node) {
	if key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(node); err == nil {
		dn.queue.AddRateLimited(key)
	} else {
		utilruntime.HandleError(fmt.Errorf("couldn't get key for object %#v: %w", node, err))
	}
}

// RunHypershift is the entry point for the simplified Hypershift mode daemon.
//
// It installs the SIGTERM handler, starts the worker loop and then blocks
// until stopCh is closed or the signal handler reports termination; in both
// cases nil is returned. Errors received on exitCh are only logged. The work
// queue is shut down when the function returns.
func (dn *Daemon) RunHypershift(stopCh <-chan struct{}, exitCh <-chan error) error {
	klog.Info("Starting MachineConfigDaemon - Hypershift")

	sigtermCh := make(chan struct{})
	dn.InstallSignalHandler(sigtermCh)

	defer utilruntime.HandleCrash()
	defer dn.queue.ShutDown()

	go wait.Until(dn.worker, time.Second, stopCh)

	for {
		select {
		case auxErr := <-exitCh:
			// This channel gets errors from auxiliary goroutines like kubehealth
			// TODO we really shouldn't have any for hypershift
			klog.Warningf("Got an error from auxiliary tools: %v", auxErr)
		case <-sigtermCh:
			return nil
		case <-stopCh:
			return nil
		}
	}
}

// syncNodeHypershift reconciles this node against the desired configuration
// in Hypershift mode. Keys that do not refer to this daemon's node are ignored.
//
// Each invocation advances the update by at most one step and returns nil
// whenever it has to wait for a later sync:
//   - the controller has not yet honored the previous drain/uncordon request;
//   - the node annotations do not signal a pending update;
//   - current and desired configs are identical, in which case the on-disk
//     state is validated and the node is marked Done with an uncordon request;
//   - a drain is required and has just been requested via annotation.
//
// Otherwise the update is applied, the desired config is written to disk for
// future syncs, and the post config change action (reboot, CRI-O reload or
// none) is carried out before the node is marked Done.
//
//nolint:gocyclo
func (dn *Daemon) syncNodeHypershift(key string) error {
	// First, get the current and desired configurations for the node
	// current configuration will be read from on-disk state, either
	//   a) /etc/mcd-currentconfig.json, written by a previous hypershift-mode MCD
	//   b) /etc/mcs-machine-config-content.json, written by MCS when the node is provisioned,
	//      if no MCD has operated on this node
	// desired configuration will be read directly off a ConfigMap in our namespace, specified by
	// dn.hypershiftConfigMap. This currently has a "config" key (full ignition served json) and a "hash"
	// key, which is the TargetVersionConfigHash for Hypershift nodepools

	// This isn't strictly necessary but we should only react to our own node changes, like normal MCD
	_, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		return err
	}
	if name != dn.name {
		return nil
	}

	// First, check if our drain/uncordon request was honored by the controller
	node, err := dn.kubeClient.CoreV1().Nodes().Get(context.TODO(), dn.name, metav1.GetOptions{})
	if err != nil {
		return err
	}
	if node.Annotations[constants.DesiredDrainerAnnotationKey] != node.Annotations[constants.LastAppliedDrainerAnnotationKey] {
		// The controller has not yet performed our previous request
		klog.Infof("The controller has not yet performed our previous drain/uncordon request %s", node.Annotations[constants.DesiredDrainerAnnotationKey])
		return nil
	}
	if node.Annotations[constants.DesiredMachineConfigAnnotationKey] == "" ||
		node.Annotations[constants.CurrentMachineConfigAnnotationKey] == node.Annotations[constants.DesiredMachineConfigAnnotationKey] {
		// We have not yet been signaled to update, just return
		// This may cause issues because the desiredConfig here doesn't necessarily match the config in the configmap
		// TODO consider revisiting that
		klog.V(4).Info("CurrentConfig == DesiredConfig in node annotations.")
		return nil
	}

	// /etc/machine-config-daemon/currentconfig actually exists in hypershift nodes, but is empty.
	// So we are using another location instead
	currentConfigBytes, err := os.ReadFile(hypershiftCurrentConfigPath)
	if err != nil {
		if os.IsNotExist(err) {
			// No previous hypershift-mode MCD run; fall back to the MCS-served config.
			currentConfigBytes, err = os.ReadFile(mcsServedConfigPath)
			if err != nil {
				return fmt.Errorf("cannot find any existing configuration on disk: %w", err)
			}
		} else {
			return fmt.Errorf("failed to load local config: %w", err)
		}
	}

	var currentConfig mcfgv1.MachineConfig
	err = json.Unmarshal(currentConfigBytes, &currentConfig)
	if err != nil {
		return fmt.Errorf("cannot read on-disk state into MachineConfig: %w", err)
	}

	// Instead of reading from configmap directly, let's mount it in as a volume, such that we don't have to give that
	// additional RBAC rule
	ignServedConfigPath := filepath.Join(dn.hypershiftConfigMap, configMapConfigKey)
	ignServedConfigBytes, err := os.ReadFile(ignServedConfigPath)
	if err != nil {
		return fmt.Errorf("failed to load desiredConfig: %w", err)
	}
	targetHashPath := filepath.Join(dn.hypershiftConfigMap, configMapHashKey)
	targetHashBytes, err := os.ReadFile(targetHashPath)
	if err != nil {
		return fmt.Errorf("failed to load desiredConfig hash: %w", err)
	}
	targetHash := string(targetHashBytes)
	// Annotation value used to request an uncordon for the target hash.
	uncordonValue := fmt.Sprintf("%s-%s", constants.DrainerStateUncordon, targetHash)

	ignConfig, err := ctrlcommon.ParseAndConvertGzippedConfig(ignServedConfigBytes)
	if err != nil {
		return fmt.Errorf("failed to parse Ignition from configmap data.config: %w", err)
	}

	desiredConfigBytes, err := ctrlcommon.GetIgnitionFileDataByPath(&ignConfig, mcsServedConfigPath)
	if err != nil {
		return fmt.Errorf("failed to find desiredConfig from configmap data: %w", err)
	}

	var desiredConfig mcfgv1.MachineConfig
	err = json.Unmarshal(desiredConfigBytes, &desiredConfig)
	if err != nil {
		return fmt.Errorf("cannot decode desiredConfig from configmap data: %w", err)
	}

	klog.Infof("Successfully read current/desired Config")

	// check update reconcilability
	mcDiff, err := reconcilable(&currentConfig, &desiredConfig)
	if err != nil {
		return fmt.Errorf("the update is not reconcilable: %w", err)
	}
	if mcDiff.isEmpty() {
		// No diff was detected. Check if we are in the right state.
		klog.Infof("No diff detected. Assuming a previous update was completed. Checking on-disk state.")
		if err := dn.validateOnDiskState(&desiredConfig); err != nil {
			return fmt.Errorf("disk validation failed: %w", err)
		}

		if node.Annotations[constants.CurrentMachineConfigAnnotationKey] == targetHash &&
			node.Annotations[constants.DesiredDrainerAnnotationKey] == uncordonValue {
			// We are in a done state
			klog.Infof("The pod is in a completed state. Awaiting removal.")
			return nil
		}
		// Assume an update is completed. Set node state to done. Also request an uncordon

		annos := map[string]string{
			constants.MachineConfigDaemonStateAnnotationKey:  constants.MachineConfigDaemonStateDone,
			constants.MachineConfigDaemonReasonAnnotationKey: "",
			constants.CurrentMachineConfigAnnotationKey:      targetHash,
			constants.DesiredDrainerAnnotationKey:            uncordonValue,
		}
		if _, err := dn.nodeWriter.SetAnnotations(annos); err != nil {
			return fmt.Errorf("failed to set Done annotation on node: %w", err)
		}

		klog.Infof("The pod has completed update. Awaiting removal.")
		// TODO os.Exit here
		return nil
	}

	klog.Infof("Update is reconcilable. Diff: %+v", mcDiff)

	// This should be eventually de-duplicated with the update() function.
	oldIgnConfig, err := ctrlcommon.ParseAndConvertConfig(currentConfig.Spec.Config.Raw)
	if err != nil {
		return fmt.Errorf("parsing old Ignition config failed: %w", err)
	}
	newIgnConfig, err := ctrlcommon.ParseAndConvertConfig(desiredConfig.Spec.Config.Raw)
	if err != nil {
		return fmt.Errorf("parsing new Ignition config failed: %w", err)
	}
	diffFileSet := ctrlcommon.CalculateConfigFileDiffs(&oldIgnConfig, &newIgnConfig)
	actions, err := calculatePostConfigChangeAction(mcDiff, diffFileSet)
	if err != nil {
		return err
	}

	// Check and perform node drain if required
	drain, err := isDrainRequired(actions, diffFileSet, oldIgnConfig, newIgnConfig, false)
	if err != nil {
		return err
	}

	if drain {
		targetDrainValue := fmt.Sprintf("%s-%s", constants.DrainerStateDrain, targetHash)
		if node.Annotations[constants.DesiredDrainerAnnotationKey] != targetDrainValue {
			// Make a request to perform drain
			annos := map[string]string{
				constants.MachineConfigDaemonStateAnnotationKey:  constants.MachineConfigDaemonStateWorking,
				constants.MachineConfigDaemonReasonAnnotationKey: "",
				constants.DesiredDrainerAnnotationKey:            targetDrainValue,
			}
			if _, err := dn.nodeWriter.SetAnnotations(annos); err != nil {
				// This write requests a drain and marks the node Working, so
				// the error must not claim that the Done annotation failed.
				return fmt.Errorf("failed to set drain request annotation on node: %w", err)
			}
			// Wait for a future sync to perform post-drain actions
			klog.Info("Setting drain request via annotation to controller.")
			return nil
		}
	}

	// For us to be here, DesiredDrainerAnnotationKey == LastAppliedDrainerAnnotationKey == drain-targetHash
	// perform the actual update
	if err := dn.updateHypershift(&currentConfig, &desiredConfig, mcDiff); err != nil {
		return fmt.Errorf("failed to update configuration: %w", err)
	}

	// write new config to disk, used for future updates
	err = writeFileAtomicallyWithDefaults(hypershiftCurrentConfigPath, desiredConfigBytes)
	if err != nil {
		return fmt.Errorf("cannot store new config to disk: %w", err)
	}

	// Finally, once we are successful, we perform the necessary post config change action
	// TODO should be de-duplicated with update()
	if ctrlcommon.InSlice(postConfigChangeActionReboot, actions) {
		klog.Info("Rebooting node")
		return dn.reboot(fmt.Sprintf("Node will reboot into config %s", desiredConfig.Name))
	}

	if ctrlcommon.InSlice(postConfigChangeActionNone, actions) {
		klog.Infof("Node has Desired Config %s, skipping reboot", desiredConfig.Name)
	}

	if ctrlcommon.InSlice(postConfigChangeActionReloadCrio, actions) {
		serviceName := constants.CRIOServiceName
		if err := reloadService(serviceName); err != nil {
			return fmt.Errorf("could not apply update: reloading %s configuration failed. Error: %w", serviceName, err)
		}
		klog.Infof("%s config reloaded successfully! Desired config %s has been applied, skipping reboot", serviceName, desiredConfig.Name)
	}

	// We are here, which means reboot was not needed to apply the configuration.
	// Complete the update and return. Future syncs should see the update has completed.
	annos := map[string]string{
		constants.MachineConfigDaemonStateAnnotationKey:  constants.MachineConfigDaemonStateDone,
		constants.MachineConfigDaemonReasonAnnotationKey: "",
		constants.CurrentMachineConfigAnnotationKey:      targetHash,
		constants.DesiredDrainerAnnotationKey:            uncordonValue,
	}
	if _, err := dn.nodeWriter.SetAnnotations(annos); err != nil {
		return fmt.Errorf("failed to set Done annotation on node: %w", err)
	}
	klog.Info("A rebootless update was completed.")
	return nil
}

// RunOnceFrom is the primary entrypoint for the non-cluster case.
//
// It records skipReboot on the daemon, loads whatever onceFrom refers to and
// dispatches on the loaded type: Ignition configs and MachineConfigs are
// handled, anything else is rejected with an error.
func (dn *Daemon) RunOnceFrom(onceFrom string, skipReboot bool) error {
	dn.skipReboot = skipReboot

	loaded, contentFrom, err := dn.senseAndLoadOnceFrom(onceFrom)
	if err != nil {
		klog.Warningf("Unable to decipher onceFrom config type: %s", err)
		return err
	}

	switch cfg := loaded.(type) {
	case mcfgv1.MachineConfig:
		klog.V(2).Info("Daemon running directly from MachineConfig")
		return dn.runOnceFromMachineConfig(cfg, contentFrom)
	case ign3types.Config:
		klog.V(2).Info("Daemon running directly from Ignition")
		return dn.runOnceFromIgnition(cfg)
	default:
		return fmt.Errorf("unsupported onceFrom type provided")
	}
}

// RunFirstbootCompleteMachineconfig is run via systemd on the first boot
// to complete processing of the target MachineConfig.
//
// The MachineConfig is read from machineConfigFile. When rpm-ostree on the
// host is not new enough for layering, the OS is updated through a container
// and a reboot is issued. Otherwise the booted state is compared with the
// target config: if nothing differs, the encapsulated MachineConfig file is
// removed and no reboot happens; if there is a difference, the update is
// applied and the node is rebooted.
func (dn *Daemon) RunFirstbootCompleteMachineconfig(machineConfigFile string) error {
	raw, err := os.ReadFile(machineConfigFile)
	if err != nil {
		return err
	}

	var targetMC mcfgv1.MachineConfig
	if err := json.Unmarshal(raw, &targetMC); err != nil {
		return fmt.Errorf("failed to parse MachineConfig: %w", err)
	}

	layeringCapable, err := dn.NodeUpdaterClient.IsNewEnoughForLayering()
	if err != nil {
		return err
	}

	// If the host isn't new enough to understand the new container model natively, run as a privileged container.
	// See https://github.com/coreos/rpm-ostree/pull/3961 and https://issues.redhat.com/browse/MCO-356
	// This currently will incur a double reboot; see https://github.com/coreos/rpm-ostree/issues/4018
	if !layeringCapable {
		logSystem("rpm-ostree is not new enough for new-format image; forcing an update via container and queuing immediate reboot")
		if err := dn.InplaceUpdateViaNewContainer(targetMC.Spec.OSImageURL); err != nil {
			return err
		}
		cmd := rebootCommand("extra reboot for in-place update")
		if err := cmd.Run(); err != nil {
			logSystem("failed to run reboot: %v", err)
			return err
		}
		// Wait to be killed via SIGTERM; we want to ensure the firstboot process completes before e.g. kubelet.service
		// has a chance to start.  Now, a better way to handle all this would be to use a systemd generator
		// to e.g. mask kubelet if we detect the firstboot scenario - or better, only *enable* kubelet on the non-firstboot
		// setup.
		time.Sleep(defaultRebootTimeout)
		// Still running after the timeout means the reboot did not take effect.
		return fmt.Errorf("failed to reboot for secondary in-place update")
	}

	klog.Info("rpm-ostree has container feature")

	// Begin from an empty config and record the *booted* osImageURL in it, so
	// that it reflects the current machine state.
	bootedMC := canonicalizeEmptyMC(nil)
	bootedMC.Spec.OSImageURL = dn.bootedOSImageURL

	// The kernel arguments are set purely for comparison with the desired
	// MachineConfig; the resulting MC should not be used to update the node.
	if err := setRunningKargs(bootedMC, targetMC.Spec.KernelArguments); err != nil {
		return fmt.Errorf("failed to set kernel arguments from /proc/cmdline: %w", err)
	}

	// Likewise record the running FIPS setting so the comparison accounts for it.
	if err := setNodeFipsIntoMC(bootedMC); err != nil {
		return fmt.Errorf("failed to set node FIPS into MC: %w", err)
	}

	// Currently, we generally expect the bootimage to be older, but in the special
	// case of having bootimage == rhel-coreos, and no kernel arguments
	// specified, then we don't need to do anything here.
	changed, err := dn.compareMachineConfig(bootedMC, &targetMC)
	if err != nil {
		return fmt.Errorf("failed to compare MachineConfig: %w", err)
	}
	if !changed {
		// Removing this file signals completion of the initial MC processing.
		if err := os.Remove(constants.MachineConfigEncapsulatedPath); err != nil {
			return fmt.Errorf("failed to remove %s: %w", constants.MachineConfigEncapsulatedPath, err)
		}
		logSystem("skipping reboot since no changes were detected from %s to %s", bootedMC.GetName(), targetMC.GetName())
		return nil
	}

	// The reboot is issued explicitly at the end of this function.
	dn.skipReboot = true
	// This "false" is a compatibility for IBM's use case, where they are using the MCD to write the full configuration instead of just
	// the encapsulated config. This shouldn't affect normal OCP operations, but will allow anyone using this code to write configs to
	// still get the kubelet cert
	if err := dn.update(bootedMC, &targetMC, false); err != nil {
		return err
	}

	if machineConfigFile == constants.MachineConfigEncapsulatedPath {
		// Moving this file to its backup path signals completion of the
		// initial MC processing.
		if err := os.Rename(constants.MachineConfigEncapsulatedPath, constants.MachineConfigEncapsulatedBakPath); err != nil {
			return fmt.Errorf("failed to rename encapsulated MachineConfig after processing on firstboot: %w", err)
		}
	}

	// If we re-bootstrapped the node, we should disable and remove the systemd
	// unit that we used to do that. This function will no-op and return nil if
	// the systemd unit is not present.
	if err := dn.disableRevertSystemdUnit(); err != nil {
		return err
	}

	dn.skipReboot = false
	return dn.reboot(fmt.Sprintf("Completing firstboot provisioning to %s", targetMC.GetName()))
}

// InstallSignalHandler installs the handler for the signals the daemon should act on.
//
// SIGTERM is the only signal subscribed to. While an update is active the
// signal is logged and reported as an event but otherwise ignored; the first
// SIGTERM received while no update is active closes signaled and ends the
// handler goroutine.
func (dn *Daemon) InstallSignalHandler(signaled chan struct{}) {
	sigCh := make(chan os.Signal, 2048)
	signal.Notify(sigCh, syscall.SIGTERM)

	// Catch SIGTERM - if we're actively updating, we should avoid
	// having the process be killed.
	// https://github.com/openshift/machine-config-operator/issues/407
	go func() {
		for received := range sigCh {
			if received != syscall.SIGTERM {
				continue
			}

			// Take a snapshot of the flag under the lock.
			dn.updateActiveLock.Lock()
			busy := dn.updateActive
			dn.updateActiveLock.Unlock()

			if !busy {
				close(signaled)
				return
			}

			klog.Info("Got SIGTERM, but actively updating")
			dn.maybeEventf(corev1.EventTypeWarning, "GotSigtermDuringUpdate", "Got SIGTERM but actively updating")
		}
	}()
}

// pipeErrorHandler forwards every error handed to it onto a channel.
type pipeErrorHandler struct {
	// pipe receives each error passed to handle.
	pipe chan error
}

// handle sends err on the handler's channel and blocks until the send
// completes. The context, message and trailing arguments are ignored.
func (h *pipeErrorHandler) handle(_ context.Context, err error, _ string, _ ...interface{}) {
	h.pipe <- err
}

// maybeEventf emits an event through the node writer, forwarding all
// arguments unchanged. It does nothing when the node writer is nil.
func (dn *Daemon) maybeEventf(eventtype, reason, messageFmt string, args ...interface{}) {
	if dn.nodeWriter == nil {
		return
	}
	dn.nodeWriter.Eventf(eventtype, reason, messageFmt, args...)
}

// Run finishes informer setup and then blocks, and the informer will be
// responsible for triggering callbacks to handle updates. Successful
// updates shouldn't return, and should just reboot the node.
//
// Run returns when stopCh is closed, when the signal handler reports
// termination, or when an error arriving on errCh triggers a kubelet
// re-bootstrap. Errors received on exitCh are only logged. While Run is
// active, errors reported to utilruntime's error handlers are additionally
// forwarded to errCh; the previous handlers are restored on return.
func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error, errCh chan error) error {
	logSystem("Starting to manage node: %s", dn.name)
	dn.LogSystemData()

	klog.Info("Starting MachineConfigDaemon")
	defer klog.Info("Shutting down MachineConfigDaemon")

	sigtermCh := make(chan struct{})
	dn.InstallSignalHandler(sigtermCh)

	if dn.kubeletHealthzEnabled {
		klog.Info("Enabling Kubelet Healthz Monitor")
		go dn.runKubeletHealthzMonitor(stopCh, dn.exitCh)
	}

	savedHandlers := utilruntime.ErrorHandlers
	forwarder := &pipeErrorHandler{errCh}
	utilruntime.ErrorHandlers = append(utilruntime.ErrorHandlers, forwarder.handle)
	defer func() { utilruntime.ErrorHandlers = savedHandlers }()
	defer utilruntime.HandleCrash()
	defer dn.queue.ShutDown()
	defer dn.ccQueue.ShutDown()
	defer dn.preserveDaemonLogs()

	if synced := cache.WaitForCacheSync(stopCh, dn.nodeListerSynced, dn.mcListerSynced, dn.ccListerSynced, dn.mcpListerSynced); !synced {
		return fmt.Errorf("failed to sync initial listers cache")
	}

	// Collect metrics
	dn.getUnsupportedPackages()

	go wait.Until(dn.worker, time.Second, stopCh)
	go wait.Until(dn.controllerConfigWorker, time.Second, stopCh)

	for {
		select {
		case <-sigtermCh:
			return nil
		case <-stopCh:
			if !dn.deferKubeletRestart {
				return nil
			}
			// A kubelet restart was deferred; perform it before exiting.
			return dn.kubeletRebootstrap(context.TODO())
		case auxErr := <-exitCh:
			// This channel gets errors from auxiliary goroutines like loginmonitor and kubehealth
			klog.Warningf("Got an error from auxiliary tools: %v", auxErr)
		case handlerErr := <-errCh:
			klog.Errorf("Got an error from auxiliary tools: %v", handlerErr)
			// we do not want to fail on any .HandleError call. Need to only fail when it is a watcher
			// we might want to remove this last one. We will see.
			if !(dn.deferKubeletRestart && strings.Contains(strings.ToLower(handlerErr.Error()), "x509")) {
				continue
			}
			logSystem("Re-bootstrapping kubelet in response to deferred kubeconfig changes and %v", handlerErr)
			if rebootstrapErr := dn.kubeletRebootstrap(context.TODO()); rebootstrapErr != nil {
				return rebootstrapErr
			}
			return ErrAuxiliary
		}
	}
}

// kubeletRebootstrap clears the deferred-restart flag, removes the kubelet's
// kubeconfig, restarts the kubelet service and then waits for the kubeconfig
// file to reappear, polling every 10 seconds for up to 2 minutes.
//
// Underlying errors are wrapped with %w so that callers can inspect them with
// errors.Is / errors.As.
func (dn *Daemon) kubeletRebootstrap(ctx context.Context) error {
	const kubeconfigPath = "/var/lib/kubelet/kubeconfig"

	dn.deferKubeletRestart = false
	if err := os.Remove(kubeconfigPath); err != nil {
		return fmt.Errorf("could not remove kubelet's kubeconfig file: %w", err)
	}
	if err := runCmdSync("systemctl", "restart", "kubelet"); err != nil {
		return err
	}
	if err := wait.PollUntilContextTimeout(ctx, 10*time.Second, 2*time.Minute, false, func(_ context.Context) (bool, error) {
		_, err := os.ReadFile(kubeconfigPath)
		if err != nil && os.IsNotExist(err) {
			// Not there yet; keep polling.
			klog.Warningf("Failed to get kubeconfig file: %v", err)
			return false, nil
		} else if err != nil {
			// Any other read error aborts the wait.
			return false, fmt.Errorf("unexpected error reading kubeconfig file, %w", err)
		}

		return true, nil
	}); err != nil {
		return fmt.Errorf("something went wrong while waiting for kubeconfig file to generate: %w", err)
	}

	return nil
}

// onConfigDrift is called whenever the on-disk config has drifted from the
// current machineconfig. It stamps the config drift metric with the current
// time, emits a warning event, logs the error and records it through
// updateErrorState; a failure of that last step is only logged.
func (dn *Daemon) onConfigDrift(err error) {
	mcdConfigDrift.SetToCurrentTime()
	// The error text is passed as an argument, not as the format string, so
	// that any '%' it contains is emitted verbatim.
	dn.nodeWriter.Eventf(corev1.EventTypeWarning, "ConfigDriftDetected", "%s", err.Error())
	klog.Error(err)
	if err := dn.updateErrorState(err); err != nil {
		klog.Errorf("Could not update annotation: %v", err)
	}
}

// getCurrentConfigFromNode builds an onDiskConfig from the node's state (see
// getStateAndConfigs) rather than from disk. It serves callers of
// getCurrentConfigOnDisk when the on-disk config is missing, e.g. because it
// was deleted manually.
func (dn *Daemon) getCurrentConfigFromNode() (*onDiskConfig, error) {
	nodeState, err := dn.getStateAndConfigs()
	if err != nil {
		return nil, fmt.Errorf("could not get the state: %w", err)
	}

	return &onDiskConfig{
		currentConfig: nodeState.currentConfig,
		currentImage:  nodeState.currentImage,
	}, nil
}

// startConfigDriftMonitor resets the config drift metric and, unless a Config
// Drift Monitor is already running, starts one against the current
// MachineConfig (taken from disk, or from the node when no on-disk config is
// available). Failures are reported on dn.exitCh. After a successful start an
// event is emitted and a goroutine is launched that stops the monitor once
// either the daemon's stop channel or the monitor's Done channel fires.
func (dn *Daemon) startConfigDriftMonitor() {
	mcdConfigDrift.Set(0)
	// Even though the Config Drift Monitor object ensures that only a single
	// Config Drift Watcher is running at any given time, other things, such as
	// emitting Kube events on startup, should only occur if we weren't
	// previously running. This provides us with a way to short-circuit that path
	// if we already have a Config Drift Watcher running.
	if dn.configDriftMonitor.IsRunning() {
		return
	}

	odc, err := dn.getCurrentConfigOnDisk()
	if err != nil && !os.IsNotExist(err) {
		dn.exitCh <- fmt.Errorf("could not get current config from disk: %w", err)
		return
	}

	if odc == nil {
		if odc, err = dn.getCurrentConfigFromNode(); err != nil {
			dn.exitCh <- err
			return
		}
	}

	startErr := dn.configDriftMonitor.Start(ConfigDriftMonitorOpts{
		OnDrift:       dn.onConfigDrift,
		SystemdPath:   pathSystemd,
		ErrChan:       dn.exitCh,
		MachineConfig: odc.currentConfig,
	})
	if startErr != nil {
		dn.exitCh <- fmt.Errorf("could not start Config Drift Monitor: %w", startErr)
		return
	}

	dn.nodeWriter.Eventf(corev1.EventTypeNormal, "ConfigDriftMonitorStarted",
		"Config Drift Monitor started, watching against %s", odc.currentConfig.Name)

	go func() {
		// Block until a stop signal arrives, either from outside the MCD or
		// from the Config Drift Monitor itself.
		select {
		case <-dn.stopCh:
		case <-dn.configDriftMonitor.Done():
		}

		// Stop the Config Drift Monitor, if it's not already stopped.
		dn.configDriftMonitor.Stop()
		// Report that we've shut down
		dn.nodeWriter.Eventf(corev1.EventTypeNormal, "ConfigDriftMonitorStopped", "Config Drift Monitor stopped")
	}()
}

// stopConfigDriftMonitor stops the daemon's Config Drift Monitor by
// delegating to its Stop method.
func (dn *Daemon) stopConfigDriftMonitor() {
	dn.configDriftMonitor.Stop()
}

// runKubeletHealthzMonitor polls the kubelet health endpoint every
// kubeletHealthzPollingInterval until stopCh is closed.
//
// Each failed check increments a consecutive-failure counter and reports an
// error on exitCh; a successful check resets the counter. The counter is
// mirrored into the kubeletHealthState metric.
func (dn *Daemon) runKubeletHealthzMonitor(stopCh <-chan struct{}, exitCh chan<- error) {
	failureCount := 0
	kubeletHealthState.Set(float64(failureCount))
	for {
		select {
		case <-stopCh:
			return
		case <-time.After(kubeletHealthzPollingInterval):
			if err := dn.getHealth(); err != nil {
				failureCount++
				healthErr := fmt.Errorf("kubelet health check has failed %d times: %w", failureCount, err)
				// Give up on the send once the daemon is stopping, so this
				// goroutine cannot stay blocked after the receiver has gone.
				select {
				case exitCh <- healthErr:
				case <-stopCh:
					return
				}
			} else {
				// reset failure count on success
				failureCount = 0
			}
			kubeletHealthState.Set(float64(failureCount))
		}
	}
}

// getHealth issues a GET request to the kubelet healthz endpoint, bounded by
// kubeletHealthzTimeout, and reads the response body.
//
// An error is returned when the request cannot be built or sent, or when the
// body cannot be read. A body other than "ok" is logged as a warning but is
// not reported as an error.
func (dn *Daemon) getHealth() error {
	klog.V(2).Info("Kubelet health running")

	ctx, cancel := context.WithTimeout(context.Background(), kubeletHealthzTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, dn.kubeletHealthzEndpoint, nil)
	if err != nil {
		return err
	}

	var client http.Client
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return err
	}

	if answer := string(body); answer != "ok" {
		klog.Warningf("Kubelet Healthz Endpoint returned: %s", answer)
		return nil
	}

	klog.V(2).Info("Kubelet health ok")
	return nil
}

// syncFinalizationFailureAnnotation proxies any failures of ostree-finalize-staged
// from the previous boot to an explicit node annotation, since it is very
// likely to be a root cause.
//
// finalizeError is the failure text, or "" when finalization succeeded. It is
// truncated to 2000 characters and cached on the daemon. When a node writer is
// available, the annotation is written unless there is neither an error nor an
// existing annotation, and a warning event is emitted the first time a given
// non-empty failure is seen. The error from writing the annotation is
// returned.
func (dn *Daemon) syncFinalizationFailureAnnotation(finalizeError string) error {
	if finalizeError != "" {
		klog.Warningf("Failed to finalize previous deployment: %s", finalizeError)
		// Truncate this to a reasonable size in case it's somehow very long
		finalizeError = fmt.Sprintf("%.2000s", finalizeError)
	} else {
		klog.Infof("Previous boot ostree-finalize-staged.service appears successful")
	}

	// Remember the previously cached value before overwriting it; comparing
	// against the cache after the assignment could never detect a change.
	previousFailure := dn.previousFinalizationFailure

	// Cache it globally so we can include it in any other error messages if applicable
	dn.previousFinalizationFailure = finalizeError

	// If there's no node writer, there's no annotations to sync
	if dn.nodeWriter == nil {
		return nil
	}
	_, ok := dn.node.Annotations[constants.MachineConfigDaemonFinalizeFailureAnnotationKey]
	// In the happy path where there's no error and no annotation, we're done
	if finalizeError == "" && !ok {
		return nil
	}

	// And let's emit this as an event, but only for an actual failure that
	// differs from the one already reported. The text is passed as an
	// argument so that any '%' it contains is emitted verbatim.
	if finalizeError != "" && previousFailure != finalizeError {
		dn.nodeWriter.Eventf(corev1.EventTypeWarning, "FailedFinalization", "%s", finalizeError)
	}

	annos := map[string]string{
		constants.MachineConfigDaemonFinalizeFailureAnnotationKey: finalizeError,
	}
	_, err := dn.nodeWriter.SetAnnotations(annos)
	return err
}

// stateAndConfigs is the "state" node annotation plus the parsed machine
// configs referenced by the currentConfig and desiredConfig annotations, along
// with the current and desired image annotations.
//
// If the current and desired config names are the same, currentConfig and
// desiredConfig will be pointer-equal.
//
// NOTE(review): earlier documentation mentioned a "pending" config; no such
// field exists on this struct.
type stateAndConfigs struct {
	// bootstrapping is true when the initial node annotations file exists.
	bootstrapping bool
	// state is the daemon state annotation; getStateAndConfigs substitutes
	// the Done state when the annotation is empty.
	state string
	// currentConfig is the MachineConfig named by the current-config annotation.
	currentConfig *mcfgv1.MachineConfig
	// desiredConfig is the MachineConfig named by the desired-config annotation.
	desiredConfig *mcfgv1.MachineConfig
	// currentImage is the value of the current-image annotation.
	currentImage string
	// desiredImage is the value of the desired-image annotation.
	desiredImage string
}

// getCurrentName returns a human-readable description of the current state:
// the current MachineConfig name, followed by the current image when one is
// set.
func (s *stateAndConfigs) getCurrentName() string {
	mcName := s.currentConfig.GetName()
	if s.currentImage != "" {
		return fmt.Sprintf("MachineConfig: %s / Image: %s", mcName, s.currentImage)
	}

	return fmt.Sprintf("MachineConfig: %s", mcName)
}

// getStateAndConfigs assembles the node's state from its annotations: the
// daemon state, the current and desired MachineConfigs (resolved through the
// MachineConfig lister) and the current and desired images. It also detects
// bootstrap mode from the presence of the initial node annotations file.
//
// As side effects it proxies any finalization failure from the previous boot
// to a node annotation (best-effort; failures are only logged) and updates
// the daemon state metric.
func (dn *Daemon) getStateAndConfigs() (*stateAndConfigs, error) {
	_, err := os.Lstat(constants.InitialNodeAnnotationsFilePath)
	var bootstrapping bool
	if err != nil {
		if !os.IsNotExist(err) {
			return nil, err
		}
		// The node annotation file (laid down by the MCS)
		// doesn't exist, we must not be bootstrapping
	} else {
		bootstrapping = true
		klog.Info("In bootstrap mode")
	}

	currentConfigName, err := getNodeAnnotation(dn.node, constants.CurrentMachineConfigAnnotationKey)
	if err != nil {
		return nil, err
	}
	desiredConfigName, err := getNodeAnnotation(dn.node, constants.DesiredMachineConfigAnnotationKey)
	if err != nil {
		return nil, err
	}
	currentConfig, err := dn.mcLister.Get(currentConfigName)
	if err != nil {
		// This is to handle better erroring for https://issues.redhat.com/browse/MCO-466
		// If the following are true:
		// - the current config can't be fetched from the lister
		// - the node is in bootstrap mode
		// - the current config can be found on disk
		// then the bootstrap generated MC != in-cluster generated MC due to a variance between install-time manifests
		// and in-cluster objects. Be more specific about the error in this case.
		if bootstrapping && apierrors.IsNotFound(err) {
			currentConfigOnDisk, err := dn.getCurrentConfigOnDisk()
			if err != nil {
				return nil, fmt.Errorf("error fetching current config on disk during bootstrap: %s, config: %v", err, currentConfigOnDisk)
			}
			if currentConfigOnDisk.currentConfig.Name != currentConfigName {
				return nil, fmt.Errorf("error current config %s on disk does not match current config on annotation: %s", currentConfigOnDisk.currentConfig.Name, currentConfigName)
			}
			return nil, dn.generateBootstrappingMCMismatchError(currentConfigOnDisk, currentConfigName)
		}
		// If this happens outside of bootstrap, return the general error
		return nil, maybeAddMachineConfigInfo(currentConfigName, err)
	}
	state, err := getNodeAnnotationExt(dn.node, constants.MachineConfigDaemonStateAnnotationKey, true)
	if err != nil {
		return nil, err
	}
	currentImage, err := getNodeAnnotationExt(dn.node, constants.CurrentImageAnnotationKey, true)
	if err != nil {
		klog.Infof("%s is not set. any errors? %s", constants.CurrentImageAnnotationKey, err)
		return nil, err
	}
	desiredImage, err := getNodeAnnotationExt(dn.node, constants.DesiredImageAnnotationKey, true)
	if err != nil {
		klog.Infof("%s is not set. any errors? %s", constants.DesiredImageAnnotationKey, err)
		return nil, err
	}

	// Temporary hack: the MCS used to not write the state=done annotation
	// key.  If it's unset, let's write it now.
	if state == "" {
		state = constants.MachineConfigDaemonStateDone
	}

	// Gather any failure from ostree-finalize-staged.service from the previous
	// boot, and proxy it to an annotation for high visibility if we did
	// fail.
	if dn.NodeUpdaterClient != nil {
		finalizeError, err := dn.NodeUpdaterClient.Peel().QueryPreviousDeploymentError()
		if err != nil {
			klog.Warningf("failed to query for deployment failure: %v", err)
		} else {
			var finalizeErrorVal string
			if finalizeError != nil {
				finalizeErrorVal = *finalizeError
			}
			// Best-effort: a failure to sync the annotation must not prevent
			// the state from being returned, but it should not go unnoticed.
			if err := dn.syncFinalizationFailureAnnotation(finalizeErrorVal); err != nil {
				klog.Warningf("failed to sync finalization failure annotation: %v", err)
			}
		}
	}

	var desiredConfig *mcfgv1.MachineConfig
	if currentConfigName == desiredConfigName {
		// Same name: share the object so both fields are pointer-equal.
		desiredConfig = currentConfig
		klog.Infof("Current+desired config: %s", currentConfigName)
	} else {
		desiredConfig, err = dn.mcLister.Get(desiredConfigName)
		if err != nil {
			return nil, maybeAddMachineConfigInfo(desiredConfigName, err)
		}
		klog.Infof("Current config: %s", currentConfigName)
		klog.Infof("Desired config: %s", desiredConfigName)
	}

	if currentImage == desiredImage && desiredImage != "" {
		klog.Infof("Current image: %s", currentImage)
		klog.Infof("Desired image: %s", desiredImage)
	}

	klog.Infof("state: %s", state)

	var degradedReason string
	if state == constants.MachineConfigDaemonStateDegraded {
		degradedReason, err = getNodeAnnotation(dn.node, constants.MachineConfigDaemonReasonAnnotationKey)
		if err != nil {
			klog.Errorf("Could not retrieve degraded reason. err: %v", err)
		}
	}

	UpdateStateMetric(mcdState, state, degradedReason)

	return &stateAndConfigs{
		bootstrapping: bootstrapping,
		currentConfig: currentConfig,
		desiredConfig: desiredConfig,
		state:         state,
		currentImage:  currentImage,
		desiredImage:  desiredImage,
	}, nil
}

// LogSystemData gathers data from the OS and adds it to our stdout; should only
// be called once on MCD startup to log things which generally shouldn't change
// dynamically after a reboot.
//
// On CoreOS variants it logs `rpm-ostree status` (a failure there is logged
// as fatal) and the provisioning information. It then logs the list of boots
// and the failed systemd units; errors from those two commands are only
// logged.
func (dn *Daemon) LogSystemData() {
	// Print status if available
	if dn.os.IsCoreOSVariant() {
		status, err := runGetOut("rpm-ostree", "status")
		if err != nil {
			klog.Fatalf("unable to get rpm-ostree status: %s", err)
		}
		klog.Infof("%s", status)

		logProvisioningInformation()
	}

	bootList, err := runGetOut("journalctl", "--list-boots")
	if err != nil {
		klog.Errorf("Listing boots: %v", err)
	}
	klog.Info("journalctl --list-boots:\n" + string(bootList))

	// Since nothing in the cluster today watches systemd units, let's
	// at least capture them in our logs to start.  See also
	// https://github.com/openshift/machine-config-operator/issues/1365
	// This only captures units that started *before* the MCD, we need
	// to also watch dynamically of course.
	//
	// also xref https://github.com/coreos/console-login-helper-messages/blob/e8a849f4c23910e7c556c10719911cc59873fc23/usr/share/console-login-helper-messages/profile.sh
	failedUnits, err := runGetOut("systemctl", "list-units", "--state=failed", "--no-legend")
	if err != nil {
		klog.Errorf("Listing failed systemd services: %v", err)
	} else if len(failedUnits) == 0 {
		klog.Info("systemd service state: OK")
	} else {
		klog.Info("systemctl --failed:\n" + string(failedUnits))
	}
}

// onDiskConfig pairs a MachineConfig with an OS image string. It is the value
// produced by getCurrentConfigOnDisk and consumed by storeCurrentConfigOnDisk.
type onDiskConfig struct {
	// currentConfig is the MachineConfig (de)serialized as JSON at
	// dn.currentConfigPath.
	currentConfig *mcfgv1.MachineConfig
	// currentImage is the content stored at dn.currentImagePath; it is empty
	// when that file does not exist.
	currentImage  string
}

// getCurrentImageOnDisk reads dn.currentImagePath
// (/etc/machine-config-daemon/currentimage) to determine the currently applied
// OS image when layering is in use. The file plays a role similar to
// /etc/machine-config-daemon/currentconfig.
//
// A missing file is not an error and yields an empty string; any other read
// failure is returned wrapped. On success the content is returned with
// surrounding whitespace trimmed.
func (dn *Daemon) getCurrentImageOnDisk() (string, error) {
	raw, err := os.ReadFile(dn.currentImagePath)
	if err == nil {
		// Strip spaces, newlines, etc. around the stored image reference.
		return strings.TrimSpace(string(raw)), nil
	}

	if errors.Is(err, fs.ErrNotExist) {
		// The file has not been written yet; default to an empty value.
		klog.Infof("File %q does not yet exist, defaulting to empty value", dn.currentImagePath)
		return "", nil
	}

	// Any failure other than "does not exist" is surfaced to the caller.
	return "", fmt.Errorf("could not read current image path %s: %w", dn.currentImagePath, err)
}

// getCurrentConfigOnDisk loads the MachineConfig serialized as JSON at
// dn.currentConfigPath (written to /etc, and present while an update is being
// performed) together with the current image recorded on disk.
//
// The error from opening the file is returned unwrapped, so callers can test
// it with os.IsNotExist.
func (dn *Daemon) getCurrentConfigOnDisk() (*onDiskConfig, error) {
	f, err := os.Open(dn.currentConfigPath)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var mc mcfgv1.MachineConfig
	decoder := json.NewDecoder(bufio.NewReader(f))
	if err := decoder.Decode(&mc); err != nil {
		return nil, err
	}

	image, err := dn.getCurrentImageOnDisk()
	if err != nil {
		return nil, err
	}

	return &onDiskConfig{currentConfig: &mc, currentImage: image}, nil
}

// generateBootstrappingMCMismatchError builds the error reported when a node
// cannot complete bootstrapping because the first MachineConfig generated
// during bootstrap differs from the first MachineConfig generated in-cluster.
//
// When the in-cluster MachineConfig can be looked up, a diff file is also
// written (best effort) and the returned error points at it; otherwise a
// generic mismatch error is returned.
func (dn *Daemon) generateBootstrappingMCMismatchError(currentConfigOnDisk *onDiskConfig, currentConfigName string) error {
	// We shouldn't hit the lookup failures below, but if we do, this still
	// gives the user something useful.
	genericMismatch := func() error {
		return fmt.Errorf("bootstrap generated MC %s does not match in-cluster MC generated by the controller", currentConfigName)
	}

	// The pool is taken from the first owner reference of the on-disk config.
	// If there is none, default to the master pool, as that is the typical
	// case for this failure mode.
	poolName := ctrlcommon.MachineConfigPoolMaster
	if owners := currentConfigOnDisk.currentConfig.GetOwnerReferences(); len(owners) != 0 {
		poolName = owners[0].Name
	}

	pool, err := dn.mcfgClient.MachineconfigurationV1().MachineConfigPools().Get(context.TODO(), poolName, metav1.GetOptions{})
	if err != nil {
		return genericMismatch()
	}

	clusterMC, err := dn.mcLister.Get(pool.Spec.Configuration.Name)
	if err != nil {
		return genericMismatch()
	}

	// Attempt to create a file on-disk for debugging.
	dn.createBootstrapMachineConfigDiffFile(currentConfigOnDisk.currentConfig, clusterMC)
	return fmt.Errorf("bootstrap generated MC %s and in-cluster generated MC %s for this node do not match. Please check machine-config-daemon logs or the on disk diff file located at %s", currentConfigName, clusterMC.Name, bootstrapConfigDiffPath)
}

// createBootstrapMachineConfigDiffFile logs the differences between oldConfig
// (the bootstrap-generated MachineConfig) and newConfig (the in-cluster
// generated one) and writes them to bootstrapConfigDiffPath for debugging.
//
// The operation is best effort: every failure is logged rather than returned,
// and nothing is written if the diff file already exists.
func (dn *Daemon) createBootstrapMachineConfigDiffFile(oldConfig, newConfig *mcfgv1.MachineConfig) {
	if _, err := os.Stat(bootstrapConfigDiffPath); err == nil {
		// If the file already exists, we don't need to write it again
		return
	}
	mcDiff, err := newMachineConfigDiff(oldConfig, newConfig)
	if err != nil {
		// Include the underlying error so the failure can actually be diagnosed.
		klog.Errorf("error generating diff between bootstrap MC %s and in-cluster MC %s: %v", oldConfig.Name, newConfig.Name, err)
		return
	}
	if mcDiff.isEmpty() {
		// we shouldn't be here, but best signal that if we are
		klog.Errorf("no diff found between bootstrap MC %s and in-cluster MC %s", oldConfig.Name, newConfig.Name)
		return
	}
	diffResult := fmt.Sprintf("Bootstrap generated MC %s vs In-cluster generated MC %s diffs:\nGranular diff:\n %+v \nRaw diff:\n%s\n", oldConfig.Name, newConfig.Name, mcDiff, cmp.Diff(oldConfig.Spec, newConfig.Spec))

	// The diff goes to the log as well as to the file, so it is visible even
	// when the write below fails.
	klog.Error(diffResult)
	if err := writeFileAtomicallyWithDefaults(bootstrapConfigDiffPath, []byte(diffResult)); err != nil {
		klog.Errorf("failed to write bootstrap MachineConfig diff to %s: %v", bootstrapConfigDiffPath, err)
	} else {
		klog.Infof("bootstrap MachineConfig diff written to %s", bootstrapConfigDiffPath)
	}
}

// storeCurrentConfigOnDisk persists odc under /etc: the MachineConfig is
// written as JSON to dn.currentConfigPath and the image string to
// dn.currentImagePath. These files denote the state we expect the system to
// have transitioned into.
func (dn *Daemon) storeCurrentConfigOnDisk(odc *onDiskConfig) error {
	serialized, err := json.Marshal(odc.currentConfig)
	if err != nil {
		return err
	}

	// The config file is written first; the image file is only written once
	// that succeeded.
	err = writeFileAtomicallyWithDefaults(dn.currentConfigPath, serialized)
	if err != nil {
		return err
	}

	return writeFileAtomicallyWithDefaults(dn.currentImagePath, []byte(odc.currentImage))
}

// upgradeHackFor44AndBelow works around
// https://bugzilla.redhat.com/show_bug.cgi?id=1842906
//
// If -firstboot.service did not complete successfully, the service may start
// downgrading things, because 4.5 and newer removed the BindsTo=. At this
// point all target changes should already have been applied, so the
// encapsulated MachineConfig is simply renamed to its .bak path, the same as
// the -firstboot path does.
func upgradeHackFor44AndBelow() error {
	// If the file cannot be stat'ed there is nothing to do.
	if _, statErr := os.Stat(constants.MachineConfigEncapsulatedPath); statErr != nil {
		return nil
	}

	klog.Warningf("Failed to complete machine-config-daemon-firstboot before joining cluster!")
	// Removing this file signals completion of the initial MC processing.
	renameErr := os.Rename(constants.MachineConfigEncapsulatedPath, constants.MachineConfigEncapsulatedBakPath)
	if renameErr != nil {
		return fmt.Errorf("failed to rename encapsulated MachineConfig after processing on firstboot: %w", renameErr)
	}
	return nil
}

// removeIgnitionArtifacts deletes artifacts used by Ignition that the MCO
// should no longer use now that the machine is up. Currently this is only the
// systemd preset file written by Ignition; a file that is already absent is
// not an error.
func removeIgnitionArtifacts() error {
	err := os.Remove(constants.IgnitionSystemdPresetFile)
	if err == nil || os.IsNotExist(err) {
		return nil
	}
	return fmt.Errorf("failed to remove Ignition-written systemd preset file: %w", err)
}

// PersistNetworkInterfaces runs if the host is RHEL8, which can happen
// when scaling up older bootimages and targeting 4.13+ (rhel9).  In this case,
// we may want to pin NIC interface names that reference static IP addresses.
// More information in https://issues.redhat.com/browse/OCPBUGS-10787
//
// osRoot is the root of the host filesystem to operate on. On an EL8 host
// `nmstatectl persist-nic-names` is run; on an EL9 host the same command is
// run with `--cleanup` and its failures are ignored. Any kernel arguments
// nmstatectl emits are then appended (EL8) or removed (EL9) via
// `rpm-ostree kargs`. Hosts that are neither EL8 nor EL9 are left untouched.
func PersistNetworkInterfaces(osRoot string) error {
	hostos, err := osrelease.GetHostRunningOSFromRoot(osRoot)
	if err != nil {
		return fmt.Errorf("checking operating system: %w", err)
	}

	nmstateBinary := "/usr/bin/nmstatectl"
	// If we're already chrooted into the host / in the MCD case, then we
	// need to find the binary in our saved copy of /usr/bin from the host.
	if osRoot == "/" {
		nmstateBinary = filepath.Join(originalContainerBin, "nmstatectl")
	}

	// For the moment, we only look at RHEL-like systems...this logic isn't
	// yet aiming to try to handle Fedora-level updates.  For that, most
	// likely this NIC pinning should actually be driven automatically by
	// host updates.  If you change this, you'll need to change the conditions
	// below too.
	persisting := hostos.IsEL8()
	cleanup := hostos.IsEL9()
	if !(persisting || cleanup) {
		return nil
	}

	tmpKargs, err := os.CreateTemp("", "nmstate-kargs")
	if err != nil {
		return err
	}
	defer func() {
		// os.CreateTemp hands back an open file; release the descriptor before
		// unlinking. The file is only read here, so a Close error carries no
		// information worth surfacing.
		_ = tmpKargs.Close()
		os.Remove(tmpKargs.Name())
	}()

	cmd := exec.Command(nmstateBinary, "persist-nic-names", "--root", osRoot, "--kargs-out", tmpKargs.Name())

	switch {
	case persisting:
		klog.Info("Persisting NIC names for RHEL8 host system")
	case cleanup:
		cmd.Args = append(cmd.Args, "--cleanup")
	default:
		return fmt.Errorf("Unexpected host OS %s", hostos.ToPrometheusLabel())
	}

	// nmstate always logs to stderr, so we need to capture/forward that too
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	klog.Infof("Running: %s", strings.Join(cmd.Args, " "))
	if err := cmd.Run(); err != nil {
		if cleanup {
			// nmstatectl clean up will fail if stamp file not
			// found or `ROOT/etc/systemd/network` folder not
			// found, these error is OK to ignore
			klog.Infof("Cleanup error ignored: %v", err)
			return nil
		}
		return fmt.Errorf("failed to run nmstatectl: %w", err)
	}

	kargsBuf, err := io.ReadAll(tmpKargs)
	if err != nil {
		return fmt.Errorf("failed to read %s: %w", tmpKargs.Name(), err)
	}
	// Split on any run of whitespace so that a trailing newline or repeated
	// separators never turn into empty or newline-carrying arguments.
	kargs := strings.Fields(string(kargsBuf))
	// If there are no kargs, then nmstate took care of everything else.
	if len(kargs) == 0 {
		return nil
	}

	var rpmOstreeArgs []string
	switch {
	case persisting:
		for _, karg := range kargs {
			rpmOstreeArgs = append(rpmOstreeArgs, "--append", karg)
		}
	case cleanup:
		for _, karg := range kargs {
			rpmOstreeArgs = append(rpmOstreeArgs, "--remove", karg)
		}
	default:
		return fmt.Errorf("Unexpected host OS %s", hostos.ToPrometheusLabel())
	}

	// When operating on a root other than "/", rpm-ostree is run chrooted
	// into it.
	if osRoot != "/" {
		cmd = exec.Command("chroot", osRoot, "rpm-ostree", "kargs")
	} else {
		cmd = exec.Command("rpm-ostree", "kargs")
	}
	cmd.Args = append(cmd.Args, rpmOstreeArgs...)

	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	klog.Infof("Running: %s", strings.Join(cmd.Args, " "))
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("failed to run rpm-ostree kargs: %w", err)
	}
	return nil
}

// isSSHKeyLocationUpdateRequired reports whether SSH keys still have to be
// migrated from the RHCOS 8 key path to the RHCOS 9 key path.
//
// When we move from RHCOS 8 -> RHCOS 9, the SSH keys do not get written to the
// new location before the node reboots into RHCOS 9 because:
//
// 1. When the upgrade configs are written to the node, it is still running
// RHCOS 8, so the keys are not being written to the new location since the
// location is inferred from the currently booted OS.
// 2. The node reboots into RHCOS 9 to complete the upgrade.
// 3. The "are we on the latest config" functions detect that we are indeed on
// the latest config and so it does not attempt to perform an update.
//
// To work around that, we check on bootup whether we should use the new SSH
// key path. If so, and the old SSH key path exists while the new one does
// not, we know that we need to migrate to the new key path by calling
// dn.updateSSHKeyLocation().
func (dn *Daemon) isSSHKeyLocationUpdateRequired() (bool, error) {
	// Nothing to migrate when the new SSH key path is not in use.
	if !dn.useNewSSHKeyPath() {
		return false, nil
	}

	// Both paths are always probed so that an error on either one is reported.
	legacyPresent, err := fileExists(constants.RHCOS8SSHKeyPath)
	if err != nil {
		return false, err
	}

	migratedPresent, err := fileExists(constants.RHCOS9SSHKeyPath)
	if err != nil {
		return false, err
	}

	// An update is needed only when the old key exists and the new one does not.
	if !legacyPresent {
		return false, nil
	}
	return !migratedPresent, nil
}

// updateSSHKeyLocation parses the Ignition config embedded in cfg and rewrites
// the SSH keys of its users via dn.updateSSHKeys, passing the same user list
// for both arguments.
func (dn *Daemon) updateSSHKeyLocation(cfg *mcfgv1.MachineConfig) error {
	klog.Infof("SSH key location update required. Moving SSH keys from %q to %q.", constants.RHCOS8SSHKeyPath, constants.RHCOS9SSHKeyPath)

	parsed, parseErr := ctrlcommon.ParseAndConvertConfig(cfg.Spec.Config.Raw)
	if parseErr != nil {
		return fmt.Errorf("ignition failure when updating SSH key location: %w", parseErr)
	}

	users := parsed.Passwd.Users
	if writeErr := dn.updateSSHKeys(users, users); writeErr != nil {
		return fmt.Errorf("could not write SSH keys to new location: %w", writeErr)
	}

	return nil
}

// updateSSHKeyLocationIfNeeded checks whether the SSH key location has to be
// updated and, if it does, performs the update using cfg.
func (dn *Daemon) updateSSHKeyLocationIfNeeded(cfg *mcfgv1.MachineConfig) error {
	required, err := dn.isSSHKeyLocationUpdateRequired()
	if err != nil {
		return fmt.Errorf("unable to determine if SSH key location update is required: %w", err)
	}

	if required {
		return dn.updateSSHKeyLocation(cfg)
	}

	klog.Infof("SSH key location (%q) up-to-date!", constants.RHCOS9SSHKeyPath)
	return nil
}

// checkStateOnFirstRun is a core entrypoint for our state machine.
// It determines whether we're in our desired state, or if we're
// transitioning between states, and whether or not we need to update
// to a new state. It also checks if someone jumped on a node before
// the daemon took over.
//
// The steps, in order:
//  1. Reload the node annotations and refresh the cached dn.node.
//  2. Compute the state and current/desired configs, then remove any rollback.
//  3. While bootstrapping: pivot the OS and reboot if the booted OS does not
//     match the current config, otherwise rename the initial annotations file.
//  4. When not bootstrapping: let the on-disk config/image override the node
//     annotations where they differ.
//  5. If the force file exists, skip validation and trigger an update.
//  6. Otherwise migrate the SSH key location if needed, validate the on-disk
//     state, and finally either finish (already in the desired config) or
//     trigger an update.
//
// Some more background in this PR: https://github.com/openshift/machine-config-operator/pull/245
//
//nolint:gocyclo
func (dn *Daemon) checkStateOnFirstRun() error {
	node, err := dn.loadNodeAnnotations(dn.node)
	if err != nil {
		return err
	}
	// Update our cached copy
	dn.node = node

	state, err := dn.getStateAndConfigs()
	if err != nil {
		maybeReportOnMissingMC(err)
		return err
	}

	if err := dn.removeRollback(); err != nil {
		return fmt.Errorf("failed to remove rollback: %w", err)
	}

	// Bootstrapping state is when we have the node annotations file
	if state.bootstrapping {
		targetOSImageURL := state.currentConfig.Spec.OSImageURL
		osMatch := dn.checkOS(targetOSImageURL)
		if !osMatch {
			logSystem("Bootstrap pivot required to: %s", targetOSImageURL)

			if err := dn.updateLayeredOS(state.currentConfig); err != nil {
				return err
			}

			return dn.reboot(fmt.Sprintf("Node will reboot into config %v", state.currentConfig.GetName()))
		}
		logSystem("No bootstrap pivot required; unlinking bootstrap node annotations")

		// Rename the bootstrap node annotations; the
		// currentConfig's osImageURL should now be *truth*.
		// In other words if it drifts somehow, we go degraded.
		if err := os.Rename(constants.InitialNodeAnnotationsFilePath, constants.InitialNodeAnnotationsBakPath); err != nil {
			return fmt.Errorf("renaming initial node annotation file: %w", err)
		}
	}

	// odc stays nil while bootstrapping, and also when the on-disk config
	// file does not exist.
	var odc *onDiskConfig
	if !state.bootstrapping {
		var err error
		odc, err = dn.getCurrentConfigOnDisk()
		// we allow ENOENT for previous MCO versions that don't write this...
		if err != nil && !os.IsNotExist(err) {
			return err
		}
	}

	if odc != nil {
		if state.currentConfig.GetName() != odc.currentConfig.GetName() {
			// The on disk state (if available) is always considered truth.
			// We want to handle the case where etcd state was restored from a backup.
			logSystem("Disk currentConfig %q overrides node's currentConfig annotation %q", odc.currentConfig.GetName(), state.currentConfig.GetName())
			state.currentConfig = odc.currentConfig
		}

		if state.currentImage != odc.currentImage {
			logSystem("Disk currentImage %q overrides node's currentImage annotation %q", odc.currentImage, state.currentImage)
			state.currentImage = odc.currentImage
		}
	}

	// Validate the on-disk state against what we *expect*.
	//
	// In the case where we're booting a node for the first time, or the MCD
	// is restarted, that will be the current config.
	//
	// NOTE(review): the branch is selected on desiredImage but the message
	// prints currentImage — confirm this is intended.
	if state.desiredImage != "" {
		klog.Infof("Validating against current image %s", state.currentImage)
	} else {
		klog.Infof("Validating against current config %s", state.currentConfig.GetName())
	}

	// With the force file present, validation is skipped entirely and an
	// update is triggered right away.
	if forceFileExists() {
		logSystem("Skipping on-disk validation; %s present", constants.MachineConfigDaemonForceFile)
		return dn.triggerUpdate(state.currentConfig, state.desiredConfig, state.currentImage, state.desiredImage)

	}

	// When upgrading the OS, it is possible that the SSH key location will
	// change. We should detect whether that is the case and update before we
	// check for any config drift.
	if err := dn.updateSSHKeyLocationIfNeeded(state.currentConfig); err != nil {
		return err
	}

	if err := dn.validateOnDiskStateOrImage(state.currentConfig, state.currentImage); err != nil {
		// NOTE(review): the error text is passed in the position that an
		// Eventf-style API would presumably treat as a format string, so a
		// '%' in the message could be misrendered — confirm against the
		// nodeWriter.Eventf signature. Also note that, unlike the BootResync
		// event below, this call is not guarded by a nodeWriter nil check.
		dn.nodeWriter.Eventf(corev1.EventTypeWarning, "OnDiskStateValidationFailed", err.Error())
		return err
	}

	logSystem("Validated on-disk state")

	// We've validated state. Now, ensure that node is in desired state
	var inDesiredConfig bool
	if _, inDesiredConfig, err = dn.updateConfigAndState(state); err != nil {
		return err
	}
	if inDesiredConfig {
		return nil
	}

	if dn.nodeWriter != nil {
		dn.nodeWriter.Eventf(corev1.EventTypeNormal, "BootResync", fmt.Sprintf("Booting node %s, currentConfig %s, desiredConfig %s", dn.node.Name, state.currentConfig.GetName(), state.desiredConfig.GetName()))
	}
	// currentConfig != desiredConfig, and we're not booting up into the desiredConfig.
	// Kick off an update.
	err = dn.triggerUpdate(state.currentConfig, state.desiredConfig, state.currentImage, state.desiredImage)
	if err != nil {
		maybeReportOnMissingMC(err)
	}
	return err
}

// isInDesiredConfig reports whether state's current config name equals its
// desired config name and its current image equals its desired image. When
// both images are empty the image comparison is trivially true, so only the
// config names decide the result.
func (dn *Daemon) isInDesiredConfig(state *stateAndConfigs) bool {
	sameConfig := state.currentConfig.GetName() == state.desiredConfig.GetName()
	sameImage := state.desiredImage == state.currentImage
	return sameConfig && sameImage
}

// updateConfigAndState reconciles state with the config stored on disk and,
// when the node is already in its desired config, completes the update
// (uncordon via completeUpdate) and marks the node Done.
//
// It returns, in order:
//   - whether no on-disk config could be read (missingODC),
//   - whether the node is in its desired config,
//   - an error, if any step failed.
//
// state.currentConfig and state.currentImage are overwritten with the on-disk
// values when those could be read.
func (dn *Daemon) updateConfigAndState(state *stateAndConfigs) (bool, bool, error) {
	missingODC := false

	// While bootstrapping, persist the current config/image first so that the
	// read below finds them.
	if state.bootstrapping {
		odc := &onDiskConfig{
			currentConfig: state.currentConfig,
			currentImage:  state.currentImage,
		}
		if err := dn.storeCurrentConfigOnDisk(odc); err != nil {
			return missingODC, false, err
		}
	}

	// Set the current config to the last written config to disk. This will be the last
	// "successful" config update we have completed.
	odc, err := dn.getCurrentConfigOnDisk()

	if odc == nil {
		missingODC = true
	}

	// A missing file (os.IsNotExist) is tolerated: state is left untouched and
	// missingODC stays true. Any other read error aborts.
	if err == nil {
		state.currentConfig = odc.currentConfig
		state.currentImage = odc.currentImage
	} else if err != nil && !os.IsNotExist(err) {
		klog.Infof("Error reading config from disk")
		return missingODC, false, fmt.Errorf("error reading config from disk: %w", err)
	}

	// In case of node reboot, it may be the case that desiredConfig changed while we
	// were coming up, so we next look at that before uncordoning the node (so
	// we don't uncordon and then immediately re-cordon)

	inDesiredConfig := dn.isInDesiredConfig(state)
	if inDesiredConfig {
		// Great, we've successfully rebooted for the desired config,
		// let's mark it done!

		// Get MCP associated with node
		pool, err := helpers.GetPrimaryPoolNameForMCN(dn.mcpLister, dn.node)
		if err != nil {
			return missingODC, inDesiredConfig, err
		}

		// A failure to apply the MachineConfigNode condition is only logged;
		// it does not stop the update from completing.
		err = upgrademonitor.GenerateAndApplyMachineConfigNodes(
			&upgrademonitor.Condition{State: mcfgalphav1.MachineConfigNodeResumed, Reason: string(mcfgalphav1.MachineConfigNodeResumed), Message: fmt.Sprintf("In desired config %s. Resumed normal operations. Applying proper annotations.", state.currentConfig.Name)},
			nil,
			metav1.ConditionTrue,
			metav1.ConditionFalse,
			dn.node,
			dn.mcfgClient,
			dn.featureGatesAccessor,
			pool,
		)
		if err != nil {
			klog.Errorf("Error making MCN for Resumed true: %v", err)
		}
		klog.Infof("Completing update to target %s", state.getCurrentName())
		if err := dn.completeUpdate(state.currentConfig.GetName()); err != nil {
			UpdateStateMetric(mcdUpdateState, "", err.Error())
			return missingODC, inDesiredConfig, err
		}

		// We update the node annotation, and pop an event saying we're done.
		if dn.nodeWriter != nil {
			dn.nodeWriter.Eventf(corev1.EventTypeNormal, "NodeDone", fmt.Sprintf("Setting node %s, currentConfig %s to Done", dn.node.Name, state.currentConfig.GetName()))
		}

		// NOTE(review): nodeWriter is nil-checked for the event above but used
		// unguarded here — confirm whether it can actually be nil.
		if err := dn.nodeWriter.SetDone(state); err != nil {
			return missingODC, true, fmt.Errorf("error setting node's state to Done: %w", err)
		}

		// If we're degraded here, it means we got an error likely on startup and we retried.
		// If that's the case, clear it out.
		//
		// NOTE(review): this repeats the SetDone call that already succeeded
		// just above — confirm whether the second call is still needed.
		if state.state == constants.MachineConfigDaemonStateDegraded {
			if err := dn.nodeWriter.SetDone(state); err != nil {
				errLabelStr := fmt.Sprintf("error setting node's state to Done: %v", err)
				UpdateStateMetric(mcdUpdateState, "", errLabelStr)
				return missingODC, inDesiredConfig, fmt.Errorf("error setting node's state to Done: %w", err)
			}
		}

		klog.Infof("In desired state %s", state.getCurrentName())
		UpdateStateMetric(mcdUpdateState, state.getCurrentName(), "")
	}

	// No errors have occurred. Returns true if currentConfig == desiredConfig, false otherwise (needs update)
	return missingODC, inDesiredConfig, nil
}

// runOnceFromMachineConfig utilizes a parsed machineConfig and executes in onceFrom
// mode. If the content was remote, it executes cluster calls, otherwise it assumes
// no cluster is present yet.
//
// For remote content the node is marked degraded when preparing or running
// the update fails, and nil is returned when no update is required. Any
// contentFrom value other than the remote/local config origins yields an
// error. The function panics if remote content is requested without a
// kubeClient.
func (dn *Daemon) runOnceFromMachineConfig(machineConfig mcfgv1.MachineConfig, contentFrom onceFromOrigin) error {
	if contentFrom == onceFromRemoteConfig {
		if dn.kubeClient == nil {
			panic("running in onceFrom mode with a remote MachineConfig without a cluster")
		}
		// NOTE: This case expects a cluster to exists already.
		ufc, err := dn.prepUpdateFromCluster()
		if err != nil {
			// If the node cannot even be marked degraded, that error is
			// returned instead of the original one.
			if err := dn.nodeWriter.SetDegraded(err); err != nil {
				return err
			}
			maybeReportOnMissingMC(err)
			return err
		}
		// prepUpdateFromCluster returns (nil, nil) when no update is required,
		// so ufc itself must be checked before its fields are dereferenced.
		if ufc == nil || ufc.currentConfig == nil || ufc.desiredConfig == nil {
			return nil
		}
		// At this point we have verified we need to update
		if err = dn.triggerUpdateWithMachineConfig(ufc.currentConfig, &machineConfig, false); err != nil {
			// Marking the node degraded is best effort here; the update error
			// is what gets returned, but a failure to degrade is still logged.
			if degradeErr := dn.nodeWriter.SetDegraded(err); degradeErr != nil {
				klog.Errorf("Error while setting degraded: %v", degradeErr)
			}
			return err
		}
		return nil
	}
	if contentFrom == onceFromLocalConfig {
		// Execute update without hitting the cluster
		return dn.update(nil, &machineConfig, false)
	}
	// Otherwise return an error as the input format is unsupported
	return fmt.Errorf("%v is not a path nor url; can not run once", contentFrom)
}

// runOnceFromIgnition applies the MCD-supported subset of an Ignition config
// in onceFrom mode, without hitting the cluster: it writes the files and the
// systemd units, removes the encapsulated MachineConfig file if present, and
// finally reboots.
func (dn *Daemon) runOnceFromIgnition(ignConfig ign3types.Config) error {
	if writeErr := dn.writeFiles(ignConfig.Storage.Files, false); writeErr != nil {
		return writeErr
	}
	if unitErr := dn.writeUnits(ignConfig.Systemd.Units); unitErr != nil {
		return unitErr
	}

	// Unconditionally remove this file in the once-from (classic RHEL)
	// case.  We use this file to suppress things like kubelet and SDN
	// starting on CoreOS during the firstboot/pivot boot, but there's
	// no such thing on classic RHEL.
	if _, statErr := os.Stat(constants.MachineConfigEncapsulatedPath); statErr == nil {
		if rmErr := os.Remove(constants.MachineConfigEncapsulatedPath); rmErr != nil {
			return fmt.Errorf("failed to remove %s: %w", constants.MachineConfigEncapsulatedPath, rmErr)
		}
	}

	return dn.reboot("runOnceFromIgnition complete")
}

// handleNodeEvent is an event callback that enqueues the given node for
// processing. Objects that are not a *corev1.Node are reported through
// utilruntime.HandleError and dropped instead of panicking the daemon.
func (dn *Daemon) handleNodeEvent(node interface{}) {
	n, ok := node.(*corev1.Node)
	if !ok {
		utilruntime.HandleError(fmt.Errorf("expected *corev1.Node in node event handler, got %T", node))
		return
	}

	klog.V(4).Infof("Updating Node %s", n.Name)

	dn.enqueueNode(n)
}

// updateFromCluster is the result of prepUpdateFromCluster: the current and
// desired MachineConfigs and OS images an update would transition between.
type updateFromCluster struct {
	// currentConfig is the config the node is considered to be on right now.
	currentConfig *mcfgv1.MachineConfig
	// desiredConfig is the config named by the node's desired-config annotation.
	desiredConfig *mcfgv1.MachineConfig
	// currentImage is the image taken from the on-disk (or node-derived) config.
	currentImage  string
	// desiredImage is the value of the node's desired-image annotation.
	desiredImage  string
}

// prepUpdateFromCluster handles the shared update prepping functionality for
// flows that expect the cluster to already be available.
//
// It returns an *updateFromCluster describing the current and desired
// config/image when an update should be attempted, and (nil, nil) when the
// current and desired values already match and the node state is Done.
// Callers must therefore check the returned pointer for nil.
func (dn *Daemon) prepUpdateFromCluster() (*updateFromCluster, error) {
	desiredConfigName, err := getNodeAnnotationExt(dn.node, constants.DesiredMachineConfigAnnotationKey, true)
	if err != nil {
		return nil, err
	}

	desiredConfig, err := dn.mcLister.Get(desiredConfigName)
	if err != nil {
		return nil, maybeAddMachineConfigInfo(desiredConfigName, err)
	}
	// currentConfig is always expected to be there as loadNodeAnnotations
	// is one of the very first calls when the daemon starts.
	currentConfigName, err := getNodeAnnotation(dn.node, constants.CurrentMachineConfigAnnotationKey)
	if err != nil {
		return nil, err
	}
	currentConfig, err := dn.mcLister.Get(currentConfigName)
	if err != nil {
		return nil, maybeAddMachineConfigInfo(currentConfigName, err)
	}
	state, err := getNodeAnnotation(dn.node, constants.MachineConfigDaemonStateAnnotationKey)
	if err != nil {
		return nil, err
	}

	// A missing on-disk config is tolerated here; odc is then nil and gets
	// filled in from the node further down.
	odc, err := dn.getCurrentConfigOnDisk()
	if err != nil && !os.IsNotExist(err) {
		return nil, err
	}

	desiredImage, err := getNodeAnnotationExt(dn.node, constants.DesiredImageAnnotationKey, true)
	if err != nil {
		return nil, err
	}

	// If the on-disk config disagrees with the current-config annotation, the
	// on-disk values are used as the starting point and an update is always
	// reported.
	if odc != nil && odc.currentConfig.GetName() != currentConfig.GetName() {
		return &updateFromCluster{
			currentConfig: odc.currentConfig,
			desiredConfig: desiredConfig,
			currentImage:  odc.currentImage,
			desiredImage:  desiredImage,
		}, nil
	}

	if odc == nil {
		odc, err = dn.getCurrentConfigFromNode()
		if err != nil {
			return nil, err
		}
	}

	// Detect if there is an update
	// The two branches apply the same "already up to date" test; they differ
	// only in the warning emitted when the state is not Done.
	if desiredImage == "" && odc.currentImage == "" {
		if desiredConfigName == currentConfigName {
			if state == constants.MachineConfigDaemonStateDone {
				// No actual update to the config
				klog.V(2).Info("No updating is required")
				return nil, nil
			}
			// This seems like it shouldn't happen...let's just warn for now.
			klog.Warningf("current+desiredConfig is %s but state is %s", currentConfigName, state)
		}
	} else {
		if desiredImage == odc.currentImage && desiredConfigName == currentConfigName {
			if state == constants.MachineConfigDaemonStateDone {
				// No actual update to the config
				klog.V(2).Info("No updating is required")
				return nil, nil
			}
			// This seems like it shouldn't happen...let's just warn for now.
			klog.Warningf("current+desiredConfig is %s, current+desiredImage is %s but state is %s", currentConfigName, odc.currentImage, state)
		}
	}

	return &updateFromCluster{
		currentConfig: odc.currentConfig,
		desiredConfig: desiredConfig,
		currentImage:  odc.currentImage,
		desiredImage:  desiredImage,
	}, nil
}

// completeUpdate requests that the node be uncordoned and waits for that
// request to be applied.
//
// It sets the desired-drainer annotation to "uncordon-<desiredConfigName>" and
// then polls the node every 10 seconds, for up to 10 minutes, until the
// last-applied-drainer annotation matches the desired one. A timeout emits a
// FailedToUncordon warning event and returns an error; any other polling
// failure is returned wrapped so callers can still inspect the cause.
func (dn *Daemon) completeUpdate(desiredConfigName string) error {
	if err := dn.nodeWriter.SetDesiredDrainer(fmt.Sprintf("%s-%s", "uncordon", desiredConfigName)); err != nil {
		return fmt.Errorf("could not set drain annotation: %w", err)
	}

	ctx := context.TODO()

	if err := wait.PollUntilContextTimeout(ctx, 10*time.Second, 10*time.Minute, false, func(ctx context.Context) (bool, error) {
		node, err := dn.kubeClient.CoreV1().Nodes().Get(ctx, dn.name, metav1.GetOptions{})
		if err != nil {
			// Failing to fetch the node is not fatal; keep polling.
			klog.Warningf("Failed to get node: %v", err)
			return false, nil
		}
		if node.Annotations[constants.DesiredDrainerAnnotationKey] != node.Annotations[constants.LastAppliedDrainerAnnotationKey] {
			return false, nil
		}
		return true, nil
	}); err != nil {
		if wait.Interrupted(err) {
			failMsg := fmt.Sprintf("failed to uncordon node: %s after 10 minutes. Please see machine-config-controller logs for more information", dn.node.Name)
			dn.nodeWriter.Eventf(corev1.EventTypeWarning, "FailedToUncordon", failMsg)
			return errors.New(failMsg)
		}
		// %w (rather than %v) keeps the underlying error in the chain for
		// errors.Is / errors.As; the rendered message is unchanged.
		return fmt.Errorf("something went wrong while attempting to uncordon node: %w", err)
	}

	logSystem("Update completed for config %s and node has been successfully uncordoned", desiredConfigName)
	dn.nodeWriter.Eventf(corev1.EventTypeNormal, "Uncordon", fmt.Sprintf("Update completed for config %s and node has been uncordoned", desiredConfigName))

	return nil
}

// triggerUpdate starts an update from currentConfig/currentImage towards
// desiredConfig/desiredImage. When both image values are empty it runs a
// regular MachineConfig update; otherwise it performs a layered OS update.
func (dn *Daemon) triggerUpdate(currentConfig, desiredConfig *mcfgv1.MachineConfig, currentImage, desiredImage string) error {
	// Before any update, make sure the image pull secrets that rpm-ostree uses
	// are up-to-date.
	if err := dn.syncInternalRegistryPullSecrets(nil); err != nil {
		return err
	}

	// An image on either side means this is a layered update; with both
	// image annotations empty it is a regular MachineConfig update.
	layered := desiredImage != "" || currentImage != ""
	if !layered {
		return dn.triggerUpdateWithMachineConfig(currentConfig, desiredConfig, true)
	}

	// The config will "drift" while the update is occurring, so shut down the
	// Config Drift Monitor first.
	dn.stopConfigDriftMonitor()

	klog.Infof("Performing layered OS update")
	return dn.updateOnClusterBuild(currentConfig, desiredConfig, currentImage, desiredImage, true)
}

// triggerUpdateWithMachineConfig starts the update. A nil currentConfig or
// desiredConfig is resolved from the cluster using the corresponding node
// annotation before the update runs.
func (dn *Daemon) triggerUpdateWithMachineConfig(currentConfig, desiredConfig *mcfgv1.MachineConfig, skipCertificateWrite bool) error {
	// fromAnnotation fetches the MachineConfig named by the given node
	// annotation.
	fromAnnotation := func(annotationKey string) (*mcfgv1.MachineConfig, error) {
		mcName, err := getNodeAnnotation(dn.node, annotationKey)
		if err != nil {
			return nil, err
		}
		mc, err := dn.mcLister.Get(mcName)
		if err != nil {
			return nil, maybeAddMachineConfigInfo(mcName, err)
		}
		return mc, nil
	}

	var err error
	if currentConfig == nil {
		if currentConfig, err = fromAnnotation(constants.CurrentMachineConfigAnnotationKey); err != nil {
			return err
		}
	}
	if desiredConfig == nil {
		if desiredConfig, err = fromAnnotation(constants.DesiredMachineConfigAnnotationKey); err != nil {
			return err
		}
	}

	// The config will "drift" while the update is occurring, so shut down the
	// Config Drift Monitor first.
	dn.stopConfigDriftMonitor()

	// run the update process. this function doesn't currently return.
	return dn.update(currentConfig, desiredConfig, skipCertificateWrite)
}

// validateKernelArguments verifies that every kernel argument specified in
// currentConfig appears in the output of `rpm-ostree kargs`. When some are
// missing, diagnostic details are logged and an error listing the missing
// arguments is returned.
func (dn *CoreOSDaemon) validateKernelArguments(currentConfig *mcfgv1.MachineConfig) error {
	out, err := runGetOut("rpm-ostree", "kargs")
	if err != nil {
		return err
	}
	bootedKargs := strings.TrimSpace(string(out))

	// Index the space-separated arguments for membership checks.
	present := make(map[string]struct{})
	for _, arg := range strings.Split(bootedKargs, " ") {
		present[arg] = struct{}{}
	}

	wanted := parseKernelArguments(currentConfig.Spec.KernelArguments)
	var absent []string
	for _, karg := range wanted {
		if _, found := present[karg]; !found {
			absent = append(absent, karg)
		}
	}
	if len(absent) == 0 {
		return nil
	}

	// Log the booted command line too, to help diagnose the mismatch.
	if cmdline, readErr := os.ReadFile(CmdLineFile); readErr != nil {
		klog.Warningf("Failed to read %s: %v", CmdLineFile, readErr)
	} else {
		klog.Infof("Booted command line: %s", string(cmdline))
	}
	klog.Infof("Current ostree kargs: %s", bootedKargs)
	klog.Infof("Expected MachineConfig kargs: %v", wanted)
	return fmt.Errorf("missing expected kernel arguments: %v", absent)
}

// validateOnDiskStateImpl is the implementation behind validateOnDiskState. It
// checks, in order: that the booted OS matches imageToCheck, that the kernel
// arguments match (CoreOS variants only), and that the on-disk state matches
// currentConfig.
func (dn *Daemon) validateOnDiskStateImpl(currentConfig *mcfgv1.MachineConfig, imageToCheck string) error {
	// Be sure we're booted into the OS we expect
	if !dn.checkOS(imageToCheck) {
		return fmt.Errorf("expected target osImageURL %q, have %q (%q)", imageToCheck, dn.bootedOSImageURL, dn.bootedOSCommit)
	}

	if dn.os.IsCoreOSVariant() {
		coreOS := &CoreOSDaemon{dn}
		if kargsErr := coreOS.validateKernelArguments(currentConfig); kargsErr != nil {
			return kargsErr
		}
	}

	return validateOnDiskState(currentConfig, pathSystemd)
}

// validateOnDiskState compares the on-disk state against what a configuration
// specifies, using the OSImageURL from currentConfig's spec as the image to
// check. If for example an admin ssh'd into a node, or another operator is
// stomping on our files, we want to highlight that and mark the system
// degraded.
//
// This outer function wraps validateOnDiskStateImpl: when validation fails and
// a previous finalization failure was recorded on the daemon, that text is
// appended to the returned error as a possible root cause.
func (dn *Daemon) validateOnDiskState(currentConfig *mcfgv1.MachineConfig) error {
	implErr := dn.validateOnDiskStateImpl(currentConfig, currentConfig.Spec.OSImageURL)
	switch {
	case implErr == nil:
		return nil
	case dn.previousFinalizationFailure == "":
		// Nothing extra to report; hand back the validation error untouched.
		return implErr
	default:
		return fmt.Errorf("%w; possible root cause: %s", implErr, dn.previousFinalizationFailure)
	}
}

// validateOnDiskStateOrImage validates the on-disk state either against the
// OS image named by currentConfig (when image is empty) or against the
// explicitly supplied image. Any validation error is wrapped with the name of
// what was validated against: the config's name in the first case, the image
// in the second.
func (dn *Daemon) validateOnDiskStateOrImage(currentConfig *mcfgv1.MachineConfig, image string) error {
	if image != "" {
		if err := dn.validateOnDiskStateWithImage(currentConfig, image); err != nil {
			return fmt.Errorf("unexpected on-disk state validating against %s: %w", image, err)
		}
		return nil
	}

	if err := dn.validateOnDiskState(currentConfig); err != nil {
		return fmt.Errorf("unexpected on-disk state validating against %s: %w", currentConfig.GetName(), err)
	}
	return nil
}

// validateOnDiskStateWithImage validates the on-disk state for currentConfig,
// checking the booted OS against the supplied image instead of the one in the
// config's spec. When validation fails and a previous finalization failure was
// recorded on the daemon, that text is appended to the returned error as a
// possible root cause.
func (dn *Daemon) validateOnDiskStateWithImage(currentConfig *mcfgv1.MachineConfig, image string) error {
	implErr := dn.validateOnDiskStateImpl(currentConfig, image)
	if implErr == nil {
		return nil
	}
	if rootCause := dn.previousFinalizationFailure; rootCause != "" {
		return fmt.Errorf("%w; possible root cause: %s", implErr, rootCause)
	}
	return implErr
}

// checkOS determines whether the booted system matches the target
// osImageURL and if not whether we need to take action. It returns `true`
// when no action is required, which is the case when:
//   - the host is not a CoreOS variant (RHCOS or FCOS), or
//   - inspecting the target image succeeds and its "ostree.commit" label
//     equals the booted OS commit, or
//   - the booted OS image URL is equal to osImageURL.
//
// Otherwise `false` is returned and an update needs to be performed. A failed
// image inspection is only logged; the URL comparison still decides the result.
func (dn *Daemon) checkOS(osImageURL string) bool {
	// Nothing to do if we're not on RHCOS or FCOS
	if !dn.os.IsCoreOSVariant() {
		klog.Infof(`Not booted into a CoreOS variant, ignoring target OSImageURL %s`, osImageURL)
		return true
	}

	// TODO(jkyros): the header for this functions says "if the digests match"
	// so I'm wondering if at one point this used to work this way....
	inspection, _, inspectErr := imageInspect(osImageURL)
	if inspectErr != nil {
		klog.Warningf("Unable to check manifest for matching hash: %s", inspectErr)
		return dn.bootedOSImageURL == osImageURL
	}

	if labelCommit, found := inspection.Labels["ostree.commit"]; found && labelCommit == dn.bootedOSCommit {
		klog.Infof("We are technically in the right image even if the URL doesn't match (%s == %s)", labelCommit, osImageURL)
		return true
	}

	return dn.bootedOSImageURL == osImageURL
}

// Close closes all the connections the node agent has open for its lifetime.
// The body is currently empty, so calling it has no effect.
func (dn *Daemon) Close() {
}

// ValidPath reports whether path looks like an acceptable filesystem path,
// meaning it begins with ".", ".." or "/". Only the prefix is inspected; the
// function does not check whether the path exists.
func ValidPath(path string) bool {
	switch {
	case strings.HasPrefix(path, "."),
		strings.HasPrefix(path, ".."),
		strings.HasPrefix(path, "/"):
		return true
	default:
		return false
	}
}

// senseAndLoadOnceFrom gets a hold of the content for supported onceFrom
// configurations and parses it to determine its type.
//
// onceFrom is fetched over HTTP(S) when it starts with "http://" or
// "https://"; otherwise it is read as a local file. The content is first tried
// as an Ignition config and then as a MachineConfig.
//
// It returns the parsed object (the Ignition config, or the MachineConfig by
// value), where the content came from (remote or local), and an error. A
// remote fetch that does not answer with a 2xx status is reported as an error
// instead of handing the response body to the parsers. When neither parser
// accepts the content the origin is onceFromUnknownConfig.
func (dn *Daemon) senseAndLoadOnceFrom(onceFrom string) (interface{}, onceFromOrigin, error) {
	var (
		content     []byte
		contentFrom onceFromOrigin
	)
	// Read the content from a remote endpoint if requested
	/* #nosec */
	if strings.HasPrefix(onceFrom, "http://") || strings.HasPrefix(onceFrom, "https://") {
		contentFrom = onceFromRemoteConfig
		// NOTE(review): http.Get uses http.DefaultClient, which has no
		// timeout; consider a client with an explicit timeout here.
		resp, err := http.Get(onceFrom)
		if err != nil {
			return nil, contentFrom, err
		}
		defer resp.Body.Close()
		// An error page is not a config: fail with the real cause rather
		// than a confusing "unable to decipher" parse error further down.
		if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
			return nil, contentFrom, fmt.Errorf("fetching onceFrom config from %s: unexpected HTTP status %s", onceFrom, resp.Status)
		}
		// Read the body content from the request
		content, err = io.ReadAll(resp.Body)
		if err != nil {
			return nil, contentFrom, err
		}

	} else {
		// Otherwise read it from a local file
		contentFrom = onceFromLocalConfig
		absoluteOnceFrom, err := filepath.Abs(filepath.Clean(onceFrom))
		if err != nil {
			return nil, contentFrom, err
		}
		content, err = os.ReadFile(absoluteOnceFrom)
		if err != nil {
			return nil, contentFrom, err
		}
	}

	// Try each supported parser
	ignConfig, err := ctrlcommon.ParseAndConvertConfig(content)
	if err == nil && ignConfig.Ignition.Version != "" {
		klog.V(2).Info("onceFrom file is of type Ignition")
		return ignConfig, contentFrom, nil
	}

	klog.V(2).Infof("%s is not an Ignition config: %v\nTrying MachineConfig.", onceFrom, err)

	// Try to parse as a machine config
	mc, err := mcoResourceRead.ReadMachineConfigV1(content)
	if err == nil && mc != nil {
		klog.V(2).Info("onceFrom file is of type MachineConfig")
		return *mc, contentFrom, nil
	}

	return nil, onceFromUnknownConfig, fmt.Errorf("unable to decipher onceFrom config type: %w", err)
}

// isSingleNodeTopology reports whether topology is
// configv1.SingleReplicaTopologyMode.
func isSingleNodeTopology(topology configv1.TopologyMode) bool {
	return topology == configv1.SingleReplicaTopologyMode
}

// getControlPlaneTopology returns the control plane topology recorded in the
// node's ClusterControlPlaneTopologyAnnotationKey annotation. The
// single-replica and highly-available-arbiter modes are returned as-is; every
// other value, including a missing or unrecognized annotation, is treated as
// a highly available cluster.
func (dn *Daemon) getControlPlaneTopology() configv1.TopologyMode {
	mode := configv1.TopologyMode(dn.node.Annotations[constants.ClusterControlPlaneTopologyAnnotationKey])
	if mode == configv1.SingleReplicaTopologyMode || mode == configv1.HighlyAvailableArbiterMode {
		return mode
	}
	// HighlyAvailableTopologyMode itself and any unhandled value both end up here.
	return configv1.HighlyAvailableTopologyMode
}

// forceFileExists reports whether the daemon's force file
// (constants.MachineConfigDaemonForceFile) can be stat'ed. Any stat error,
// not only "does not exist", is reported as the file being absent.
func forceFileExists() bool {
	if _, statErr := os.Stat(constants.MachineConfigDaemonForceFile); statErr != nil {
		return false
	}
	// Stat succeeded, so the file is there.
	return true
}

// maybeAddMachineConfigInfo enriches err with a missing-MachineConfig error
// for configName when err is an API "not found" error. Any other error is
// returned unchanged.
func maybeAddMachineConfigInfo(configName string, err error) error {
	if !apierrors.IsNotFound(err) {
		// The lookup failed for some other reason; there is nothing to add.
		return err
	}

	// The MC is known to be missing, so attach that context to the original error.
	return errors.Join(newErrMissingMachineConfig(configName), err)
}

// maybeReportOnMissingMC increments the mcdMissingMC metric, labelled with the
// missing MachineConfig's name, when err is or wraps an
// *ErrMissingMachineConfig. For any other error it does nothing.
func maybeReportOnMissingMC(err error) {
	var target *ErrMissingMachineConfig
	if !errors.As(err, &target) {
		return
	}
	mcdMissingMC.WithLabelValues(target.MissingMachineConfig()).Inc()
}

// healthHandler is a stateless http.Handler used as a health endpoint.
type healthHandler struct{}

// ServeHTTP answers GET and HEAD requests with 200 OK and every other method
// with 405 Method Not Allowed. The response never carries a body, which is
// advertised through a Content-Length of 0.
func (h *healthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	status := http.StatusMethodNotAllowed
	switch r.Method {
	case http.MethodGet, http.MethodHead:
		status = http.StatusOK
	}

	// Headers must be set before the status line is written.
	w.Header().Set("Content-Length", "0")
	w.WriteHeader(status)
}

// Disable insecure cipher suites for CVE-2016-2183
// cipherOrder returns the IDs of the cipher suites from tls.CipherSuites()
// that are considered secure, ordered with every suite that supports TLS 1.3
// first, followed by the acceptable suites that support TLS 1.2.
// Deprecated ciphers are not returned.
func cipherOrder() []uint16 {
	var tls13IDs, tls12IDs []uint16

	// A TLS 1.2 suite is acceptable unless it is a block cipher using
	// straight SHA1 (see https://tools.ietf.org/html/rfc7540#appendix-A)
	// or is based on 3DES, which is considered insecure.
	acceptable := func(suite *tls.CipherSuite) bool {
		return !strings.HasSuffix(suite.Name, "CBC_SHA") && !strings.Contains(suite.Name, "3DES")
	}

	for _, suite := range tls.CipherSuites() {
		for _, version := range suite.SupportedVersions {
			switch {
			case version == tls.VersionTLS13:
				tls13IDs = append(tls13IDs, suite.ID)
			case version == tls.VersionTLS12 && acceptable(suite) && !slices.Contains(tls13IDs, suite.ID):
				// Skip IDs that were already collected as TLS 1.3 suites.
				tls12IDs = append(tls12IDs, suite.ID)
			}
		}
	}

	return append(tls13IDs, tls12IDs...)
}

// Deployment is the subset of one entry in the "deployments" array of
// `rpm-ostree status --json` output that the daemon decodes. Each field is
// populated from the JSON key named in its tag.
type Deployment struct {
	// Booted is read from "booted"; getUnsupportedPackages uses the first
	// deployment with this set as the active one.
	Booted bool `json:"booted"`
	// The fields below are the package lists that getUnsupportedPackages
	// concatenates and checks against the supported extensions.
	RequestedPackages                  []string `json:"requested-packages"`
	RequestedLocalPackages             []string `json:"requested-local-packages"`
	RequestedLocalFileoverridePackages []string `json:"requested-local-fileoverride-packages"`
	RequestedBaseRemovals              []string `json:"requested-base-removals"`
	RequestedBaseLocalReplacements     []string `json:"requested-base-local-replacements"`
}

// rpmOstreePackageStatus is the top-level shape decoded from
// `rpm-ostree status --json`; only the "deployments" array is kept.
type rpmOstreePackageStatus struct {
	Deployments []Deployment `json:"deployments"`
}

// getUnsupportedPackages counts the package changes requested in the booted
// rpm-ostree deployment that are not part of the supported extensions and
// publishes that count on the unsupportedPackages metric for this daemon.
//
// The metric is set to 0 when rpm-ostree is not installed or its status
// command fails. It is left untouched when the status output cannot be parsed
// or contains no booted deployment.
func (dn *Daemon) getUnsupportedPackages() {
	// Check if rpm-ostree command exists
	if _, lookErr := exec.LookPath("rpm-ostree"); lookErr != nil {
		klog.Infof("rpm-ostree command not found, skipping unsupported package check")
		unsupportedPackages.WithLabelValues(dn.name).Set(0)
		return
	}

	rawStatus, runErr := exec.Command("rpm-ostree", "status", "--json").Output()
	if runErr != nil {
		klog.Errorf("Failed to run rpm-ostree status: %v", runErr)
		unsupportedPackages.WithLabelValues(dn.name).Set(0)
		return
	}

	var parsed rpmOstreePackageStatus
	if jsonErr := json.Unmarshal(rawStatus, &parsed); jsonErr != nil {
		klog.Errorf("Failed to parse rpm-ostree status output: %v", jsonErr)
		return
	}

	// Find the booted deployment
	bootedIdx := slices.IndexFunc(parsed.Deployments, func(d Deployment) bool { return d.Booted })
	if bootedIdx < 0 {
		klog.Warning("No booted deployment found in rpm-ostree status")
		return
	}
	booted := &parsed.Deployments[bootedIdx]

	// Flatten every supported extension's package list into a lookup set.
	supported := make(map[string]struct{})
	for _, pkgs := range ctrlcommon.SupportedExtensions() {
		for _, name := range pkgs {
			supported[name] = struct{}{}
		}
	}

	requested := slices.Concat(
		booted.RequestedPackages,
		booted.RequestedLocalPackages,
		booted.RequestedLocalFileoverridePackages,
		booted.RequestedBaseRemovals,
		booted.RequestedBaseLocalReplacements,
	)
	klog.Infof("Found %d requested local packages in the booted deployment", len(requested))

	unsupportedCount := 0
	for _, name := range requested {
		if _, ok := supported[name]; !ok {
			unsupportedCount++
			klog.Infof("Unsupported package %s", name)
		}
	}
	unsupportedPackages.WithLabelValues(dn.name).Set(float64(unsupportedCount))
}

// preserveDaemonLogs copies the machine-config-daemon pod's log directory from
// /var/log/pods/ into /etc/machine-config-daemon/previous-logs/.
//
// The source is the last entry found under /var/log/pods/ whose name contains
// "openshift-machine-config-operator_machine-config-daemon-". Anything already
// in the previous-logs directory is removed first, and the copy (done with
// `cp --recursive`) is bounded by a one-minute timeout. The function is
// best-effort: every failure is logged and nothing is returned.
func (dn *Daemon) preserveDaemonLogs() {
	const podLogRoot = "/var/log/pods/"

	// Find current daemon log location
	var currentLogLocation, logDir string
	if walkErr := filepath.WalkDir(podLogRoot, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if strings.Contains(d.Name(), "openshift-machine-config-operator_machine-config-daemon-") {
			currentLogLocation = path
			logDir = d.Name()
		}
		return nil
	}); walkErr != nil {
		klog.Errorf("Daemon logs could not be found due to error: %v", walkErr)
		return
	}

	baseDir := "/etc/machine-config-daemon/previous-logs/"
	// Remove the old logs if it exists
	if err := os.RemoveAll(baseDir); err != nil {
		klog.Errorf("Failed to clear out old daemon logs: %v", err)
		return
	}

	// Ensure the base directory exists
	if err := os.MkdirAll(baseDir, 0o755); err != nil {
		klog.Errorf("Failed to create logs directory: %v", err)
		return
	}

	// Without a match there is nothing to copy; say so explicitly instead of
	// invoking cp with an empty source path and reporting its failure.
	if currentLogLocation == "" {
		klog.Errorf("No daemon logs found under %s; nothing to preserve", podLogRoot)
		return
	}

	// Create a context with a timeout
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()

	// Create preservation location; this will change based on the daemon name/uid
	preservationLocation := filepath.Join(baseDir, logDir)
	cmd := exec.CommandContext(ctx, "cp", "--recursive", currentLogLocation, preservationLocation)
	if err := cmd.Run(); err != nil {
		if ctx.Err() == context.DeadlineExceeded {
			klog.Errorf("Timeout reached: Daemon logs from %s could not be preserved at %s", currentLogLocation, preservationLocation)
		} else {
			klog.Errorf("Daemon logs from %s could not be preserved at %s due to error: %v", currentLogLocation, preservationLocation, err)
		}
		return
	}
	klog.Infof("Daemon logs from %s preserved at %s", currentLogLocation, preservationLocation)
}