Set mon_target_pg_per_osd to 200 in new clusters for better performance #3080

Open · wants to merge 2 commits into main
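On a fresh install the change lands in the rook-config-override ConfigMap's `config` key as an additional `[global]` entry. A minimal sketch of reading that value back with gopkg.in/ini.v1 (the same library the changed code uses); the surrounding default settings are omitted here and assumed unchanged:

```go
package main

import (
	"fmt"

	"gopkg.in/ini.v1"
)

func main() {
	// Illustrative "config" payload of the rook-config-override ConfigMap on a
	// fresh install; only the key added by this PR is shown.
	configData := `[global]
mon_target_pg_per_osd = 200
`
	cfg, err := ini.Load([]byte(configData))
	if err != nil {
		panic(err)
	}
	// Prints: 200
	fmt.Println(cfg.Section("global").Key("mon_target_pg_per_osd").String())
}
```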
25 changes: 25 additions & 0 deletions controllers/storagecluster/cephconfig.go
@@ -25,6 +25,7 @@ const (
rookOverrideConfigMapName = "rook-config-override"
globalSectionKey = "global"
publicNetworkKey = "public_network"
targetPgPerOSDKey = "mon_target_pg_per_osd"
)

var (
@@ -66,6 +67,30 @@ func (obj *ocsCephConfig) ensureCreated(r *StorageClusterReconciler, sc *ocsv1.S
Data: rookConfigOverrideData,
}
_, err := ctrl.CreateOrUpdate(context.Background(), r.Client, rookConfigOverrideCM, func() error {
// mon_target_pg_per_osd=200 is added only when the ConfigMap is first created, since setting it on an existing cluster can trigger data movement.
Contributor commented:
To keep this simple, what if the new setting is only added to the defaultRookConfig? It's not critical for upgraded clusters, right?

Alternatively, instead of using the configmap at all, this setting could be the first one that we add to the CephCluster spec under the CephConfig setting. I'd suggest this for the preferred approach for all the ceph settings that don't need to be overridable by the user, and also are not settings that affect the mons at first cluster creation. This setting is just for the PG auto scaler, so it's fine to set later.
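A rough sketch of that second alternative, assuming Rook's CephCluster CRD exposes the CephConfig field mentioned above as a map of section names to key/value pairs; this snippet is illustrative only and is not part of the PR:

```go
import (
	cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1"
)

// setTargetPGPerOSD is a hypothetical helper that would apply the setting via
// the CephCluster spec instead of the rook-config-override ConfigMap.
func setTargetPGPerOSD(cephCluster *cephv1.CephCluster) {
	if cephCluster.Spec.CephConfig == nil {
		cephCluster.Spec.CephConfig = map[string]map[string]string{}
	}
	if cephCluster.Spec.CephConfig["global"] == nil {
		cephCluster.Spec.CephConfig["global"] = map[string]string{}
	}
	cephCluster.Spec.CephConfig["global"]["mon_target_pg_per_osd"] = "200"
}
```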

if rookConfigOverrideCM.ObjectMeta.CreationTimestamp.IsZero() {
updatedConfig, err := updateRookConfig(rookConfigOverrideData["config"], globalSectionKey, targetPgPerOSDKey, "200")
if err != nil {
return err
}
rookConfigOverrideData["config"] = updatedConfig
} else {
// Preserve a mon_target_pg_per_osd value that was set when the ConfigMap was created, so it is not removed during updates.
if configData, exists := rookConfigOverrideCM.Data["config"]; exists && configData != "" {
cfg, err := ini.Load([]byte(configData))
if err != nil {
return fmt.Errorf("failed to parse existing config: %w", err)
}
if val := cfg.Section(globalSectionKey).Key(targetPgPerOSDKey).String(); val != "" {
updatedConfig, err := updateRookConfig(rookConfigOverrideData["config"], globalSectionKey, targetPgPerOSDKey, val)
if err != nil {
return fmt.Errorf("failed to update Rook config during update: %w", err)
}
rookConfigOverrideData["config"] = updatedConfig
}
}
}

if !reflect.DeepEqual(rookConfigOverrideCM.Data, rookConfigOverrideData) {
r.Log.Info("updating rook config override configmap", "ConfigMap", klog.KRef(sc.Namespace, rookOverrideConfigMapName))
rookConfigOverrideCM.Data = rookConfigOverrideData
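The branch above reuses the existing updateRookConfig helper, whose body is outside this diff. As a rough illustration (not the actual implementation), a helper of that shape could do an ini round-trip with gopkg.in/ini.v1, which this file already imports:

```go
import (
	"bytes"
	"fmt"

	"gopkg.in/ini.v1"
)

// updateRookConfigSketch sets key = value in the given section of an
// ini-formatted string and returns the updated string. Illustrative only.
func updateRookConfigSketch(defaultConfig, section, key, value string) (string, error) {
	cfg, err := ini.Load([]byte(defaultConfig))
	if err != nil {
		return "", fmt.Errorf("failed to load config data: %w", err)
	}
	cfg.Section(section).Key(key).SetValue(value)
	var buf bytes.Buffer
	if _, err := cfg.WriteTo(&buf); err != nil {
		return "", fmt.Errorf("failed to write config data: %w", err)
	}
	return buf.String(), nil
}
```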
74 changes: 74 additions & 0 deletions controllers/storagecluster/cephconfig_test.go
@@ -158,3 +158,77 @@ func TestDualStack(t *testing.T) {
}

}

func TestMonTargetPGPerOSD(t *testing.T) {
testTable := []struct {
label string
existingConfig string
reconcileCount int
expectedValue string
shouldExist bool
}{
{
label: "Fresh install - should set mon_target_pg_per_osd=200",
existingConfig: "",
reconcileCount: 2,
expectedValue: "200",
shouldExist: true,
},
{
label: "Existing cluster - should not add mon_target_pg_per_osd",
existingConfig: `[global]
some_other_key = value
`,
reconcileCount: 1,
expectedValue: "",
shouldExist: false,
},
}

for i, testCase := range testTable {
t.Logf("Case #%+v: %s", i+1, testCase.label)
r := createFakeStorageClusterReconciler(t)
sc := &api.StorageCluster{
ObjectMeta: metav1.ObjectMeta{Namespace: "test"},
}

// Create existing ConfigMap if specified
if testCase.existingConfig != "" {
existingCM := &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: rookOverrideConfigMapName,
Namespace: sc.Namespace,
CreationTimestamp: metav1.Now(),
},
Data: map[string]string{
"config": testCase.existingConfig,
},
}
err := r.Client.Create(context.TODO(), existingCM)
assert.NilError(t, err, "failed to create existing configmap")
}

cephConfigReconciler := &ocsCephConfig{}
for j := 0; j < testCase.reconcileCount; j++ {
_, err := cephConfigReconciler.ensureCreated(&r, sc)
assert.NilError(t, err, "reconcile %d failed", j+1)

// Verify ConfigMap after each reconcile
configMap := &corev1.ConfigMap{}
err = r.Client.Get(context.TODO(), types.NamespacedName{Name: rookOverrideConfigMapName, Namespace: sc.Namespace}, configMap)
assert.NilError(t, err, "expected to find configmap")

cfg, err := ini.Load([]byte(configMap.Data["config"]))
assert.NilError(t, err, "expected ini string to load")

sect, err := cfg.GetSection(globalSectionKey)
assert.NilError(t, err, "expected section to exist")

keyFound := sect.HasKey(targetPgPerOSDKey)
assert.Equal(t, keyFound, testCase.shouldExist, "mon_target_pg_per_osd key existence mismatch")
if testCase.shouldExist {
assert.Equal(t, sect.Key(targetPgPerOSDKey).Value(), testCase.expectedValue, "mon_target_pg_per_osd value mismatch")
}
}
}
}
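The new table-driven test can be run on its own from the repository root with `go test ./controllers/storagecluster/... -run TestMonTargetPGPerOSD`.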