Skip to content

Provide pressure stall information for workspaces #13703

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Oct 21, 2022
48 changes: 48 additions & 0 deletions components/common-go/cgroups/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
package cgroups

import (
"bufio"
"fmt"
"math"
"os"
"strconv"
Expand Down Expand Up @@ -85,3 +87,49 @@ func ReadFlatKeyedFile(path string) (map[string]uint64, error) {

return kv, nil
}

// Read the total stalled time in microseconds for full and some
// It is not necessary to read avg10, avg60 and avg300 as these
// are only for convenience. They are calculated as the rate during
// the desired time frame.
func ReadPSIValue(path string) (PSI, error) {
file, err := os.Open(path)
if err != nil {
return PSI{}, err
}
defer file.Close()

scanner := bufio.NewScanner(file)
var psi PSI
for scanner.Scan() {
line := scanner.Text()
if err = scanner.Err(); err != nil {
return PSI{}, fmt.Errorf("could not read psi file: %w", err)
}

i := strings.LastIndex(line, "total=")
if i == -1 {
return PSI{}, fmt.Errorf("could not find total stalled time")
}

total, err := strconv.ParseUint(line[i+6:], 10, 64)
if err != nil {
return PSI{}, fmt.Errorf("could not parse total stalled time: %w", err)
}

if strings.HasPrefix(line, "some") {
psi.Some = total
}

if strings.HasPrefix(line, "full") {
psi.Full = total
}
}

return psi, nil
}

type PSI struct {
Some uint64
Full uint64
}
54 changes: 54 additions & 0 deletions components/common-go/cgroups/cgroups_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,57 @@ func TestReadSingleValue(t *testing.T) {
})
}
}

func TestReadPSI(t *testing.T) {
scenarios := []struct {
name string
content string
expected PSI
}{
{
name: "psi some",
content: "some avg10=61.00 avg60=64.28 avg300=29.94 total=149969752",
expected: PSI{
Some: 149969752,
Full: 0,
},
},
{
name: "psi full",
content: "full avg10=36.27 avg60=37.15 avg300=17.59 total=93027571",
expected: PSI{
Some: 0,
Full: 93027571,
},
},
{
name: "psi some and full",
content: "some avg10=61.00 avg60=64.28 avg300=29.94 total=149969752\nfull avg10=36.27 avg60=37.15 avg300=17.59 total=93027571",
expected: PSI{
Some: 149969752,
Full: 93027571,
},
},
}

for _, s := range scenarios {
t.Run(s.name, func(t *testing.T) {
f, err := os.CreateTemp("", "cgroup_test*")
if err != nil {
t.Fatal(err)
}
defer os.Remove(f.Name())

if _, err := f.Write([]byte(s.content)); err != nil {
t.Fatal(err)
}

v, err := ReadPSIValue(f.Name())
if err != nil {
t.Fatal(err)
}

assert.Equal(t, s.expected, v)
})
}
}
2 changes: 1 addition & 1 deletion components/common-go/cgroups/v1/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package v1
package cgroups_v1

import (
"path/filepath"
Expand Down
2 changes: 1 addition & 1 deletion components/common-go/cgroups/v1/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package v1
package cgroups_v1

import (
"path/filepath"
Expand Down
7 changes: 6 additions & 1 deletion components/common-go/cgroups/v2/cpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package v2
package cgroups_v2

import (
"math"
Expand Down Expand Up @@ -84,3 +84,8 @@ func (c *Cpu) Stat() (*cgroups.CpuStats, error) {

return &stats, nil
}

func (c *Cpu) PSI() (cgroups.PSI, error) {
path := filepath.Join(c.path, "cpu.pressure")
return cgroups.ReadPSIValue(path)
}
2 changes: 1 addition & 1 deletion components/common-go/cgroups/v2/cpu_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package v2
package cgroups_v2

import (
"fmt"
Expand Down
33 changes: 33 additions & 0 deletions components/common-go/cgroups/v2/io.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package cgroups_v2

import (
"path/filepath"

"github.com/gitpod-io/gitpod/common-go/cgroups"
)

type IO struct {
path string
}

func NewIOControllerWithMount(mountPoint, path string) *IO {
fullPath := filepath.Join(mountPoint, path)
return &IO{
path: fullPath,
}
}

func NewIOController(path string) *IO {
return &IO{
path: path,
}
}

func (io *IO) PSI() (cgroups.PSI, error) {
path := filepath.Join(io.path, "io.pressure")
return cgroups.ReadPSIValue(path)
}
7 changes: 6 additions & 1 deletion components/common-go/cgroups/v2/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package v2
package cgroups_v2

import (
"path/filepath"
Expand Down Expand Up @@ -63,3 +63,8 @@ func (m *Memory) Stat() (*cgroups.MemoryStats, error) {
InactiveFileTotal: statMap["inactive_file"],
}, nil
}

func (m *Memory) PSI() (cgroups.PSI, error) {
path := filepath.Join(m.path, "memory.pressure")
return cgroups.ReadPSIValue(path)
}
3 changes: 3 additions & 0 deletions components/common-go/kubernetes/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ const (

// workspaceNetConnLimit denotes the maximum number of connections a workspace can make per minute
WorkspaceNetConnLimitAnnotation = "gitpod.io/netConnLimitPerMinute"

// workspacePressureStallInfo indicates if pressure stall information should be retrieved for the workspace
WorkspacePressureStallInfoAnnotation = "gitpod.io/psi"
)

// GetOWIFromObject finds the owner, workspace and instance information on a Kubernetes object using labels
Expand Down
1 change: 1 addition & 0 deletions components/gitpod-protocol/src/protocol.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ export const WorkspaceFeatureFlags = {
protected_secrets: undefined,
workspace_class_limiting: undefined,
workspace_connection_limiting: undefined,
workspace_psi: undefined,
};
export type NamedWorkspaceFeatureFlag = keyof typeof WorkspaceFeatureFlags;
export namespace NamedWorkspaceFeatureFlag {
Expand Down
85 changes: 57 additions & 28 deletions components/server/src/workspace/workspace-starter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ import {
IDESettings,
WithReferrerContext,
EnvVarWithValue,
BillingTier,
} from "@gitpod/gitpod-protocol";
import { IAnalyticsWriter } from "@gitpod/gitpod-protocol/lib/analytics";
import { log } from "@gitpod/gitpod-protocol/lib/util/logging";
Expand Down Expand Up @@ -849,37 +850,12 @@ export class WorkspaceStarter {
);
}

if (
await getExperimentsClientForBackend().getValueAsync("protected_secrets", false, {
user,
billingTier,
})
) {
// We roll out the protected secrets feature using a ConfigCat feature flag, to ensure
// a smooth, gradual roll out without breaking users.
featureFlags = featureFlags.concat(["protected_secrets"]);
}
await this.tryEnableProtectedSecrets(featureFlags, user, billingTier);

featureFlags = featureFlags.filter((f) => !excludeFeatureFlags.includes(f));

const wsConnectionLimitingEnabled = await getExperimentsClientForBackend().getValueAsync(
"workspace_connection_limiting",
false,
{
user,
billingTier,
},
);

if (wsConnectionLimitingEnabled) {
const shouldLimitNetworkConnections = await this.entitlementService.limitNetworkConnections(
user,
new Date(),
);
if (shouldLimitNetworkConnections) {
featureFlags = featureFlags.concat(["workspace_connection_limiting"]);
}
}
await this.tryEnableConnectionLimiting(featureFlags, user, billingTier);
await this.tryEnablePSI(featureFlags, user, billingTier);

const usageAttributionId = await this.userService.getWorkspaceUsageAttributionId(user, workspace.projectId);
const billingMode = await this.billingModes.getBillingMode(usageAttributionId, new Date());
Expand Down Expand Up @@ -965,6 +941,59 @@ export class WorkspaceStarter {
}
}

private async tryEnableProtectedSecrets(
featureFlags: NamedWorkspaceFeatureFlag[],
user: User,
billingTier: BillingTier,
) {
if (
await getExperimentsClientForBackend().getValueAsync("protected_secrets", false, {
user,
billingTier,
})
) {
// We roll out the protected secrets feature using a ConfigCat feature flag, to ensure
// a smooth, gradual roll out without breaking users.
featureFlags.push("protected_secrets");
}
}

private async tryEnableConnectionLimiting(
featureFlags: NamedWorkspaceFeatureFlag[],
user: User,
billingTier: BillingTier,
) {
const wsConnectionLimitingEnabled = await getExperimentsClientForBackend().getValueAsync(
"workspace_connection_limiting",
false,
{
user,
billingTier,
},
);

if (wsConnectionLimitingEnabled) {
const shouldLimitNetworkConnections = await this.entitlementService.limitNetworkConnections(
user,
new Date(),
);
if (shouldLimitNetworkConnections) {
featureFlags.push("workspace_connection_limiting");
}
}
}

private async tryEnablePSI(featureFlags: NamedWorkspaceFeatureFlag[], user: User, billingTier: BillingTier) {
const psiEnabled = await getExperimentsClientForBackend().getValueAsync("pressure_stall_info", false, {
user,
billingTier,
});

if (psiEnabled && billingTier === "paid") {
featureFlags.push("workspace_psi");
}
}

// TODO(ak) move to IDE service
protected resolveReferrerIDE(
workspace: Workspace,
Expand Down
18 changes: 16 additions & 2 deletions components/ws-daemon/pkg/cgroup/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,20 @@ func (host *PluginHost) WorkspaceAdded(ctx context.Context, ws *dispatch.Workspa
return xerrors.Errorf("cannot get cgroup path for container %s: %w", ws.ContainerID, err)
}

opts := &PluginOptions{
BasePath: host.CGroupBasePath,
CgroupPath: cgroupPath,
InstanceId: ws.InstanceID,
Annotations: ws.Pod.Annotations,
}

for _, plg := range host.Plugins {
if plg.Type() != host.CGroupVersion {
continue
}

go func(plg Plugin) {
err := plg.Apply(ctx, host.CGroupBasePath, cgroupPath)
err := plg.Apply(ctx, opts)
if err == context.Canceled || err == context.DeadlineExceeded {
err = nil
}
Expand All @@ -109,7 +116,7 @@ func (host *PluginHost) WorkspaceAdded(ctx context.Context, ws *dispatch.Workspa
type Plugin interface {
Name() string
Type() Version
Apply(ctx context.Context, basePath, cgroupPath string) error
Apply(ctx context.Context, options *PluginOptions) error
}

type Version int
Expand All @@ -118,3 +125,10 @@ const (
Version1 Version = iota
Version2
)

type PluginOptions struct {
BasePath string
CgroupPath string
InstanceId string
Annotations map[string]string
}
4 changes: 2 additions & 2 deletions components/ws-daemon/pkg/cgroup/plugin_cachereclaim.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ type CacheReclaim struct{}
func (c *CacheReclaim) Name() string { return "cache-reclaim-v1" }
func (c *CacheReclaim) Type() Version { return Version1 }

func (c *CacheReclaim) Apply(ctx context.Context, basePath, cgroupPath string) error {
memPath := filepath.Join(string(basePath), "memory", cgroupPath)
func (c *CacheReclaim) Apply(ctx context.Context, opts *PluginOptions) error {
memPath := filepath.Join(string(opts.BasePath), "memory", opts.CgroupPath)

t := time.NewTicker(10 * time.Second)
defer t.Stop()
Expand Down
Loading