-
Notifications
You must be signed in to change notification settings - Fork 88
cleanup: refactor volume cloning #1521
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -366,36 +366,12 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) | |
defer d.volumeLocks.Release(volName) | ||
|
||
requestName := "controller_create_volume" | ||
|
||
var srcAzcopyAuthEnv []string | ||
var srcSubscriptionID, srcResourceGroupName, srcAccountName, srcContainerName, srcPath, srcAccountSASToken string | ||
if volContentSource != nil { | ||
switch volContentSource.Type.(type) { | ||
case *csi.VolumeContentSource_Snapshot: | ||
return nil, status.Errorf(codes.InvalidArgument, "VolumeContentSource Snapshot is not yet implemented") | ||
case *csi.VolumeContentSource_Volume: | ||
requestName = "controller_create_volume_from_volume" | ||
var srcVolumeID string | ||
if volContentSource.GetVolume() != nil { | ||
srcVolumeID = volContentSource.GetVolume().GetVolumeId() | ||
} | ||
srcResourceGroupName, srcAccountName, srcContainerName, _, srcSubscriptionID, err = GetContainerInfo(srcVolumeID) | ||
if err != nil { | ||
return nil, status.Error(codes.NotFound, err.Error()) | ||
} | ||
srcAccountOptions := &azure.AccountOptions{ | ||
Name: srcAccountName, | ||
SubscriptionID: srcSubscriptionID, | ||
ResourceGroup: srcResourceGroupName, | ||
GetLatestAccountKey: getLatestAccountKey, | ||
} | ||
srcAccountSASToken, srcAzcopyAuthEnv, err = d.getAzcopyAuth(ctx, srcAccountName, "", storageEndpointSuffix, srcAccountOptions, secrets, secretName, secretNamespace) | ||
if err != nil { | ||
return nil, status.Errorf(codes.Internal, "failed to getAzcopyAuth on account(%s) rg(%s), error: %v", accountOptions.Name, accountOptions.ResourceGroup, err) | ||
} | ||
srcPath = fmt.Sprintf("https://%s.blob.%s/%s", srcAccountName, storageEndpointSuffix, srcContainerName) | ||
default: | ||
return nil, status.Errorf(codes.InvalidArgument, "VolumeContentSource is not recognized: %v", volContentSource) | ||
} | ||
} | ||
|
||
|
@@ -466,16 +442,11 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) | |
return nil, status.Errorf(codes.Internal, "failed to create container(%s) on account(%s) type(%s) rg(%s) location(%s) size(%d), error: %v", validContainerName, accountName, storageAccountType, resourceGroup, location, requestGiB, err) | ||
} | ||
if volContentSource != nil { | ||
dstAzcopyAuthEnv := srcAzcopyAuthEnv | ||
dstAccountSASToken := srcAccountSASToken | ||
if srcAccountName != accountName { | ||
if dstAccountSASToken, dstAzcopyAuthEnv, err = d.getAzcopyAuth(ctx, accountName, accountKey, storageEndpointSuffix, accountOptions, secrets, secretName, secretNamespace); err != nil { | ||
return nil, status.Errorf(codes.Internal, "failed to getAzcopyAuth on account(%s) rg(%s), error: %v", accountOptions.Name, accountOptions.ResourceGroup, err) | ||
} | ||
accountSASToken, authAzcopyEnv, err := d.getAzcopyAuth(ctx, accountName, accountKey, storageEndpointSuffix, accountOptions, secrets, secretName, secretNamespace) | ||
andyzhangx marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err != nil { | ||
return nil, status.Errorf(codes.Internal, "failed to getAzcopyAuth on account(%s) rg(%s), error: %v", accountOptions.Name, accountOptions.ResourceGroup, err) | ||
} | ||
|
||
dstPath := fmt.Sprintf("https://%s.blob.%s/%s", accountName, storageEndpointSuffix, validContainerName) | ||
if err := d.copyBlobContainer(dstAzcopyAuthEnv, srcPath, srcAccountSASToken, dstPath, dstAccountSASToken, validContainerName); err != nil { | ||
if err := d.copyVolume(ctx, req, accountName, accountSASToken, authAzcopyEnv, validContainerName, secretNamespace, accountOptions, storageEndpointSuffix); err != nil { | ||
return nil, err | ||
} | ||
} | ||
|
@@ -786,11 +757,36 @@ func (d *Driver) DeleteBlobContainer(ctx context.Context, subsID, resourceGroupN | |
} | ||
|
||
// copyBlobContainer copies source volume content into a destination volume | ||
func (d *Driver) copyBlobContainer(authAzcopyEnv []string, srcPath string, srcAccountSASToken string, dstPath string, dstAccountSASToken string, dstContainerName string) error { | ||
func (d *Driver) copyBlobContainer(ctx context.Context, req *csi.CreateVolumeRequest, dstAccountName string, dstAccountSasToken string, authAzcopyEnv []string, dstContainerName string, secretNamespace string, accountOptions *azure.AccountOptions, storageEndpointSuffix string) error { | ||
var sourceVolumeID string | ||
if req.GetVolumeContentSource() != nil && req.GetVolumeContentSource().GetVolume() != nil { | ||
sourceVolumeID = req.GetVolumeContentSource().GetVolume().GetVolumeId() | ||
|
||
if srcPath == "" || dstPath == "" || dstContainerName == "" { | ||
return fmt.Errorf("srcPath(%s) or dstPath(%s) or dstContainerName(%s) is empty", srcPath, dstPath, dstContainerName) | ||
} | ||
srcResourceGroupName, srcAccountName, srcContainerName, _, srcSubscriptionID, err := GetContainerInfo(sourceVolumeID) //nolint:dogsled | ||
if err != nil { | ||
return status.Error(codes.NotFound, err.Error()) | ||
} | ||
if dstAccountName == "" { | ||
dstAccountName = srcAccountName | ||
} | ||
if srcAccountName == "" || srcContainerName == "" || dstContainerName == "" { | ||
return fmt.Errorf("srcAccountName(%s) or srcContainerName(%s) or dstContainerName(%s) is empty", srcAccountName, srcContainerName, dstContainerName) | ||
} | ||
srcAccountSasToken := dstAccountSasToken | ||
if srcAccountName != dstAccountName && dstAccountSasToken != "" { | ||
srcAccountOptions := &azure.AccountOptions{ | ||
Name: srcAccountName, | ||
ResourceGroup: srcResourceGroupName, | ||
SubscriptionID: srcSubscriptionID, | ||
GetLatestAccountKey: accountOptions.GetLatestAccountKey, | ||
} | ||
if srcAccountSasToken, _, err = d.getAzcopyAuth(ctx, srcAccountName, "", storageEndpointSuffix, srcAccountOptions, nil, "", secretNamespace); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it may failed with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we don't know here, from intuition, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tested it to only give
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems strange and doesn't match the public docs for AzCopy. I asked the team and they indicated that:
In short-- sounds like perms are lacking on the destination. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's a bicep POC that shows The param stem string = uniqueString(resourceGroup().name)
param location string = resourceGroup().location
param azCliVersion string = '2.52.0'
param forceUpdateTag string = utcNow()
var blobOwnerRoleId = subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b')
var blobReaderRoleId = subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '2a2b9908-6ea1-4ae2-8e65-a410df84e7d1')
var blobContributorRoleId = subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe')
resource srcAccount 'Microsoft.Storage/storageAccounts@2023-01-01' = {
name: '${stem}src'
location: location
sku: {
name: 'Standard_LRS'
}
kind: 'StorageV2'
properties: {
accessTier: 'Hot'
allowSharedKeyAccess: false
defaultToOAuthAuthentication: true
isLocalUserEnabled: false
}
}
resource dstAccount 'Microsoft.Storage/storageAccounts@2023-01-01' = {
name: '${stem}dst'
location: location
sku: {
name: 'Standard_LRS'
}
kind: 'StorageV2'
properties: {
accessTier: 'Hot'
allowSharedKeyAccess: false
defaultToOAuthAuthentication: true
isLocalUserEnabled: false
}
}
resource makeIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' = {
name: '${stem}-make-id'
location: location
}
resource copyIdentity 'Microsoft.ManagedIdentity/userAssignedIdentities@2023-01-31' = {
name: '${stem}-copy-id'
location: location
}
resource makeSourceBlobOwner 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
name: guid(makeIdentity.id, blobOwnerRoleId, srcAccount.id)
properties: {
roleDefinitionId: blobOwnerRoleId
principalId: makeIdentity.properties.principalId
}
scope: srcAccount
}
resource copySourceBlobReader 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
name: guid(copyIdentity.id, blobReaderRoleId, srcAccount.id)
properties: {
roleDefinitionId: blobReaderRoleId
principalId: copyIdentity.properties.principalId
}
scope: srcAccount
}
resource copyDestinationBlobContributor 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
name: guid(copyIdentity.id, blobContributorRoleId, dstAccount.id)
properties: {
roleDefinitionId: blobContributorRoleId
principalId: copyIdentity.properties.principalId
}
scope: dstAccount
}
resource makeScript 'Microsoft.Resources/deploymentScripts@2023-08-01' = {
name: '${stem}-make-script'
location: location
kind: 'AzureCLI'
identity: {
type: 'UserAssigned'
userAssignedIdentities: {
'${makeIdentity.id}': {}
}
}
properties: {
azCliVersion: azCliVersion
forceUpdateTag: forceUpdateTag
environmentVariables: [
{
name: 'AZCOPY_AUTO_LOGIN_TYPE'
value: 'MSI'
}
{
name: 'SRCACCOUNT'
value: srcAccount.properties.primaryEndpoints.blob
}
{
name: 'SRCCONTAINER'
value: 'data'
}
]
scriptContent: '${installScriptContent}\n${makeScriptContent}'
retentionInterval: 'PT1H'
}
dependsOn: [
makeSourceBlobOwner
]
}
resource copyScript 'Microsoft.Resources/deploymentScripts@2023-08-01' = {
name: '${stem}-copy-script'
location: location
kind: 'AzureCLI'
identity: {
type: 'UserAssigned'
userAssignedIdentities: {
'${copyIdentity.id}': {}
}
}
properties: {
azCliVersion: azCliVersion
forceUpdateTag: forceUpdateTag
environmentVariables: [
{
name: 'AZCOPY_AUTO_LOGIN_TYPE'
value: 'MSI'
}
{
name: 'SRCACCOUNT'
value: srcAccount.properties.primaryEndpoints.blob
}
{
name: 'SRCCONTAINER'
value: 'data'
}
{
name: 'DSTACCOUNT'
value: dstAccount.properties.primaryEndpoints.blob
}
{
name: 'DSTCONTAINER'
value: 'data'
}
]
scriptContent: '${installScriptContent}\n${copyScriptContent}'
retentionInterval: 'PT1H'
}
dependsOn: [
copySourceBlobReader
copyDestinationBlobContributor
makeScript
]
}
var installScriptContent = '''
cd /tmp
mkdir bin
apk add -q --no-interactive gcompat
wget -q https://aka.ms/downloadazcopy-v10-linux
tar -xf downloadazcopy-v10-linux
cp -vf ./azcopy_linux_amd64_*/azcopy bin/
rm -rf ./azcopy_linux_amd64_* downloadazcopy-v10-linux
'''
var makeScriptContent = '''
mkdir data
date > data/0.txt
date > data/1.txt
date > data/3.txt
date > data/4.txt
./bin/azcopy jobs list
./bin/azcopy list ${SRCACCOUNT}
./bin/azcopy make ${SRCACCOUNT}${SRCCONTAINER}
./bin/azcopy copy data ${SRCACCOUNT}${SRCCONTAINER} --recursive
'''
var copyScriptContent = '''
./bin/azcopy jobs list
./bin/azcopy list ${SRCACCOUNT}
./bin/azcopy list ${DSTACCOUNT}
./bin/azcopy copy ${SRCACCOUNT}${SRCCONTAINER} ${DSTACCOUNT}${DSTCONTAINER} --recursive --check-length=false --s2s-preserve-access-tier=false
''' There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thanks for the verification, we could address this in the doc or other PR |
||
return err | ||
} | ||
} | ||
srcPath := fmt.Sprintf("https://%s.blob.%s/%s%s", srcAccountName, storageEndpointSuffix, srcContainerName, srcAccountSasToken) | ||
dstPath := fmt.Sprintf("https://%s.blob.%s/%s%s", dstAccountName, storageEndpointSuffix, dstContainerName, dstAccountSasToken) | ||
|
||
jobState, percent, err := d.azcopy.GetAzcopyJob(dstContainerName, authAzcopyEnv) | ||
klog.V(2).Infof("azcopy job status: %s, copy percent: %s%%, error: %v", jobState, percent, err) | ||
|
@@ -800,13 +796,9 @@ func (d *Driver) copyBlobContainer(authAzcopyEnv []string, srcPath string, srcAc | |
case util.AzcopyJobRunning: | ||
return fmt.Errorf("wait for the existing AzCopy job to complete, current copy percentage is %s%%", percent) | ||
case util.AzcopyJobNotFound: | ||
klog.V(2).Infof("copy blob container %s to %s", srcPath, dstContainerName) | ||
klog.V(2).Infof("copy blob container %s:%s to %s:%s", srcAccountName, srcContainerName, dstAccountName, dstContainerName) | ||
execFunc := func() error { | ||
cmd := exec.Command("azcopy", "copy", srcPath+srcAccountSASToken, dstPath+dstAccountSASToken, "--recursive", "--check-length=false", "--s2s-preserve-access-tier=false") | ||
if len(authAzcopyEnv) > 0 { | ||
cmd.Env = append(os.Environ(), authAzcopyEnv...) | ||
} | ||
if out, err := cmd.CombinedOutput(); err != nil { | ||
if out, err := d.execAzcopyCopy(srcPath, dstPath, azcopyCloneVolumeOptions, authAzcopyEnv); err != nil { | ||
return fmt.Errorf("exec error: %v, output: %v", err, string(out)) | ||
} | ||
return nil | ||
|
@@ -817,15 +809,38 @@ func (d *Driver) copyBlobContainer(authAzcopyEnv []string, srcPath string, srcAc | |
} | ||
copyErr := util.WaitUntilTimeout(time.Duration(d.waitForAzCopyTimeoutMinutes)*time.Minute, execFunc, timeoutFunc) | ||
if copyErr != nil { | ||
klog.Warningf("CopyBlobContainer(%s, %s, %s) failed with error: %v", srcPath, dstPath, dstContainerName, copyErr) | ||
klog.Warningf("CopyBlobContainer(%s, %s, %s) failed with error: %v", accountOptions.ResourceGroup, dstAccountName, dstContainerName, copyErr) | ||
} else { | ||
klog.V(2).Infof("copied blob container %s to %s successfully", srcPath, dstContainerName) | ||
klog.V(2).Infof("copied blob container %s to %s successfully", srcContainerName, dstContainerName) | ||
} | ||
return copyErr | ||
} | ||
return err | ||
} | ||
|
||
// copyVolume copies a volume form volume or snapshot, snapshot is not supported now | ||
func (d *Driver) copyVolume(ctx context.Context, req *csi.CreateVolumeRequest, accountName string, accountSASToken string, authAzcopyEnv []string, dstContainerName, secretNamespace string, accountOptions *azure.AccountOptions, storageEndpointSuffix string) error { | ||
vs := req.VolumeContentSource | ||
switch vs.Type.(type) { | ||
case *csi.VolumeContentSource_Snapshot: | ||
return status.Errorf(codes.InvalidArgument, "VolumeContentSource Snapshot is not yet implemented") | ||
case *csi.VolumeContentSource_Volume: | ||
return d.copyBlobContainer(ctx, req, accountName, accountSASToken, authAzcopyEnv, dstContainerName, secretNamespace, accountOptions, storageEndpointSuffix) | ||
default: | ||
return status.Errorf(codes.InvalidArgument, "%v is not a proper volume source", vs) | ||
} | ||
} | ||
|
||
// execAzcopyCopy exec azcopy copy command | ||
func (d *Driver) execAzcopyCopy(srcPath, dstPath string, azcopyCopyOptions, authAzcopyEnv []string) ([]byte, error) { | ||
cmd := exec.Command("azcopy", "copy", srcPath, dstPath) | ||
cmd.Args = append(cmd.Args, azcopyCopyOptions...) | ||
if len(authAzcopyEnv) > 0 { | ||
cmd.Env = append(os.Environ(), authAzcopyEnv...) | ||
} | ||
return cmd.CombinedOutput() | ||
} | ||
|
||
// authorizeAzcopyWithIdentity returns auth env for azcopy using cluster identity | ||
func (d *Driver) authorizeAzcopyWithIdentity() ([]string, error) { | ||
azureAuthConfig := d.cloud.Config.AzureAuthConfig | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this might have been causing your failures.
getAzcopyAuth()
has a few issues:len(secrets) == 0 && len(secretName) == 0
.All of this account key / sas handling seems like hacks around the root issue that the default permissions for the control plane identity neglected to include
Storage Blob Data Contributor
along withContributor
.Changing
secrets, secretName, secretNamespace
tonil, "", ""
would be a quick fix, and preferablygetAzcopyAuth
would always try the identity first.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we could address this in other PR, this is refactor not bug fix
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I also tried again and
Reader
can be worked after it failed before. But I think the reason is not here since we have use MSI in that test sincelen(secrets) == 0 && len(secretName) == 0
, I will still work on this issue to find the reason.