Check for both folder and folder/ as hdi_isfolder blobs #2809

Merged · 10 commits · Oct 21, 2024
4 changes: 4 additions & 0 deletions cmd/copyEnumeratorInit.go
@@ -585,6 +585,10 @@ func (cca *CookedCopyCmdArgs) MakeEscapedRelativePath(source bool, dstIsDir bool
return "" // ignore path encode rules
}

if object.relativePath == "\x00" { // Short circuit, our relative path is requesting root/
return "\x00"
}

// source is a EXACT path to the file
if object.isSingleSourceFile() {
// If we're finding an object from the source, it returns "" if it's already got it.
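Throughout this PR, `"\x00"` serves as a sentinel relative path meaning "the source/destination root itself, addressed with a trailing `/`". The enumerator emits it, `MakeEscapedRelativePath` (above) passes it through untouched instead of applying path-encode rules, and `GenerateFullPath` in `common/extensions.go` (further down) converts it back to `rootPath + "/"`. A minimal sketch of the pass-through, with the real escaping logic stubbed out:

```go
package main

import "fmt"

const rootSentinel = "\x00" // sentinel relative path: "the root itself, with a trailing /"

// makeEscapedRelativePath sketches the short circuit added above: the sentinel
// bypasses all path-encode rules and is returned verbatim.
func makeEscapedRelativePath(relativePath string) string {
	if relativePath == rootSentinel {
		return rootSentinel
	}
	// ... the real function applies pathEncodeRules to the relative path here ...
	return "/" + relativePath
}

func main() {
	fmt.Printf("%q\n", makeEscapedRelativePath("dir/file.txt")) // "/dir/file.txt"
	fmt.Printf("%q\n", makeEscapedRelativePath(rootSentinel))   // "\x00"
}
```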
2 changes: 1 addition & 1 deletion cmd/list.go
@@ -481,7 +481,7 @@ func getPath(containerName, relativePath string, level LocationLevel, entityType
builder.WriteString(containerName + "/")
}
builder.WriteString(relativePath)
if entityType == common.EEntityType.Folder() {
if entityType == common.EEntityType.Folder() && !strings.HasSuffix(relativePath, "/") {
builder.WriteString("/")
}
return builder.String()
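The added `strings.HasSuffix` guard keeps `azcopy list` from printing a doubled slash now that folder entries can already carry a trailing `/` in their relative path. A small sketch of the guarded append, with the container/level handling of `getPath` stripped away:

```go
package main

import (
	"fmt"
	"strings"
)

// appendFolderSlash mirrors the guarded write in getPath: only append "/" for
// folders whose relative path doesn't already end with one.
func appendFolderSlash(relativePath string, isFolder bool) string {
	var b strings.Builder
	b.WriteString(relativePath)
	if isFolder && !strings.HasSuffix(relativePath, "/") {
		b.WriteString("/")
	}
	return b.String()
}

func main() {
	fmt.Println(appendFolderSlash("dir", true))  // dir/
	fmt.Println(appendFolderSlash("dir/", true)) // dir/ (no doubled slash)
}
```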
5 changes: 5 additions & 0 deletions cmd/sync.go
@@ -60,6 +60,7 @@ type rawSyncCmdArgs struct {
compareHash string
localHashStorageMode string

includeDirectoryStubs bool // Includes hdi_isfolder objects in the sync even w/o preservePermissions.
preservePermissions bool
preserveSMBPermissions bool // deprecated and synonymous with preservePermissions
preserveOwner bool
@@ -368,6 +369,8 @@ func (raw *rawSyncCmdArgs) cook() (cookedSyncCmdArgs, error) {

cooked.deleteDestinationFileIfNecessary = raw.deleteDestinationFileIfNecessary

cooked.includeDirectoryStubs = raw.includeDirectoryStubs

return cooked, nil
}

@@ -417,6 +420,7 @@ type cookedSyncCmdArgs struct {
putBlobSize int64
forceIfReadOnly bool
backupMode bool
includeDirectoryStubs bool

// commandString hold the user given command which is logged to the Job log file
commandString string
@@ -789,6 +793,7 @@ func init() {
rootCmd.AddCommand(syncCmd)
syncCmd.PersistentFlags().BoolVar(&raw.recursive, "recursive", true, "True by default, look into sub-directories recursively when syncing between directories. (default true).")
syncCmd.PersistentFlags().StringVar(&raw.fromTo, "from-to", "", "Optionally specifies the source destination combination. For Example: LocalBlob, BlobLocal, LocalFile, FileLocal, BlobFile, FileBlob, etc.")
syncCmd.PersistentFlags().BoolVar(&raw.includeDirectoryStubs, "include-directory-stub", false, "False by default, includes blobs with the hdi_isfolder metadata in the transfer.")

// TODO: enable for copy with IfSourceNewer
// smb info/permissions can be persisted in the scenario of File -> File
4 changes: 2 additions & 2 deletions cmd/syncEnumerator.go
@@ -55,7 +55,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s
}
}

includeDirStubs := cca.fromTo.From().SupportsHnsACLs() && cca.fromTo.To().SupportsHnsACLs() && cca.preservePermissions.IsTruthy()
includeDirStubs := (cca.fromTo.From().SupportsHnsACLs() && cca.fromTo.To().SupportsHnsACLs() && cca.preservePermissions.IsTruthy()) || cca.includeDirectoryStubs

// TODO: enable symlink support in a future release after evaluating the implications
// TODO: Consider passing an errorChannel so that enumeration errors during sync can be conveyed to the caller.
@@ -129,7 +129,7 @@ func (cca *cookedSyncCmdArgs) initEnumerator(ctx context.Context) (enumerator *s
}

// decide our folder transfer strategy
fpo, folderMessage := NewFolderPropertyOption(cca.fromTo, cca.recursive, true, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), false, strings.EqualFold(cca.destination.Value, common.Dev_Null), false) // sync always acts like stripTopDir=true
fpo, folderMessage := NewFolderPropertyOption(cca.fromTo, cca.recursive, true, filters, cca.preserveSMBInfo, cca.preservePermissions.IsTruthy(), false, strings.EqualFold(cca.destination.Value, common.Dev_Null), cca.includeDirectoryStubs) // sync always acts like stripTopDir=true
if !cca.dryrunMode {
glcm.Info(folderMessage)
}
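Directory stubs are now enumerated either when both endpoints support HNS ACLs and permissions are being preserved (the old behaviour), or when the user passes `--include-directory-stub=true` to `azcopy sync` (false by default). A hedged sketch of that decision, with the `FromTo` and `preservePermissions` accessors reduced to plain booleans:

```go
package main

import "fmt"

// shouldIncludeDirStubs mirrors the widened condition in initEnumerator.
func shouldIncludeDirStubs(srcSupportsHnsACLs, dstSupportsHnsACLs, preservePermissions, includeDirectoryStubs bool) bool {
	return (srcSupportsHnsACLs && dstSupportsHnsACLs && preservePermissions) || includeDirectoryStubs
}

func main() {
	fmt.Println(shouldIncludeDirStubs(true, true, true, false))   // true: old HNS + permissions path
	fmt.Println(shouldIncludeDirStubs(false, false, false, true)) // true: the new flag on its own
	fmt.Println(shouldIncludeDirStubs(true, true, false, false))  // false: neither condition holds
}
```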
20 changes: 13 additions & 7 deletions cmd/zc_processor.go
@@ -68,14 +68,20 @@ func (s *copyTransferProcessor) scheduleCopyTransfer(storedObject StoredObject)

// Escape paths on destinations where the characters are invalid
// And re-encode them where the characters are valid.
srcRelativePath := pathEncodeRules(storedObject.relativePath, s.copyJobTemplate.FromTo, false, true)
dstRelativePath := pathEncodeRules(storedObject.relativePath, s.copyJobTemplate.FromTo, false, false)
if srcRelativePath != "" {
srcRelativePath = "/" + srcRelativePath
}
if dstRelativePath != "" {
dstRelativePath = "/" + dstRelativePath
var srcRelativePath, dstRelativePath string
if storedObject.relativePath == "\x00" { // Short circuit when we're talking about root/, because the STE is funky about this.
srcRelativePath, dstRelativePath = storedObject.relativePath, storedObject.relativePath
} else {
srcRelativePath = pathEncodeRules(storedObject.relativePath, s.copyJobTemplate.FromTo, false, true)
dstRelativePath = pathEncodeRules(storedObject.relativePath, s.copyJobTemplate.FromTo, false, false)
if srcRelativePath != "" {
srcRelativePath = "/" + srcRelativePath
}
if dstRelativePath != "" {
dstRelativePath = "/" + dstRelativePath
}
}

copyTransfer, shouldSendToSte := storedObject.ToNewCopyTransfer(false, srcRelativePath, dstRelativePath, s.preserveAccessTier, s.folderPropertiesOption, s.symlinkHandlingType)

if s.copyJobTemplate.FromTo.To() == common.ELocation.None() {
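`scheduleCopyTransfer` skips path encoding entirely for the root sentinel, since encoding it or prefixing it with `/` would defeat the special case downstream in the STE. A simplified sketch of the branch, with `pathEncodeRules` replaced by a stub:

```go
package main

import "fmt"

const rootSentinel = "\x00"

// encodeStub stands in for pathEncodeRules, which escapes or re-encodes
// characters depending on the source/destination pair.
func encodeStub(p string) string { return p }

// relativePaths mirrors the branch added to scheduleCopyTransfer.
func relativePaths(relativePath string) (src, dst string) {
	if relativePath == rootSentinel {
		// root/ special case: the sentinel is handed to the STE untouched,
		// with no encoding and no leading "/".
		return relativePath, relativePath
	}
	src, dst = encodeStub(relativePath), encodeStub(relativePath)
	if src != "" {
		src = "/" + src
	}
	if dst != "" {
		dst = "/" + dst
	}
	return src, dst
}

func main() {
	s, d := relativePaths("dir/file.txt")
	fmt.Printf("%q %q\n", s, d) // "/dir/file.txt" "/dir/file.txt"
	s, d = relativePaths(rootSentinel)
	fmt.Printf("%q %q\n", s, d) // "\x00" "\x00"
}
```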
82 changes: 58 additions & 24 deletions cmd/zc_traverser_blob.go
@@ -86,8 +86,8 @@ func (t *blobTraverser) IsDirectory(isSource bool) (bool, error) {
}

// All sources and DFS-destinations we'll look further

_, _, isDirStub, blobErr := t.getPropertiesIfSingleBlob()
// This call is fine, because there is no trailing / here -- if there's a trailing /, this is surely referring to a directory.
_, _, isDirStub, _, blobErr := t.getPropertiesIfSingleBlob()

// We know for sure this is a single blob still, let it walk on through to the traverser.
if bloberror.HasCode(blobErr, bloberror.BlobUsesCustomerSpecifiedEncryption) {
@@ -127,36 +127,45 @@ func (t *blobTraverser) IsDirectory(isSource bool) (bool, error) {
return true, nil
}

func (t *blobTraverser) getPropertiesIfSingleBlob() (response *blob.GetPropertiesResponse, isBlob bool, isDirStub bool, err error) {
func (t *blobTraverser) getPropertiesIfSingleBlob() (response *blob.GetPropertiesResponse, isBlob bool, isDirStub bool, blobName string, err error) {
// trim away the trailing slash before we check whether it's a single blob
// so that we can detect the directory stub in case there is one
blobURLParts, err := blob.ParseURL(t.rawURL)
if err != nil {
return nil, false, false, err
return nil, false, false, "", err
}
blobURLParts.BlobName = strings.TrimSuffix(blobURLParts.BlobName, common.AZCOPY_PATH_SEPARATOR_STRING)

if blobURLParts.BlobName == "" {
// This is a container, which needs to be given a proper listing.
return nil, false, false, nil
return nil, false, false, "", nil
}

/*
If the user specified a trailing /, they may mean:
A) `folder/` with `hdi_isfolder`, this is intentional.
B) `folder` with `hdi_isfolder`
C) a virtual directory with children, but no stub
*/

retry:
blobClient, err := createBlobClientFromServiceClient(blobURLParts, t.serviceClient)
if err != nil {
return nil, false, false, err
return nil, false, false, blobURLParts.BlobName, err
}
props, err := blobClient.GetProperties(t.ctx, &blob.GetPropertiesOptions{CPKInfo: t.cpkOptions.GetCPKInfo()})

// if there was no problem getting the properties, it means that we are looking at a single blob
if err == nil {
if gCopyUtil.doesBlobRepresentAFolder(props.Metadata) {
return &props, false, true, nil
}

return &props, true, false, err
if err != nil && strings.HasSuffix(blobURLParts.BlobName, common.AZCOPY_PATH_SEPARATOR_STRING) {
// Trim & retry, maybe the directory stub is DFS style.
blobURLParts.BlobName = strings.TrimSuffix(blobURLParts.BlobName, common.AZCOPY_PATH_SEPARATOR_STRING)
goto retry
} else if err == nil {
// We found the target blob, great! Let's return the details.
isDir := gCopyUtil.doesBlobRepresentAFolder(props.Metadata)
return &props, !isDir, isDir, blobURLParts.BlobName, nil
}

return nil, false, false, err
// We found nothing.
return nil, false, false, "", err
}

func (t *blobTraverser) getBlobTags() (common.BlobTags, error) {
@@ -190,7 +199,7 @@ func (t *blobTraverser) Traverse(preprocessor objectMorpher, processor objectPro
}

// check if the url points to a single blob
blobProperties, isBlob, isDirStub, err := t.getPropertiesIfSingleBlob()
blobProperties, isBlob, isDirStub, blobName, err := t.getPropertiesIfSingleBlob()

var respErr *azcore.ResponseError
if errors.As(err, &respErr) {
@@ -223,11 +232,16 @@ func (t *blobTraverser) Traverse(preprocessor objectMorpher, processor objectPro
azcopyScanningLogger.Log(common.LogDebug, fmt.Sprintf("Root entity type: %s", getEntityType(blobProperties.Metadata)))
}

relPath := ""
if strings.HasSuffix(blobName, "/") {
relPath = "\x00" // Because the ste will trim the / suffix from our source, or we may not already have it.
}

blobPropsAdapter := blobPropertiesResponseAdapter{blobProperties}
storedObject := newStoredObject(
preprocessor,
getObjectNameOnly(strings.TrimSuffix(blobURLParts.BlobName, common.AZCOPY_PATH_SEPARATOR_STRING)),
"",
getObjectNameOnly(blobName),
relPath,
getEntityType(blobPropsAdapter.Metadata),
blobPropsAdapter.LastModified(),
blobPropsAdapter.ContentLength(),
@@ -339,15 +353,27 @@ func (t *blobTraverser) parallelList(containerClient *container.Client, containe

if t.includeDirectoryStubs {
// try to get properties on the directory itself, since it's not listed in BlobItems
blobClient := containerClient.NewBlobClient(strings.TrimSuffix(*virtualDir.Name, common.AZCOPY_PATH_SEPARATOR_STRING))
dName := strings.TrimSuffix(*virtualDir.Name, common.AZCOPY_PATH_SEPARATOR_STRING)
blobClient := containerClient.NewBlobClient(dName)
altNameCheck:
pResp, err := blobClient.GetProperties(t.ctx, nil)
pbPropAdapter := blobPropertiesResponseAdapter{&pResp}
folderRelativePath := strings.TrimSuffix(*virtualDir.Name, common.AZCOPY_PATH_SEPARATOR_STRING)
folderRelativePath = strings.TrimPrefix(folderRelativePath, searchPrefix)
if err == nil {
if !t.doesBlobRepresentAFolder(pResp.Metadata) { // We've picked up on a file *named* the folder, not the folder itself. Does folder/ exist?
if !strings.HasSuffix(dName, "/") {
blobClient = containerClient.NewBlobClient(dName + common.AZCOPY_PATH_SEPARATOR_STRING) // Tack on the path separator, check.
dName += common.AZCOPY_PATH_SEPARATOR_STRING
goto altNameCheck // "foo" is a file, what about "foo/"?
}

goto skipDirAdd // We shouldn't add a blob that isn't a folder as a folder. You either have the folder metadata, or you don't.
}

pbPropAdapter := blobPropertiesResponseAdapter{&pResp}
folderRelativePath := strings.TrimPrefix(dName, searchPrefix)

storedObject := newStoredObject(
preprocessor,
getObjectNameOnly(strings.TrimSuffix(*virtualDir.Name, common.AZCOPY_PATH_SEPARATOR_STRING)),
getObjectNameOnly(dName),
folderRelativePath,
common.EEntityType.Folder(),
pbPropAdapter.LastModified(),
@@ -371,7 +397,15 @@
}

enqueueOutput(storedObject, err)
} else {
// There was nothing there, but is there folder/?
if !strings.HasSuffix(dName, "/") {
blobClient = containerClient.NewBlobClient(dName + common.AZCOPY_PATH_SEPARATOR_STRING) // Tack on the path separator, check.
dName += common.AZCOPY_PATH_SEPARATOR_STRING
goto altNameCheck // "foo" is a file, what about "foo/"?
}
}
skipDirAdd:
}
}
}
@@ -487,7 +521,7 @@ func (t *blobTraverser) createStoredObjectForBlob(preprocessor objectMorpher, bl

func (t *blobTraverser) doesBlobRepresentAFolder(metadata map[string]*string) bool {
util := copyHandlerUtil{}
return util.doesBlobRepresentAFolder(metadata) && !(t.includeDirectoryStubs && t.recursive)
return util.doesBlobRepresentAFolder(metadata) // We should ignore these, because we pick them up in other ways.
}

func (t *blobTraverser) serialList(containerClient *container.Client, containerName string, searchPrefix string,
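Both `getPropertiesIfSingleBlob` and `parallelList` now probe twice: once for the name as given, and, if that is not an `hdi_isfolder` stub, once more for the alternate spelling with or without the trailing `/`. The diff expresses this with `goto retry` / `goto altNameCheck`; the same idea written as a plain loop might look like the sketch below. This is illustrative only: the metadata check is simplified and the blob client is reduced to a lookup function.

```go
package main

import (
	"errors"
	"fmt"
	"strings"
)

// blobMeta stands in for GetProperties: it returns the metadata for a blob
// name, or an error if no such blob exists.
type blobMeta func(name string) (map[string]*string, error)

func isFolderStub(metadata map[string]*string) bool {
	v, ok := metadata["hdi_isfolder"]
	return ok && v != nil && strings.EqualFold(*v, "true")
}

// findFolderStub checks both "name" and the alternate spelling ("name/" or
// "name" without the slash) and reports which, if either, is a folder stub.
func findFolderStub(get blobMeta, name string) (stubName string, found bool) {
	candidates := []string{name}
	if strings.HasSuffix(name, "/") {
		candidates = append(candidates, strings.TrimSuffix(name, "/")) // maybe the stub is DFS-style, without the slash
	} else {
		candidates = append(candidates, name+"/") // "foo" is a file, what about "foo/"?
	}
	for _, c := range candidates {
		meta, err := get(c)
		if err != nil {
			continue // nothing under this spelling; try the alternate
		}
		if isFolderStub(meta) {
			return c, true
		}
	}
	return "", false
}

func main() {
	trueStr := "true"
	store := map[string]map[string]*string{
		"dir/": {"hdi_isfolder": &trueStr}, // stub created with a trailing slash
	}
	get := func(name string) (map[string]*string, error) {
		if m, ok := store[name]; ok {
			return m, nil
		}
		return nil, errors.New("BlobNotFound")
	}
	fmt.Println(findFolderStub(get, "dir")) // dir/ true
}
```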
3 changes: 3 additions & 0 deletions common/extensions.go
@@ -144,6 +144,8 @@ func GenerateFullPath(rootPath, childPath string) string {
// if the childPath is empty, it means the rootPath already points to the desired entity
if childPath == "" {
return rootPath
} else if childPath == "\x00" { // The enumerator has asked us to target with a / at the end of our root path. This is a massive hack. When the footgun happens later, ping Adele!
return rootPath + rootSeparator
}

// otherwise, make sure a path separator is inserted between the rootPath if necessary
@@ -167,6 +169,7 @@ func GenerateFullPathWithQuery(rootPath, childPath, extraQuery string) string {
// Block Names of blobs are of format noted below.
// <5B empty placeholder> <16B GUID of AzCopy re-interpreted as string><5B PartNum><5B Index in the jobPart><5B blockNum>
const AZCOPY_BLOCKNAME_LENGTH = 48

func GenerateBlockBlobBlockID(blockNamePrefix string, index int32) string {
blockID := []byte(fmt.Sprintf("%s%05d", blockNamePrefix, index))
return base64.StdEncoding.EncodeToString(blockID)
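For reference, the three `childPath` cases `GenerateFullPath` now distinguishes, shown with a simplified stand-in (the URL is a placeholder, and the real function also normalises separators):

```go
package main

import "fmt"

// generateFullPath is a simplified stand-in for common.GenerateFullPath,
// covering just the three childPath cases that matter here.
func generateFullPath(rootPath, childPath string) string {
	switch childPath {
	case "":
		return rootPath // rootPath already points at the desired entity
	case "\x00":
		return rootPath + "/" // the enumerator asked for the root plus a trailing separator
	default:
		return rootPath + "/" + childPath
	}
}

func main() {
	root := "https://account.blob.core.windows.net/container/dir" // placeholder URL
	fmt.Println(generateFullPath(root, ""))             // .../container/dir
	fmt.Println(generateFullPath(root, "\x00"))         // .../container/dir/
	fmt.Println(generateFullPath(root, "sub/file.txt")) // .../container/dir/sub/file.txt
}
```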
43 changes: 43 additions & 0 deletions e2etest/newe2e_generic_wrangling.go
@@ -88,6 +88,22 @@ func ListOfAny[T any](in []T) []any {
return out
}

func Keys[K comparable, V any](in map[K]V) []K {
out := make([]K, 0, len(in))
for k, _ := range in {
out = append(out, k)
}
return out
}

func AnyKeys[K comparable, V any](in map[K]V) []any {
out := make([]any, 0, len(in))
for k, _ := range in {
out = append(out, k)
}
return out
}

func CloneMap[K comparable, V any](in map[K]V) map[K]V {
out := make(map[K]V)

Expand All @@ -98,6 +114,23 @@ func CloneMap[K comparable, V any](in map[K]V) map[K]V {
return out
}

func CloneMapWithRule[K comparable, V any](in map[K]V, rule func(K, V) (key K, value V, include bool)) map[K]V {
out := make(map[K]V)

for k, v := range in {
var include bool
k, v, include = rule(k, v)

if !include {
continue
}

out[k] = v
}

return out
}

func ListContains[I comparable](item I, in []I) bool {
for _, v := range in {
if item == v {
@@ -108,6 +141,16 @@ func ListContains[I comparable](item I, in []I) bool {
return false
}

func Any[I any](items []I, f func(I) bool) bool {
for _, v := range items {
if f(v) {
return true
}
}

return false
}

func ClonePointer[T any](in *T) *T {
if in == nil {
return nil
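The new generic helpers are small conveniences for the e2e suite: `Keys`/`AnyKeys` collect map keys (the latter as `[]any` for variadic assertion helpers), `CloneMapWithRule` filters or rewrites entries while copying, and `Any` is a predicate-based membership test. A usage sketch follows; the helper bodies are copied compactly here only so the example runs standalone:

```go
package main

import (
	"fmt"
	"strings"
)

// Compact copies of the helpers added above, so this sketch runs standalone.
func Keys[K comparable, V any](in map[K]V) []K {
	out := make([]K, 0, len(in))
	for k := range in {
		out = append(out, k)
	}
	return out
}

func CloneMapWithRule[K comparable, V any](in map[K]V, rule func(K, V) (key K, value V, include bool)) map[K]V {
	out := make(map[K]V)
	for k, v := range in {
		var include bool
		k, v, include = rule(k, v)
		if include {
			out[k] = v
		}
	}
	return out
}

func Any[I any](items []I, f func(I) bool) bool {
	for _, v := range items {
		if f(v) {
			return true
		}
	}
	return false
}

func main() {
	props := map[string]string{"hdi_isfolder": "true", "foo": "bar"}

	fmt.Println(Keys(props)) // [hdi_isfolder foo] (map order not guaranteed)

	// Drop the folder-stub marker while cloning, e.g. to compare user metadata only.
	userMeta := CloneMapWithRule(props, func(k, v string) (string, string, bool) {
		return k, v, !strings.EqualFold(k, "hdi_isfolder")
	})
	fmt.Println(userMeta) // map[foo:bar]

	// Predicate-based membership test.
	fmt.Println(Any(Keys(props), func(k string) bool { return k == "foo" })) // true
}
```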
45 changes: 42 additions & 3 deletions e2etest/newe2e_object_content.go
@@ -7,6 +7,8 @@ import (
"github.com/Azure/azure-storage-azcopy/v10/common"
"io"
"math/rand"
"strconv"
"time"
)

type ObjectContentContainer interface {
@@ -25,10 +27,47 @@ func SizeFromString(objectSize string) int64 {
return longSize
}

func NewRandomObjectContentContainer(a Asserter, size int64) ObjectContentContainer {
var megaSize = []string{
"B",
"KB",
"MB",
"GB",
"TB",
"PB",
"EB",
}

func SizeToString(size int64, megaUnits bool) string {
units := []string{
"B",
"KiB",
"MiB",
"GiB",
"TiB",
"PiB",
"EiB", // Let's face it, a file, account, or container probably won't be more than 1000 exabytes in YEARS.
// (and int64 literally isn't large enough to handle too many exbibytes. 128 bit processors when)
}
unit := 0
floatSize := float64(size)
gigSize := 1024

if megaUnits {
gigSize = 1000
units = megaSize
}

for floatSize/float64(gigSize) >= 1 {
unit++
floatSize /= float64(gigSize)
}

return strconv.FormatFloat(floatSize, 'f', 2, 64) + " " + units[unit]
}

func NewRandomObjectContentContainer(size int64) ObjectContentContainer {
buf := make([]byte, size)
_, err := rand.Read(buf)
a.NoError("Generate random data", err)
_, _ = rand.New(rand.NewSource(time.Now().Unix())).Read(buf)
return &ObjectContentContainerBuffer{buf}
}
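`SizeToString` renders byte counts in binary units (KiB/MiB, divisor 1024) or decimal units (KB/MB, divisor 1000) depending on `megaUnits`, and `NewRandomObjectContentContainer` now seeds its own `math/rand` source rather than taking an `Asserter`. A hedged, test-style illustration of the formatter, assuming it lives in package `e2etest` as added above; the expected strings follow from the divisor logic:

```go
package e2etest

import "testing"

// TestSizeToStringSketch illustrates the new helper's output; it is a sketch,
// not part of the PR's test suite.
func TestSizeToStringSketch(t *testing.T) {
	cases := []struct {
		size      int64
		megaUnits bool
		want      string
	}{
		{512, false, "512.00 B"},
		{1536, false, "1.50 KiB"},    // 1536 / 1024
		{1536, true, "1.54 KB"},      // 1536 / 1000, rounded to two decimals
		{1 << 20, false, "1.00 MiB"}, // 1024 * 1024
		{2_500_000, true, "2.50 MB"},
	}
	for _, c := range cases {
		if got := SizeToString(c.size, c.megaUnits); got != c.want {
			t.Errorf("SizeToString(%d, %v) = %q, want %q", c.size, c.megaUnits, got, c.want)
		}
	}
}
```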
