From 2728303b77240d3e7bfe38f0b33d3f325c34ac37 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 8 Feb 2024 17:20:48 -0800 Subject: [PATCH 1/8] HADOOP-19072. S3A: expand optimisations on stores with "fs.s3a.create.performance" --- .../filesystem/fsdataoutputstreambuilder.md | 6 +- .../fs/FileContextCreateMkdirBaseTest.java | 7 +- .../contract/AbstractContractMkdirTest.java | 7 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 7 +- .../hadoop/fs/s3a/impl/MkdirOperation.java | 55 ++++++++++---- .../contract/s3a/ITestS3AContractMkdir.java | 11 +++ .../ITestS3AContractMkdirWithCreatePerf.java | 69 ++++++++++++++++++ .../ITestS3AFileContextCreateMkdir.java | 9 ++- ...stS3AFileContextCreateMkdirCreatePerf.java | 72 +++++++++++++++++++ 9 files changed, 219 insertions(+), 24 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md index 5f24e75569786..b4e6191fc369d 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md @@ -200,8 +200,10 @@ Prioritize file creation performance over safety checks for filesystem consisten This: 1. Skips the `LIST` call which makes sure a file is being created over a directory. Risk: a file is created over a directory. -1. Ignores the overwrite flag. -1. Never issues a `DELETE` call to delete parent directory markers. +2. Ignores the overwrite flag. +3. Never issues a `DELETE` call to delete parent directory markers. +4. Mkdir does not check whether the parent is directory of file. + Risk: a dir can be created even though its parent is a file. It is possible to probe an S3A Filesystem instance for this capability through the `hasPathCapability(path, "fs.s3a.create.performance")` check. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java index fbd598c9deb6a..4081ce6abdb91 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java @@ -55,7 +55,10 @@ public abstract class FileContextCreateMkdirBaseTest { protected final FileContextTestHelper fileContextTestHelper; protected static FileContext fc; - + + public static final String MKDIR_FILE_PRESENT_ERROR = + " should have failed as a file was present"; + static { GenericTestUtils.setLogLevel(FileSystem.LOG, Level.DEBUG); } @@ -144,7 +147,7 @@ public void testMkdirRecursiveWithExistingFile() throws IOException { try { fc.mkdir(dirPath, FileContext.DEFAULT_PERM, true); Assert.fail("Mkdir for " + dirPath - + " should have failed as a file was present"); + + MKDIR_FILE_PRESENT_ERROR); } catch(IOException e) { // failed as expected } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java index de44bc232e784..65ca0ee218fd9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java @@ -35,6 +35,9 @@ */ public abstract class AbstractContractMkdirTest extends AbstractFSContractTestBase { + public static final String MKDIRS_NOT_FAILED_OVER_FILE = + "mkdirs did not fail over a file but returned "; + @Test public void testMkDirRmDir() throws Throwable { FileSystem fs = getFileSystem(); @@ -66,7 +69,7 @@ public void testNoMkdirOverFile() throws Throwable { createFile(getFileSystem(), path, false, dataset); try { boolean made = fs.mkdirs(path); - fail("mkdirs did not fail over a file but returned " + made + fail(MKDIRS_NOT_FAILED_OVER_FILE + made + "; " + ls(path)); } catch (ParentNotDirectoryException | FileAlreadyExistsException e) { //parent is a directory @@ -93,7 +96,7 @@ public void testMkdirOverParentFile() throws Throwable { Path child = new Path(path,"child-to-mkdir"); try { boolean made = fs.mkdirs(child); - fail("mkdirs did not fail over a file but returned " + made + fail(MKDIRS_NOT_FAILED_OVER_FILE + made + "; " + ls(path)); } catch (ParentNotDirectoryException | FileAlreadyExistsException e) { //parent is a directory diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 3aec03766dacf..86e0a6adaa38b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3769,7 +3769,8 @@ public boolean mkdirs(Path p, FsPermission permission) throws IOException, createStoreContext(), path, createMkdirOperationCallbacks(), - isMagicCommitPath(path))); + isMagicCommitPath(path), + performanceCreation)); } /** @@ -4207,7 +4208,9 @@ public boolean createEmptyDir(Path path, StoreContext storeContext) new MkdirOperation( storeContext, path, - createMkdirOperationCallbacks(), false)); + createMkdirOperationCallbacks(), + false, + performanceCreation)); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java index 98a91b1881ba1..89ccde195e638 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java @@ -26,6 +26,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; @@ -54,6 +56,8 @@ *
  • If needed, one PUT
  • * */ +@InterfaceAudience.Private +@InterfaceStability.Evolving public class MkdirOperation extends ExecutingStoreOperation { private static final Logger LOG = LoggerFactory.getLogger( @@ -63,6 +67,8 @@ public class MkdirOperation extends ExecutingStoreOperation { private final MkdirCallbacks callbacks; + private final boolean performanceCreation; + /** * Should checks for ancestors existing be skipped? * This flag is set when working with magic directories. @@ -73,11 +79,13 @@ public MkdirOperation( final StoreContext storeContext, final Path dir, final MkdirCallbacks callbacks, - final boolean isMagicPath) { + final boolean isMagicPath, + final boolean performanceCreation) { super(storeContext); this.dir = dir; this.callbacks = callbacks; this.isMagicPath = isMagicPath; + this.performanceCreation = performanceCreation; } /** @@ -124,7 +132,32 @@ public Boolean execute() throws IOException { return true; } - // Walk path to root, ensuring closest ancestor is a directory, not file + // if performance creation mode is set, no need to check + // whether the closest ancestor is dir. + if (!performanceCreation) { + verifyFileStatusOfClosestAncestor(); + } + + // if we get here there is no directory at the destination. + // so create one. + + // Create the marker file, delete the parent entries + // if the filesystem isn't configured to retain them + callbacks.createFakeDirectory(dir, false); + return true; + } + + /** + * Verify the file status of the closest ancestor, if it is + * dir, the mkdir operation should proceed. If it is file, + * the mkdir operation should throw error. + * + * @throws IOException If either file status could not be retrieved, + * or if the closest ancestor is a file. + */ + private void verifyFileStatusOfClosestAncestor() throws IOException { + FileStatus fileStatus; + // Walk path to root, ensuring the closest ancestor is a directory, not file Path fPart = dir.getParent(); try { while (fPart != null && !fPart.isRoot()) { @@ -140,24 +173,18 @@ public Boolean execute() throws IOException { } // there's a file at the parent entry - throw new FileAlreadyExistsException(String.format( - "Can't make directory for path '%s' since it is a file.", - fPart)); + throw new FileAlreadyExistsException( + String.format( + "Can't make directory for path '%s' since it is a file.", + fPart)); } } catch (AccessDeniedException e) { LOG.info("mkdirs({}}: Access denied when looking" + " for parent directory {}; skipping checks", - dir, fPart); + dir, + fPart); LOG.debug("{}", e, e); } - - // if we get here there is no directory at the destination. - // so create one. - - // Create the marker file, delete the parent entries - // if the filesystem isn't configured to retain them - callbacks.createFakeDirectory(dir, false); - return true; } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java index d953e7eb6aea9..bace0a79f2458 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java @@ -22,11 +22,22 @@ import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; import org.apache.hadoop.fs.contract.AbstractFSContract; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; + /** * Test dir operations on S3A. */ public class ITestS3AContractMkdir extends AbstractContractMkdirTest { + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + removeBaseAndBucketOverrides(conf, + FS_S3A_CREATE_PERFORMANCE); + return conf; + } + @Override protected AbstractFSContract createContract(Configuration conf) { return new S3AContract(conf); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java new file mode 100644 index 0000000000000..fcb11c4f85280 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.s3a; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; + +/** + * Test mkdir operations on S3A with create performance mode. + */ +public class ITestS3AContractMkdirWithCreatePerf extends AbstractContractMkdirTest { + + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + removeBaseAndBucketOverrides( + conf, + FS_S3A_CREATE_PERFORMANCE); + conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, + true); + return conf; + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new S3AContract(conf); + } + + @Test + public void testMkdirOverParentFile() throws Throwable { + try { + super.testMkdirOverParentFile(); + throw new RuntimeException( + "Dir creation should not have failed. " + + "Creation performance mode is expected " + + "to create dir without checking file " + + "status of parent dir."); + } catch (AssertionError e) { + Assertions + .assertThat(e) + .describedAs("assertion error from testMkdirOverParentFile") + .hasMessageStartingWith(MKDIRS_NOT_FAILED_OVER_FILE); + } + } + +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java index dcc9da933656f..e71ca2ae52ca0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java @@ -13,12 +13,14 @@ */ package org.apache.hadoop.fs.s3a.fileContext; -import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest; import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.junit.Before; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; + /** * Extends FileContextCreateMkdirBaseTest for a S3a FileContext. */ @@ -26,8 +28,11 @@ public class ITestS3AFileContextCreateMkdir extends FileContextCreateMkdirBaseTest { @Before - public void setUp() throws IOException, Exception { + public void setUp() throws Exception { Configuration conf = new Configuration(); + removeBaseAndBucketOverrides( + conf, + FS_S3A_CREATE_PERFORMANCE); fc = S3ATestUtils.createTestFileContext(conf); super.setUp(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java new file mode 100644 index 0000000000000..99afd6a187ed4 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java @@ -0,0 +1,72 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.s3a.fileContext; + +import java.io.IOException; + +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest; +import org.apache.hadoop.fs.s3a.S3ATestUtils; + +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; + +/** + * Extends FileContextCreateMkdirBaseTest for a S3a FileContext with + * create performance mode. + */ +public class ITestS3AFileContextCreateMkdirCreatePerf + extends FileContextCreateMkdirBaseTest { + + @Before + public void setUp() throws Exception { + Configuration conf = new Configuration(); + removeBaseAndBucketOverrides( + conf, + FS_S3A_CREATE_PERFORMANCE); + conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, + true); + fc = S3ATestUtils.createTestFileContext(conf); + super.setUp(); + } + + @Override + public void tearDown() throws Exception { + if (fc != null) { + super.tearDown(); + } + } + + @Test + public void testMkdirRecursiveWithExistingFile() throws IOException { + try { + super.testMkdirRecursiveWithExistingFile(); + throw new RuntimeException( + "Dir creation should not have failed. " + + "Creation performance mode is expected " + + "to create dir without checking file " + + "status of parent dir."); + } catch (AssertionError e) { + Assertions + .assertThat(e) + .describedAs("assertion error from testMkdirOverParentFile") + .hasMessageContaining(MKDIR_FILE_PRESENT_ERROR); + } + } + +} From 3874f72bc9e7a186e03a64cc777f0235b8029f02 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 8 Feb 2024 20:31:40 -0800 Subject: [PATCH 2/8] minor change --- .../fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java index 99afd6a187ed4..429ac44567907 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java @@ -64,7 +64,7 @@ public void testMkdirRecursiveWithExistingFile() throws IOException { } catch (AssertionError e) { Assertions .assertThat(e) - .describedAs("assertion error from testMkdirOverParentFile") + .describedAs("assertion error from testMkdirRecursiveWithExistingFile") .hasMessageContaining(MKDIR_FILE_PRESENT_ERROR); } } From 12e6bff73832fd72a82ad43667aa531b4976d1ab Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 15 Feb 2024 23:57:21 -0800 Subject: [PATCH 3/8] source and test changes --- .../fs/FileContextCreateMkdirBaseTest.java | 16 +++---- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 4 +- .../hadoop/fs/s3a/impl/MkdirOperation.java | 42 ++++++++----------- .../ITestS3AContractMkdirWithCreatePerf.java | 28 ++++++++----- ...stS3AFileContextCreateMkdirCreatePerf.java | 24 +++++------ 5 files changed, 53 insertions(+), 61 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java index 4081ce6abdb91..fcb1b6925a494 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java @@ -27,6 +27,7 @@ import static org.apache.hadoop.fs.FileContextTestHelper.*; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsDirectory; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; import org.apache.hadoop.test.GenericTestUtils; import org.slf4j.event.Level; @@ -131,7 +132,7 @@ public void testMkdirsRecursiveWithExistingDir() throws IOException { } @Test - public void testMkdirRecursiveWithExistingFile() throws IOException { + public void testMkdirRecursiveWithExistingFile() throws Exception { Path f = getTestRootPath(fc, "NonExistant3/aDir"); fc.mkdir(f, FileContext.DEFAULT_PERM, true); assertIsDirectory(fc.getFileStatus(f)); @@ -144,13 +145,12 @@ public void testMkdirRecursiveWithExistingFile() throws IOException { // try creating another folder which conflicts with filePath Path dirPath = new Path(filePath, "bDir/cDir"); - try { - fc.mkdir(dirPath, FileContext.DEFAULT_PERM, true); - Assert.fail("Mkdir for " + dirPath - + MKDIR_FILE_PRESENT_ERROR); - } catch(IOException e) { - // failed as expected - } + intercept( + IOException.class, + null, + "Mkdir for " + dirPath + MKDIR_FILE_PRESENT_ERROR, + () -> fc.mkdir(dirPath, FileContext.DEFAULT_PERM, true) + ); } @Test diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 86e0a6adaa38b..b958c35aaa20f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3769,8 +3769,7 @@ public boolean mkdirs(Path p, FsPermission permission) throws IOException, createStoreContext(), path, createMkdirOperationCallbacks(), - isMagicCommitPath(path), - performanceCreation)); + performanceCreation || isMagicCommitPath(path))); } /** @@ -4209,7 +4208,6 @@ public boolean createEmptyDir(Path path, StoreContext storeContext) storeContext, path, createMkdirOperationCallbacks(), - false, performanceCreation)); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java index 89ccde195e638..9a4dc7d454a0b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java @@ -70,21 +70,22 @@ public class MkdirOperation extends ExecutingStoreOperation { private final boolean performanceCreation; /** - * Should checks for ancestors existing be skipped? - * This flag is set when working with magic directories. + * Initialize Mkdir Operation context for S3A. + * + * @param storeContext Store context. + * @param dir Dir path of the directory. + * @param callbacks MkdirCallbacks object used by the Mkdir operation. + * @param performanceCreation If true, skip validation of the parent directory + * structure. */ - private final boolean isMagicPath; - public MkdirOperation( final StoreContext storeContext, final Path dir, final MkdirCallbacks callbacks, - final boolean isMagicPath, final boolean performanceCreation) { super(storeContext); this.dir = dir; this.callbacks = callbacks; - this.isMagicPath = isMagicPath; this.performanceCreation = performanceCreation; } @@ -106,14 +107,13 @@ public Boolean execute() throws IOException { } // get the file status of the path. - // this is done even for a magic path, to avoid always issuing PUT - // requests. Doing that without a check wouild seem to be an - // optimization, but it is not because - // 1. PUT is slower than HEAD - // 2. Write capacity is less than read capacity on a shard - // 3. It adds needless entries in versioned buckets, slowing - // down subsequent operations. - FileStatus fileStatus = getPathStatusExpectingDir(dir); + // this is not done for magic path i.e. performanceCreation mode. + // For performanceCreation mode, we would probe for HEAD only. + // For non-performance or regular mode, the probe for both HEAD and LIST would + // be done. + S3AFileStatus fileStatus = performanceCreation + ? probePathStatusOrNull(dir, StatusProbeEnum.HEAD_ONLY) + : getPathStatusExpectingDir(dir); if (fileStatus != null) { if (fileStatus.isDirectory()) { return true; @@ -123,15 +123,6 @@ public Boolean execute() throws IOException { } // file status was null - // is the path magic? - // If so, we declare success without looking any further - if (isMagicPath) { - // Create the marker file immediately, - // and don't delete markers - callbacks.createFakeDirectory(dir, true); - return true; - } - // if performance creation mode is set, no need to check // whether the closest ancestor is dir. if (!performanceCreation) { @@ -143,7 +134,8 @@ public Boolean execute() throws IOException { // Create the marker file, delete the parent entries // if the filesystem isn't configured to retain them - callbacks.createFakeDirectory(dir, false); + callbacks.createFakeDirectory(dir, + performanceCreation); return true; } @@ -220,7 +212,7 @@ private S3AFileStatus getPathStatusExpectingDir(final Path path) throws IOException { S3AFileStatus status = probePathStatusOrNull(path, StatusProbeEnum.DIRECTORIES); - if (status == null && !isMagicPath) { + if (status == null && !performanceCreation) { status = probePathStatusOrNull(path, StatusProbeEnum.FILE); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java index fcb11c4f85280..34d85201a060e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java @@ -18,7 +18,9 @@ package org.apache.hadoop.fs.contract.s3a; -import org.assertj.core.api.Assertions; +import java.io.IOException; +import java.io.UncheckedIOException; + import org.junit.Test; import org.apache.hadoop.conf.Configuration; @@ -27,6 +29,7 @@ import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Test mkdir operations on S3A with create performance mode. @@ -51,18 +54,21 @@ protected AbstractFSContract createContract(Configuration conf) { @Test public void testMkdirOverParentFile() throws Throwable { + intercept( + UncheckedIOException.class, + MKDIRS_NOT_FAILED_OVER_FILE, + "Dir creation should not have failed. " + + "Creation performance mode is expected " + + "to create dir without checking file " + + "status of parent dir.", + this::callTestMkdirOverParentFile); + } + + private void callTestMkdirOverParentFile() { try { super.testMkdirOverParentFile(); - throw new RuntimeException( - "Dir creation should not have failed. " - + "Creation performance mode is expected " - + "to create dir without checking file " - + "status of parent dir."); - } catch (AssertionError e) { - Assertions - .assertThat(e) - .describedAs("assertion error from testMkdirOverParentFile") - .hasMessageStartingWith(MKDIRS_NOT_FAILED_OVER_FILE); + } catch (Throwable e) { + throw new UncheckedIOException(new IOException(e)); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java index 429ac44567907..96c5f70c9b8ee 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java @@ -25,6 +25,7 @@ import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Extends FileContextCreateMkdirBaseTest for a S3a FileContext with @@ -53,20 +54,15 @@ public void tearDown() throws Exception { } @Test - public void testMkdirRecursiveWithExistingFile() throws IOException { - try { - super.testMkdirRecursiveWithExistingFile(); - throw new RuntimeException( - "Dir creation should not have failed. " - + "Creation performance mode is expected " - + "to create dir without checking file " - + "status of parent dir."); - } catch (AssertionError e) { - Assertions - .assertThat(e) - .describedAs("assertion error from testMkdirRecursiveWithExistingFile") - .hasMessageContaining(MKDIR_FILE_PRESENT_ERROR); - } + public void testMkdirRecursiveWithExistingFile() throws Exception { + intercept( + AssertionError.class, + MKDIR_FILE_PRESENT_ERROR, + "Dir creation should not have failed. " + + "Creation performance mode is expected " + + "to create dir without checking file " + + "status of parent dir.", + super::testMkdirRecursiveWithExistingFile); } } From ff8a9d2e9dab36887a266a0eb00d6268c4331a1c Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Fri, 16 Feb 2024 00:07:09 -0800 Subject: [PATCH 4/8] remove unused import --- .../fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java index 96c5f70c9b8ee..430bc43533fe9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java @@ -13,9 +13,6 @@ */ package org.apache.hadoop.fs.s3a.fileContext; -import java.io.IOException; - -import org.assertj.core.api.Assertions; import org.junit.Before; import org.junit.Test; From 8d3012c3e40f3c0575951064ea43fa790debb974 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 1 Apr 2024 23:16:03 -0700 Subject: [PATCH 5/8] keep testMkdirOverParentFile simpler --- .../ITestS3AContractMkdirWithCreatePerf.java | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java index 34d85201a060e..7c0f61711f66e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java @@ -18,18 +18,19 @@ package org.apache.hadoop.fs.contract.s3a; -import java.io.IOException; -import java.io.UncheckedIOException; - import org.junit.Test; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; -import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Test mkdir operations on S3A with create performance mode. @@ -54,22 +55,19 @@ protected AbstractFSContract createContract(Configuration conf) { @Test public void testMkdirOverParentFile() throws Throwable { - intercept( - UncheckedIOException.class, - MKDIRS_NOT_FAILED_OVER_FILE, - "Dir creation should not have failed. " - + "Creation performance mode is expected " - + "to create dir without checking file " - + "status of parent dir.", - this::callTestMkdirOverParentFile); - } - - private void callTestMkdirOverParentFile() { - try { - super.testMkdirOverParentFile(); - } catch (Throwable e) { - throw new UncheckedIOException(new IOException(e)); - } + describe("try to mkdir where a parent is a file, should pass"); + FileSystem fs = getFileSystem(); + Path path = methodPath(); + byte[] dataset = dataset(1024, ' ', 'z'); + createFile(getFileSystem(), path, false, dataset); + Path child = new Path(path, "child-to-mkdir"); + boolean childCreated = fs.mkdirs(child); + assertTrue("Child dir is created", childCreated); + assertIsFile(path); + byte[] bytes = ContractTestUtils.readDataset(getFileSystem(), path, dataset.length); + ContractTestUtils.compareByteArrays(dataset, bytes, dataset.length); + assertPathExists("mkdir failed", child); + assertDeleted(child, true); } } From fc306d5e87f9ba64320b9b181f2d4d0f71e678b2 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Wed, 3 Apr 2024 21:18:28 -0700 Subject: [PATCH 6/8] Revert "source and test changes" This reverts commit 12e6bff73832fd72a82ad43667aa531b4976d1ab. --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 4 ++- .../hadoop/fs/s3a/impl/MkdirOperation.java | 34 +++++++++++++------ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index b958c35aaa20f..86e0a6adaa38b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3769,7 +3769,8 @@ public boolean mkdirs(Path p, FsPermission permission) throws IOException, createStoreContext(), path, createMkdirOperationCallbacks(), - performanceCreation || isMagicCommitPath(path))); + isMagicCommitPath(path), + performanceCreation)); } /** @@ -4208,6 +4209,7 @@ public boolean createEmptyDir(Path path, StoreContext storeContext) storeContext, path, createMkdirOperationCallbacks(), + false, performanceCreation)); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java index 9a4dc7d454a0b..99405ae6b128f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java @@ -69,12 +69,15 @@ public class MkdirOperation extends ExecutingStoreOperation { private final boolean performanceCreation; + private final boolean isMagicPath; + /** * Initialize Mkdir Operation context for S3A. * * @param storeContext Store context. * @param dir Dir path of the directory. * @param callbacks MkdirCallbacks object used by the Mkdir operation. + * @param isMagicPath True if the path is magic commit path. * @param performanceCreation If true, skip validation of the parent directory * structure. */ @@ -82,10 +85,12 @@ public MkdirOperation( final StoreContext storeContext, final Path dir, final MkdirCallbacks callbacks, + final boolean isMagicPath, final boolean performanceCreation) { super(storeContext); this.dir = dir; this.callbacks = callbacks; + this.isMagicPath = isMagicPath; this.performanceCreation = performanceCreation; } @@ -107,13 +112,14 @@ public Boolean execute() throws IOException { } // get the file status of the path. - // this is not done for magic path i.e. performanceCreation mode. - // For performanceCreation mode, we would probe for HEAD only. - // For non-performance or regular mode, the probe for both HEAD and LIST would - // be done. - S3AFileStatus fileStatus = performanceCreation - ? probePathStatusOrNull(dir, StatusProbeEnum.HEAD_ONLY) - : getPathStatusExpectingDir(dir); + // this is done even for a magic path, to avoid always issuing PUT + // requests. Doing that without a check wouild seem to be an + // optimization, but it is not because + // 1. PUT is slower than HEAD + // 2. Write capacity is less than read capacity on a shard + // 3. It adds needless entries in versioned buckets, slowing + // down subsequent operations. + FileStatus fileStatus = getPathStatusExpectingDir(dir); if (fileStatus != null) { if (fileStatus.isDirectory()) { return true; @@ -123,6 +129,15 @@ public Boolean execute() throws IOException { } // file status was null + // is the path magic? + // If so, we declare success without looking any further + if (isMagicPath) { + // Create the marker file immediately, + // and don't delete markers + callbacks.createFakeDirectory(dir, true); + return true; + } + // if performance creation mode is set, no need to check // whether the closest ancestor is dir. if (!performanceCreation) { @@ -134,8 +149,7 @@ public Boolean execute() throws IOException { // Create the marker file, delete the parent entries // if the filesystem isn't configured to retain them - callbacks.createFakeDirectory(dir, - performanceCreation); + callbacks.createFakeDirectory(dir, false); return true; } @@ -212,7 +226,7 @@ private S3AFileStatus getPathStatusExpectingDir(final Path path) throws IOException { S3AFileStatus status = probePathStatusOrNull(path, StatusProbeEnum.DIRECTORIES); - if (status == null && !performanceCreation) { + if (status == null && !isMagicPath) { status = probePathStatusOrNull(path, StatusProbeEnum.FILE); } From 142f0d4db75fa02576b95b819b36a6aab67f596c Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 29 Jul 2024 14:59:38 -0700 Subject: [PATCH 7/8] sync changes --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 4 ++-- .../hadoop/fs/s3a/impl/MkdirOperation.java | 22 ++++++++++++++----- .../ITestS3AContractMkdirWithCreatePerf.java | 8 ++++--- ...stS3AFileContextCreateMkdirCreatePerf.java | 8 ++++--- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 447e2cd657a64..25b036b5fc7f9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3829,7 +3829,7 @@ public boolean mkdirs(Path p, FsPermission permission) throws IOException, path, createMkdirOperationCallbacks(), isMagicCommitPath(path), - performanceCreation)); + performanceFlags.enabled(PerformanceFlagEnum.Mkdir))); } /** @@ -4284,7 +4284,7 @@ public boolean createEmptyDir(Path path, StoreContext storeContext) path, createMkdirOperationCallbacks(), false, - performanceCreation)); + performanceFlags.enabled(PerformanceFlagEnum.Mkdir))); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java index 99405ae6b128f..a027cabffd46d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java @@ -63,12 +63,24 @@ public class MkdirOperation extends ExecutingStoreOperation { private static final Logger LOG = LoggerFactory.getLogger( MkdirOperation.class); + /** + * Path of the directory to be created. + */ private final Path dir; + /** + * Mkdir Callbacks object to be used by the Mkdir operation. + */ private final MkdirCallbacks callbacks; - private final boolean performanceCreation; + /** + * Whether to skip the validation of the parent directory. + */ + private final boolean performanceMkdir; + /** + * Whether the path is magic commit path. + */ private final boolean isMagicPath; /** @@ -78,7 +90,7 @@ public class MkdirOperation extends ExecutingStoreOperation { * @param dir Dir path of the directory. * @param callbacks MkdirCallbacks object used by the Mkdir operation. * @param isMagicPath True if the path is magic commit path. - * @param performanceCreation If true, skip validation of the parent directory + * @param performanceMkdir If true, skip validation of the parent directory * structure. */ public MkdirOperation( @@ -86,12 +98,12 @@ public MkdirOperation( final Path dir, final MkdirCallbacks callbacks, final boolean isMagicPath, - final boolean performanceCreation) { + final boolean performanceMkdir) { super(storeContext); this.dir = dir; this.callbacks = callbacks; this.isMagicPath = isMagicPath; - this.performanceCreation = performanceCreation; + this.performanceMkdir = performanceMkdir; } /** @@ -140,7 +152,7 @@ public Boolean execute() throws IOException { // if performance creation mode is set, no need to check // whether the closest ancestor is dir. - if (!performanceCreation) { + if (!performanceMkdir) { verifyFileStatusOfClosestAncestor(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java index 7c0f61711f66e..cacd6945d2fa0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java @@ -30,6 +30,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; /** @@ -42,9 +43,10 @@ protected Configuration createConfiguration() { Configuration conf = super.createConfiguration(); removeBaseAndBucketOverrides( conf, - FS_S3A_CREATE_PERFORMANCE); - conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, - true); + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); + conf.setStrings(FS_S3A_PERFORMANCE_FLAGS, + "create,mkdir"); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java index 430bc43533fe9..64039e4c5206c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java @@ -21,6 +21,7 @@ import org.apache.hadoop.fs.s3a.S3ATestUtils; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -36,9 +37,10 @@ public void setUp() throws Exception { Configuration conf = new Configuration(); removeBaseAndBucketOverrides( conf, - FS_S3A_CREATE_PERFORMANCE); - conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, - true); + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); + conf.setStrings(FS_S3A_PERFORMANCE_FLAGS, + "mkdir"); fc = S3ATestUtils.createTestFileContext(conf); super.setUp(); } From e7ef0d8eff7dccfa4a521a551a88905a98918258 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Tue, 30 Jul 2024 13:12:07 -0700 Subject: [PATCH 8/8] doc updates --- .../filesystem/fsdataoutputstreambuilder.md | 2 -- .../markdown/tools/hadoop-aws/performance.md | 21 ++++++++++++++++++- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md index b4e6191fc369d..7dd3170036ce9 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md @@ -202,8 +202,6 @@ This: Risk: a file is created over a directory. 2. Ignores the overwrite flag. 3. Never issues a `DELETE` call to delete parent directory markers. -4. Mkdir does not check whether the parent is directory of file. - Risk: a dir can be created even though its parent is a file. It is possible to probe an S3A Filesystem instance for this capability through the `hasPathCapability(path, "fs.s3a.create.performance")` check. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md index 876072e81e8fd..b8cb3ff732b36 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md @@ -299,8 +299,11 @@ understands the risks. | *Option* | *Meaning* | Since | |----------|--------------------|:------| | `create` | Create Performance | 3.4.1 | +| `mkdir` | Mkdir Performance | 3.4.1 | -The `create` flag has the same semantics as [`fs.s3a.create.performance`](#create-performance) + +* The `create` flag has the same semantics as [`fs.s3a.create.performance`](#create-performance) +* The `mkdir` flag semantics are explained in [Mkdir Performance](#mkdir-performance) ### Create Performance `fs.s3a.create.performance` @@ -321,6 +324,22 @@ It may however result in Use with care, and, ideally, enable versioning on the S3 store. + +### Mkdir Performance + +`fs.s3a.performance.flag` flag option `mkdir`: + +* Mkdir does not check whether the parent is directory or file. + +This avoids the verification of the file status of the parent file +or the closest ancestor. Unlike the default mkdir operation, if the +parent is not a directory, the mkdir operation does not throw any +error. + +This option can help with mkdir performance improvement but must be used +only if the person setting them understands the above-mentioned risk. + + ### Thread and connection pool settings. Each S3A client interacting with a single bucket, as a single user, has its