Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor list_large_dir tests to use storage client to delete directories instead of gcloud. #2905

Merged
merged 8 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package list_large_dir_test
package list_large_dir

import (
"fmt"
"math"
"os"
"path"
Expand All @@ -23,140 +24,115 @@ import (
"testing"
"time"

. "github.com/googlecloudplatform/gcsfuse/v2/tools/integration_tests/util/client"
"github.com/googlecloudplatform/gcsfuse/v2/tools/integration_tests/util/operations"
"github.com/googlecloudplatform/gcsfuse/v2/tools/integration_tests/util/setup"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
)

// //////////////////////////////////////////////////////////////////////
// Helpers
// Boilerplate
// //////////////////////////////////////////////////////////////////////
func validateDirectoryWithTwelveThousandFiles(objs []os.DirEntry, t *testing.T) {
// number of objs - 12000
if len(objs) != NumberOfFilesInDirectoryWithTwelveThousandFiles {
t.Errorf("Listed incorrect number of files from directory: %v, expected 12000", len(objs))
}

// Checking if all the object is File type.
for i := 0; i < len(objs); i++ {
if objs[i].IsDir() {
t.Errorf("Listed object is incorrect.")
}
}

for i := 0; i < len(objs); i++ {
checkIfObjNameIsCorrect(objs[i].Name(), PrefixFileInDirectoryWithTwelveThousandFiles, NumberOfFilesInDirectoryWithTwelveThousandFiles, t)
}
type listLargeDir struct {
suite.Suite
}

func validateDirectoryWithTwelveThousandFilesHundredExplicitDirAndHundredImplicitDir(objs []os.DirEntry, t *testing.T) {
var numberOfFiles = 0
var numberOfDirs = 0
func (t *listLargeDir) TearDownSuite() {
err := DeleteAllObjectsWithPrefix(ctx, storageClient, t.T().Name())
assert.NoError(t.T(), err)
}

// Checking if correct objects present in bucket.
for i := 0; i < len(objs); i++ {
if !objs[i].IsDir() {
numberOfFiles++
// //////////////////////////////////////////////////////////////////////
// Helpers
// //////////////////////////////////////////////////////////////////////

// Checking if Prefix1 to Prefix12000 present in the bucket
checkIfObjNameIsCorrect(objs[i].Name(), PrefixFileInDirectoryWithTwelveThousandFiles, NumberOfFilesInDirectoryWithTwelveThousandFiles, t)
}
// validateDirectory checks if the directory listing matches expectations.
func validateDirectory(t *testing.T, objs []os.DirEntry, expectExplicitDirs, expectImplicitDirs bool) {
t.Helper()

if objs[i].IsDir() {
numberOfDirs++
var (
numberOfFiles int
numberOfExplicitDirs int
numberOfImplicitDirs int
)

if strings.Contains(objs[i].Name(), PrefixExplicitDirInLargeDirListTest) {
// Checking if explicitDir1 to explicitDir100 present in the bucket.
checkIfObjNameIsCorrect(objs[i].Name(), PrefixExplicitDirInLargeDirListTest, NumberOfExplicitDirsInDirectoryWithTwelveThousandFiles, t)
} else {
// Checking if implicitDir1 to implicitDir100 present in the bucket.
checkIfObjNameIsCorrect(objs[i].Name(), PrefixImplicitDirInLargeDirListTest, NumberOfImplicitDirsInDirectoryWithTwelveThousandFiles, t)
}
for _, obj := range objs {
if !obj.IsDir() {
numberOfFiles++
checkIfObjNameIsCorrect(t, obj.Name(), prefixFileInDirectoryWithTwelveThousandFiles, numberOfFilesInDirectoryWithTwelveThousandFiles)
} else if strings.Contains(obj.Name(), prefixExplicitDirInLargeDirListTest) {
numberOfExplicitDirs++
checkIfObjNameIsCorrect(t, obj.Name(), prefixExplicitDirInLargeDirListTest, numberOfExplicitDirsInDirectoryWithTwelveThousandFiles)
} else if strings.Contains(obj.Name(), prefixImplicitDirInLargeDirListTest) {
numberOfImplicitDirs++
checkIfObjNameIsCorrect(t, obj.Name(), prefixImplicitDirInLargeDirListTest, numberOfImplicitDirsInDirectoryWithTwelveThousandFiles)
}
}

// number of dirs = 200(Number of implicit + Number of explicit directories)
if numberOfDirs != NumberOfImplicitDirsInDirectoryWithTwelveThousandFiles+NumberOfExplicitDirsInDirectoryWithTwelveThousandFiles {
t.Errorf("Listed incorrect number of directories from directory: %v, expected 200", numberOfDirs)
}
// number of files = 12000
if numberOfFiles != NumberOfFilesInDirectoryWithTwelveThousandFiles {
t.Errorf("Listed incorrect number of files from directory: %v, expected 12000", numberOfFiles)
if numberOfFiles != numberOfFilesInDirectoryWithTwelveThousandFiles {
t.Errorf("Incorrect number of files: got %d, want %d", numberOfFiles, numberOfFilesInDirectoryWithTwelveThousandFiles)
}
}

func validateDirectoryWithTwelveThousandFilesAndHundredExplicitDirectory(objs []os.DirEntry, t *testing.T) {
var numberOfFiles = 0
var numberOfDirs = 0

// Checking if correct objects present in bucket.
for i := 0; i < len(objs); i++ {
if !objs[i].IsDir() {
numberOfFiles++
// Checking if Prefix1 to Prefix12000 present in the bucket
checkIfObjNameIsCorrect(objs[i].Name(), PrefixFileInDirectoryWithTwelveThousandFiles, NumberOfFilesInDirectoryWithTwelveThousandFiles, t)
}

if objs[i].IsDir() {
numberOfDirs++
// Checking if Prefix1 to Prefix100 present in the bucket
checkIfObjNameIsCorrect(objs[i].Name(), PrefixExplicitDirInLargeDirListTest, NumberOfExplicitDirsInDirectoryWithTwelveThousandFiles, t)
}
if expectExplicitDirs && numberOfExplicitDirs != numberOfExplicitDirsInDirectoryWithTwelveThousandFiles {
t.Errorf("Incorrect number of explicit directories: got %d, want %d", numberOfExplicitDirs, numberOfExplicitDirsInDirectoryWithTwelveThousandFiles)
}

// number of explicit dirs = 100
if numberOfDirs != NumberOfExplicitDirsInDirectoryWithTwelveThousandFiles {
t.Errorf("Listed incorrect number of directories from directory: %v, expected 100", numberOfDirs)
}
// number of files = 12000
if numberOfFiles != NumberOfFilesInDirectoryWithTwelveThousandFiles {
t.Errorf("Listed incorrect number of files from directory: %v, expected 12000", numberOfFiles)
if expectImplicitDirs && numberOfImplicitDirs != numberOfImplicitDirsInDirectoryWithTwelveThousandFiles {
t.Errorf("Incorrect number of implicit directories: got %d, want %d", numberOfImplicitDirs, numberOfImplicitDirsInDirectoryWithTwelveThousandFiles)
}
}

func checkIfObjNameIsCorrect(objName string, prefix string, maxNumber int, t *testing.T) {
// Extracting object number.
objNumberStr := strings.ReplaceAll(objName, prefix, "")
// checkIfObjNameIsCorrect validates the object name against a prefix and expected range.
func checkIfObjNameIsCorrect(t *testing.T, objName string, prefix string, maxNumber int) {
t.Helper()

objNumberStr := strings.TrimPrefix(objName, prefix)
objNumber, err := strconv.Atoi(objNumberStr)
if err != nil {
t.Errorf("Error in extracting file number: %v", err)
t.Errorf("Error extracting object number from %q: %v", objName, err)
}
if objNumber < 1 || objNumber > maxNumber {
t.Errorf("Correct object does not exist.")
t.Errorf("Invalid object number in %q: %d (should be between 1 and %d)", objName, objNumber, maxNumber)
}
}

func createTwelveThousandFilesAndUploadOnTestBucket(t *testing.T) {
// Creating twelve thousand files in DirectoryWithTwelveThousandFiles directory to upload them on a bucket for testing.
localDirPath := path.Join(os.Getenv("HOME"), DirectoryWithTwelveThousandFiles)
operations.CreateDirectoryWithNFiles(NumberOfFilesInDirectoryWithTwelveThousandFiles, localDirPath, PrefixFileInDirectoryWithTwelveThousandFiles, t)
// createFilesAndUpload generates files and uploads them to the specified directory.
func createFilesAndUpload(t *testing.T, dirPath string) {
t.Helper()

// Uploading twelve thousand files to directoryWithTwelveThousandFiles in testBucket.
dirPath := path.Join(setup.TestBucket(), DirectoryForListLargeFileTests, DirectoryWithTwelveThousandFiles)
setup.RunScriptForTestData("testdata/upload_files_to_bucket.sh", dirPath, DirectoryWithTwelveThousandFiles, PrefixFileInDirectoryWithTwelveThousandFiles)
localDirPath := path.Join(os.Getenv("HOME"), directoryWithTwelveThousandFiles)
operations.CreateDirectoryWithNFiles(numberOfFilesInDirectoryWithTwelveThousandFiles, localDirPath, prefixFileInDirectoryWithTwelveThousandFiles, t)

setup.RunScriptForTestData("testdata/upload_files_to_bucket.sh", dirPath, localDirPath, prefixFileInDirectoryWithTwelveThousandFiles)
}

// Create a hundred explicit directories.
func createHundredExplicitDir(dirPath string, t *testing.T) {
// Create hundred explicit directories.
for i := 1; i <= NumberOfExplicitDirsInDirectoryWithTwelveThousandFiles; i++ {
subDirPath := path.Join(dirPath, PrefixExplicitDirInLargeDirListTest+strconv.Itoa(i))
// createExplicitDirs creates empty explicit directories in the specified directory.
func createExplicitDirs(t *testing.T, dirPath string) {
t.Helper()

for i := 1; i <= numberOfExplicitDirsInDirectoryWithTwelveThousandFiles; i++ {
subDirPath := path.Join(dirPath, fmt.Sprintf("%s%d", prefixExplicitDirInLargeDirListTest, i))
operations.CreateDirectoryWithNFiles(0, subDirPath, "", t)
}
}

func listDirectoryTime(dirPath string, validateDirectory func([]os.DirEntry, *testing.T), t *testing.T) (time.Duration, time.Duration) {
// List Directory first time
// listDirTime measures the time taken to list a directory with and without cache.
func listDirTime(t *testing.T, dirPath string, expectExplicitDirs bool, expectImplicitDirs bool) (time.Duration, time.Duration) {
t.Helper()

startTime := time.Now()
objs, err := os.ReadDir(dirPath)
if err != nil {
t.Fatalf("Error in listing directory: %v", err)
}
endTime := time.Now()
validateDirectory(objs, t)

validateDirectory(t, objs, expectExplicitDirs, expectImplicitDirs)
firstListTime := endTime.Sub(startTime)

// Listing the directory a second time should retrieve the response from the kernel cache.
minSecondListTime := time.Duration(math.MaxInt64)
for i := 0; i < 5; i++ {
startTime = time.Now()
Expand All @@ -165,77 +141,75 @@ func listDirectoryTime(dirPath string, validateDirectory func([]os.DirEntry, *te
t.Fatalf("Error in listing directory: %v", err)
}
endTime = time.Now()
validateDirectory(objs, t)
validateDirectory(t, objs, expectExplicitDirs, expectImplicitDirs)
secondListTime := endTime.Sub(startTime)

// Update the minimum listing time for the second listing
if secondListTime < minSecondListTime {
minSecondListTime = secondListTime
}
}
return firstListTime, minSecondListTime
}

////////////////////////////////////////////////////////////////////////
// Tests
////////////////////////////////////////////////////////////////////////
// prepareTestDirectory sets up a test directory with files and required explicit and implicit directories.
func prepareTestDirectory(t *testing.T, withExplicitDirs bool, withImplicitDirs bool) string {
t.Helper()

// Test with a bucket with twelve thousand files.
func TestListDirectoryWithTwelveThousandFiles(t *testing.T) {
createTwelveThousandFilesAndUploadOnTestBucket(t)
testDirPath := path.Join(setup.MntDir(), DirectoryForListLargeFileTests)
testDirPathOnBucket := path.Join(setup.TestBucket(), DirectoryForListLargeFileTests)
dirPath := path.Join(testDirPath, DirectoryWithTwelveThousandFiles)
testDirPathOnBucket := path.Join(setup.TestBucket(), t.Name())
testDirPath := path.Join(setup.MntDir(), t.Name())

firstListTime, secondListTime := listDirectoryTime(dirPath, validateDirectoryWithTwelveThousandFiles, t)
err := os.MkdirAll(testDirPath, 0755)
if err != nil {
t.Fatalf("Failed to create directory: %v", err)
}

// Fetching data from the kernel for the second list will be faster.
assert.Less(t, secondListTime, firstListTime)
// The second directory listing should be 2 times better performant since it
// will be retrieved from the kernel cache.
assert.Less(t, 2*secondListTime, firstListTime)
createFilesAndUpload(t, testDirPathOnBucket)

// Clear the data after testing.
setup.RunScriptForTestData("testdata/delete_objects.sh", testDirPathOnBucket)
if withExplicitDirs {
createExplicitDirs(t, testDirPath)
}

if withImplicitDirs {
setup.RunScriptForTestData("testdata/create_implicit_dir.sh", testDirPathOnBucket, prefixImplicitDirInLargeDirListTest, strconv.Itoa(numberOfImplicitDirsInDirectoryWithTwelveThousandFiles))
}

return testDirPath
}

// Test with a bucket with twelve thousand files and hundred explicit directories.
func TestListDirectoryWithTwelveThousandFilesAndHundredExplicitDir(t *testing.T) {
createTwelveThousandFilesAndUploadOnTestBucket(t)
testDirPath := path.Join(setup.MntDir(), DirectoryForListLargeFileTests)
testDirPathOnBucket := path.Join(setup.TestBucket(), DirectoryForListLargeFileTests)
dirPath := path.Join(testDirPath, DirectoryWithTwelveThousandFiles)
createHundredExplicitDir(dirPath, t)
// //////////////////////////////////////////////////////////////////////
// Tests
// //////////////////////////////////////////////////////////////////////

func (t *listLargeDir) TestListDirectoryWithTwelveThousandFiles() {
dirPath := prepareTestDirectory(t.T(), false, false)

firstListTime, secondListTime := listDirTime(t.T(), dirPath, false, false)

assert.Less(t.T(), secondListTime, firstListTime)
assert.Less(t.T(), 2*secondListTime, firstListTime)
}

func (t *listLargeDir) TestListDirectoryWithTwelveThousandFilesAndHundredExplicitDir() {
dirPath := prepareTestDirectory(t.T(), true, false)

firstListTime, secondListTime := listDirectoryTime(dirPath, validateDirectoryWithTwelveThousandFilesAndHundredExplicitDirectory, t)
firstListTime, secondListTime := listDirTime(t.T(), dirPath, true, false)

// Fetching data from the kernel for the second list will be faster.
assert.Less(t, secondListTime, firstListTime)
// The second directory listing should be 2 times better performant since it
// will be retrieved from the kernel cache.
assert.Less(t, 2*secondListTime, firstListTime)
assert.Less(t.T(), secondListTime, firstListTime)
assert.Less(t.T(), 2*secondListTime, firstListTime)
}

func (t *listLargeDir) TestListDirectoryWithTwelveThousandFilesAndHundredExplicitDirAndHundredImplicitDir() {
dirPath := prepareTestDirectory(t.T(), true, true)

firstListTime, secondListTime := listDirTime(t.T(), dirPath, true, true)

// Clear the bucket after testing.
setup.RunScriptForTestData("testdata/delete_objects.sh", testDirPathOnBucket)
assert.Less(t.T(), secondListTime, firstListTime)
assert.Less(t.T(), 2*secondListTime, firstListTime)
}

// Test with a bucket with twelve thousand files, hundred explicit directories, and hundred implicit directories.
func TestListDirectoryWithTwelveThousandFilesAndHundredExplicitDirAndHundredImplicitDir(t *testing.T) {
createTwelveThousandFilesAndUploadOnTestBucket(t)
testDirPath := path.Join(setup.MntDir(), DirectoryForListLargeFileTests)
testDirPathOnBucket := path.Join(setup.TestBucket(), DirectoryForListLargeFileTests)
dirPath := path.Join(testDirPath, DirectoryWithTwelveThousandFiles)
createHundredExplicitDir(dirPath, t)
subDirPath := path.Join(testDirPathOnBucket, DirectoryWithTwelveThousandFiles)
setup.RunScriptForTestData("testdata/create_implicit_dir.sh", subDirPath, PrefixImplicitDirInLargeDirListTest, strconv.Itoa(NumberOfImplicitDirsInDirectoryWithTwelveThousandFiles))

firstListTime, secondListTime := listDirectoryTime(dirPath, validateDirectoryWithTwelveThousandFilesHundredExplicitDirAndHundredImplicitDir, t)

// Fetching data from the kernel for the second list will be faster.
assert.Less(t, secondListTime, firstListTime)
// The second directory listing should be 2 times better performant since it
// will be retrieved from the kernel cache.
assert.Less(t, 2*secondListTime, firstListTime)
// Clear the bucket after testing.
setup.RunScriptForTestData("testdata/delete_objects.sh", testDirPathOnBucket)
////////////////////////////////////////////////////////////////////////
// Test Suite Function
////////////////////////////////////////////////////////////////////////

func TestListLargeDir(t *testing.T) {
suite.Run(t, new(listLargeDir))
}
Loading
Loading