Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Snapshot : azure module - accelerate the listing of files (used in delete snapshot) #25710

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.microsoft.azure.storage.RetryExponentialRetry;
import com.microsoft.azure.storage.RetryPolicy;
import com.microsoft.azure.storage.StorageException;
import com.microsoft.azure.storage.blob.BlobListingDetails;
import com.microsoft.azure.storage.blob.BlobProperties;
import com.microsoft.azure.storage.blob.CloudBlobClient;
import com.microsoft.azure.storage.blob.CloudBlobContainer;
Expand All @@ -45,9 +46,7 @@
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;

Expand Down Expand Up @@ -280,33 +279,27 @@ public Map<String, BlobMetaData> listBlobsByPrefix(String account, LocationMode

logger.debug("listing container [{}], keyPath [{}], prefix [{}]", container, keyPath, prefix);
MapBuilder<String, BlobMetaData> blobsBuilder = MapBuilder.newMapBuilder();
EnumSet<BlobListingDetails> enumBlobListingDetails = EnumSet.of(BlobListingDetails.METADATA);
CloudBlobClient client = this.getSelectedClient(account, mode);
CloudBlobContainer blobContainer = client.getContainerReference(container);

SocketAccess.doPrivilegedVoidException(() -> {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doPrivileged needs to stay.

if (blobContainer.exists()) {
for (ListBlobItem blobItem : blobContainer.listBlobs(keyPath + (prefix == null ? "" : prefix))) {
URI uri = blobItem.getUri();
logger.trace("blob url [{}]", uri);

// uri.getPath is of the form /container/keyPath.* and we want to strip off the /container/
// this requires 1 + container.length() + 1, with each 1 corresponding to one of the /
String blobPath = uri.getPath().substring(1 + container.length() + 1);

CloudBlockBlob blob = blobContainer.getBlockBlobReference(blobPath);

// fetch the blob attributes from Azure (getBlockBlobReference does not do this)
// this is needed to retrieve the blob length (among other metadata) from Azure Storage
blob.downloadAttributes();

BlobProperties properties = blob.getProperties();
String name = blobPath.substring(keyPath.length());
logger.trace("blob url [{}], name [{}], size [{}]", uri, name, properties.getLength());
blobsBuilder.put(name, new PlainBlobMetaData(name, properties.getLength()));
if (blobContainer.exists()) {
for (ListBlobItem blobItem : blobContainer.listBlobs(keyPath + (prefix == null ? "" : prefix),false,enumBlobListingDetails,null,null)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: please use spaces after the commas

Also, check the length of this line, it looks close to the 140 char limit

URI uri = blobItem.getUri();
logger.trace("blob url [{}]", uri);

// uri.getPath is of the form /container/keyPath.* and we want to strip off the /container/
// this requires 1 + container.length() + 1, with each 1 corresponding to one of the /
String blobPath = uri.getPath().substring(1 + container.length() + 1);
if (!(blobItem instanceof CloudBlockBlob)){
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need this check, it should be an error if we don't get back CloudBlockBlob (which is what we expect for all keys here). I think it can be a hard assert.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree it is the assumption on the previous version of code but I prefer to be defensive : if the azure storage container has been modified by an outside component, it could have other type of blob (Append/Page) so I propose ignoring it and put a warning.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am +1 on what @rjernst said - silently ignoring is not the way to go here, very few read log messages, its more important to fail hard if something changed that alters our underlying assumptions of what object type is returned there.

logger.warn("blob url [{}] is not a CloudBlockBlob",uri);
continue;
}
BlobProperties properties = ((CloudBlockBlob) blobItem).getProperties();
String name = blobPath.substring(keyPath.length());
logger.trace("blob url [{}], name [{}], size [{}]", uri, name, properties.getLength());
blobsBuilder.put(name, new PlainBlobMetaData(name, properties.getLength()));
}
});

}
return blobsBuilder.immutableMap();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@
import org.elasticsearch.repositories.RepositoryVerificationException;
import org.elasticsearch.repositories.azure.AzureRepository.Repository;
import org.elasticsearch.snapshots.SnapshotMissingException;
import org.elasticsearch.snapshots.SnapshotRestoreException;
import org.elasticsearch.snapshots.SnapshotState;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
import org.elasticsearch.test.store.MockFSDirectoryService;
import org.junit.After;
import org.junit.Before;

Expand Down Expand Up @@ -70,20 +70,16 @@ private String getRepositoryPath() {
}

public static String getContainerName() {
String testName = "snapshot-itest-".concat(RandomizedTest.getContext().getRunnerSeedAsString().toLowerCase(Locale.ROOT));
/* Have a different name per test so that there is no possible race condition. As the long can be negative,
* there mustn't be an hyphen between the 2 concat numbers
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo: an -> a, concat -> concatenated

* (can't have 2 consecutives hypens on Azure containers)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo: hypens -> hyphens

*/
String testName = "snapshot-itest-"
.concat(RandomizedTest.getContext().getRunnerSeedAsString().toLowerCase(Locale.ROOT))
.concat(new Long(RandomizedTest.getContext().getRandom().nextLong()).toString());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can simply be randomLong(). However, I don't think this will work because the cleanup in wipeAzureRepositories() would then not know about the generated name. If these need to be unique, then this method needs to stash the generated name so that the repository can be cleaned up after the test.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you are right : I rollback as it will not work in wipeAzureRepository .

return testName.contains(" ") ? Strings.split(testName, " ")[0] : testName;
}

@Override
public Settings indexSettings() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think these should be removed, but if they should, then please explain why.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After reflexion, ok to keep it but we have to add the MockFSIndexStore.TestPlugin so that the configuration variable are known. If we don't have, we have random error as MockFSIndexStore.TestPlugin is added randomly in ESIntegTestCase superclass

// During restore we frequently restore index to exactly the same state it was before, that might cause the same
// checksum file to be written twice during restore operation
return Settings.builder().put(super.indexSettings())
.put(MockFSDirectoryService.RANDOM_PREVENT_DOUBLE_WRITE_SETTING.getKey(), false)
.put(MockFSDirectoryService.RANDOM_NO_DELETE_OPEN_FILE_SETTING.getKey(), false)
.build();
}

@Before @After
public final void wipeAzureRepositories() throws StorageException, URISyntaxException {
wipeRepositories();
Expand All @@ -94,9 +90,10 @@ public final void wipeAzureRepositories() throws StorageException, URISyntaxExce
}

public void testSimpleWorkflow() {
String repo_name = "test-repo-simple";
Client client = client();
logger.info("--> creating azure repository with path [{}]", getRepositoryPath());
PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo")
PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository(repo_name)
.setType("azure").setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), getContainerName())
.put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath())
Expand All @@ -119,13 +116,13 @@ public void testSimpleWorkflow() {
assertThat(client.prepareSearch("test-idx-3").setSize(0).get().getHits().getTotalHits(), equalTo(100L));

logger.info("--> snapshot");
CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap")
CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot(repo_name, "test-snap")
.setWaitForCompletion(true).setIndices("test-idx-*", "-test-idx-3").get();
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(),
equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()));

assertThat(client.admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get().getSnapshots()
assertThat(client.admin().cluster().prepareGetSnapshots(repo_name).setSnapshots("test-snap").get().getSnapshots()
.get(0).state(), equalTo(SnapshotState.SUCCESS));

logger.info("--> delete some data");
Expand All @@ -147,7 +144,7 @@ public void testSimpleWorkflow() {
client.admin().indices().prepareClose("test-idx-1", "test-idx-2").get();

logger.info("--> restore all indices from the snapshot");
RestoreSnapshotResponse restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap")
RestoreSnapshotResponse restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot(repo_name, "test-snap")
.setWaitForCompletion(true).get();
assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), greaterThan(0));

Expand All @@ -160,7 +157,7 @@ public void testSimpleWorkflow() {
logger.info("--> delete indices");
cluster().wipeIndices("test-idx-1", "test-idx-2");
logger.info("--> restore one index after deletion");
restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap").setWaitForCompletion(true)
restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot(repo_name, "test-snap").setWaitForCompletion(true)
.setIndices("test-idx-*", "-test-idx-2").get();
assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), greaterThan(0));
ensureGreen();
Expand All @@ -176,7 +173,7 @@ public void testSimpleWorkflow() {
public void testMultipleSnapshots() throws URISyntaxException, StorageException {
final String indexName = "test-idx-1";
final String typeName = "doc";
final String repositoryName = "test-repo";
final String repositoryName = "test-repo-multiple-snapshot";
final String snapshot1Name = "test-snap-1";
final String snapshot2Name = "test-snap-2";

Expand Down Expand Up @@ -313,6 +310,7 @@ public void testMultipleRepositories() {
* For issue #26: https://github.com/elastic/elasticsearch-cloud-azure/issues/26
*/
public void testListBlobs_26() throws StorageException, URISyntaxException {
final String repositoryName="test-repo-26";
createIndex("test-idx-1", "test-idx-2", "test-idx-3");
ensureGreen();

Expand All @@ -326,45 +324,45 @@ public void testListBlobs_26() throws StorageException, URISyntaxException {

ClusterAdminClient client = client().admin().cluster();
logger.info("--> creating azure repository without any path");
PutRepositoryResponse putRepositoryResponse = client.preparePutRepository("test-repo").setType("azure")
PutRepositoryResponse putRepositoryResponse = client.preparePutRepository(repositoryName).setType("azure")
.setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), getContainerName())
).get();
assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));

// Get all snapshots - should be empty
assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(0));
assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(0));

logger.info("--> snapshot");
CreateSnapshotResponse createSnapshotResponse = client.prepareCreateSnapshot("test-repo", "test-snap-26")
CreateSnapshotResponse createSnapshotResponse = client.prepareCreateSnapshot(repositoryName, "test-snap-26")
.setWaitForCompletion(true).setIndices("test-idx-*").get();
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));

// Get all snapshots - should have one
assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(1));
assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(1));

// Clean the snapshot
client.prepareDeleteSnapshot("test-repo", "test-snap-26").get();
client.prepareDeleteRepository("test-repo").get();
client.prepareDeleteSnapshot(repositoryName, "test-snap-26").get();
client.prepareDeleteRepository(repositoryName).get();

logger.info("--> creating azure repository path [{}]", getRepositoryPath());
putRepositoryResponse = client.preparePutRepository("test-repo").setType("azure")
putRepositoryResponse = client.preparePutRepository(repositoryName).setType("azure")
.setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), getContainerName())
.put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath())
).get();
assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));

// Get all snapshots - should be empty
assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(0));
assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(0));

logger.info("--> snapshot");
createSnapshotResponse = client.prepareCreateSnapshot("test-repo", "test-snap-26").setWaitForCompletion(true)
createSnapshotResponse = client.prepareCreateSnapshot(repositoryName, "test-snap-26").setWaitForCompletion(true)
.setIndices("test-idx-*").get();
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));

// Get all snapshots - should have one
assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(1));
assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(1));


}
Expand All @@ -373,23 +371,24 @@ public void testListBlobs_26() throws StorageException, URISyntaxException {
* For issue #28: https://github.com/elastic/elasticsearch-cloud-azure/issues/28
*/
public void testGetDeleteNonExistingSnapshot_28() throws StorageException, URISyntaxException {
final String repositoryName="test-repo-28";
ClusterAdminClient client = client().admin().cluster();
logger.info("--> creating azure repository without any path");
PutRepositoryResponse putRepositoryResponse = client.preparePutRepository("test-repo").setType("azure")
PutRepositoryResponse putRepositoryResponse = client.preparePutRepository(repositoryName).setType("azure")
.setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), getContainerName())
).get();
assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));

try {
client.prepareGetSnapshots("test-repo").addSnapshots("nonexistingsnapshotname").get();
client.prepareGetSnapshots(repositoryName).addSnapshots("nonexistingsnapshotname").get();
fail("Shouldn't be here");
} catch (SnapshotMissingException ex) {
// Expected
}

try {
client.prepareDeleteSnapshot("test-repo", "nonexistingsnapshotname").get();
client.prepareDeleteSnapshot(repositoryName, "nonexistingsnapshotname").get();
fail("Shouldn't be here");
} catch (SnapshotMissingException ex) {
// Expected
Expand Down Expand Up @@ -418,18 +417,19 @@ public void testForbiddenContainerName() throws Exception {
* @param correct Is this container name correct
*/
private void checkContainerName(final String container, final boolean correct) throws Exception {
String repositoryName = "test-repo-checkContainerName";
logger.info("--> creating azure repository with container name [{}]", container);
// It could happen that we just removed from a previous test the same container so
// we can not create it yet.
assertBusy(() -> {
try {
PutRepositoryResponse putRepositoryResponse = client().admin().cluster().preparePutRepository("test-repo")
PutRepositoryResponse putRepositoryResponse = client().admin().cluster().preparePutRepository(repositoryName)
.setType("azure").setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), container)
.put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath())
.put(Repository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(1000, 10000), ByteSizeUnit.BYTES)
).get();
client().admin().cluster().prepareDeleteRepository("test-repo").get();
client().admin().cluster().prepareDeleteRepository(repositoryName).get();
try {
logger.info("--> remove container [{}]", container);
cleanRepositoryFiles(container);
Expand All @@ -450,9 +450,10 @@ private void checkContainerName(final String container, final boolean correct) t
* Test case for issue #23: https://github.com/elastic/elasticsearch-cloud-azure/issues/23
*/
public void testNonExistingRepo_23() {
final String repositoryName = "test-repo-test23";
Client client = client();
logger.info("--> creating azure repository with path [{}]", getRepositoryPath());
PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo")
PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository(repositoryName)
.setType("azure").setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), getContainerName())
.put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath())
Expand All @@ -462,9 +463,9 @@ public void testNonExistingRepo_23() {

logger.info("--> restore non existing snapshot");
try {
client.admin().cluster().prepareRestoreSnapshot("test-repo", "no-existing-snapshot").setWaitForCompletion(true).get();
client.admin().cluster().prepareRestoreSnapshot(repositoryName, "no-existing-snapshot").setWaitForCompletion(true).get();
fail("Shouldn't be here");
} catch (SnapshotMissingException ex) {
} catch (SnapshotRestoreException ex) {
// Expected
}
}
Expand All @@ -474,7 +475,7 @@ public void testNonExistingRepo_23() {
*/
public void testRemoveAndCreateContainer() throws Exception {
final String container = getContainerName().concat("-testremove");
final AzureStorageService storageService = new AzureStorageServiceImpl(internalCluster().getDefaultSettings());
final AzureStorageService storageService = new AzureStorageServiceImpl(nodeSettings(0));

// It could happen that we run this test really close to a previous one
// so we might need some time to be able to create the container
Expand Down