Skip to content

Commit

Permalink
Improve Copy Operation by taking the Source URI (#482)
Browse files Browse the repository at this point in the history
  • Loading branch information
waahm7 authored Jan 13, 2025
1 parent 0559eb8 commit 1c80418
Show file tree
Hide file tree
Showing 11 changed files with 276 additions and 47 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ jobs:
run: |
python3 -c "from urllib.request import urlretrieve; urlretrieve('${{ env.BUILDER_HOST }}/${{ env.BUILDER_SOURCE }}/${{ env.BUILDER_VERSION }}/builder.pyz?run=${{ env.RUN }}', 'builder')"
chmod a+x builder
./builder build -p ${{ env.PACKAGE_NAME }} --compiler=gcc-9 --cmake-extra=-DASSERT_LOCK_HELD=ON --coverage --coverage-exclude=source/s3_copy_object.c
./builder build -p ${{ env.PACKAGE_NAME }} --compiler=gcc-12 --cmake-extra=-DASSERT_LOCK_HELD=ON --coverage --coverage-exclude=source/s3_copy_object.c
4 changes: 4 additions & 0 deletions include/aws/s3/private/s3_copy_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/

#include "aws/s3/private/s3_meta_request_impl.h"
#include <aws/common/uri.h>

enum aws_s3_copy_object_request_tag {
AWS_S3_COPY_OBJECT_REQUEST_TAG_GET_OBJECT_SIZE,
Expand All @@ -25,6 +26,9 @@ struct aws_s3_copy_object {
/* Usable after the Create Multipart Upload request succeeds. */
struct aws_string *upload_id;

/* (Optional) source_uri for the copy operation. */
struct aws_uri source_uri;

/* Only meant for use in the update function, which is never called concurrently. */
struct {
uint32_t next_part_number;
Expand Down
3 changes: 2 additions & 1 deletion include/aws/s3/private/s3_request_messages.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ struct aws_http_message *aws_s3_get_object_size_message_new(
AWS_S3_API
struct aws_http_message *aws_s3_get_source_object_size_message_new(
struct aws_allocator *allocator,
struct aws_http_message *base_message);
struct aws_http_message *base_message,
struct aws_uri *source_uri);

/* Add content-md5 header to the http message passed in. The MD5 will be computed from the input_buf */
AWS_S3_API
Expand Down
17 changes: 13 additions & 4 deletions include/aws/s3/s3_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,14 @@ enum aws_s3_meta_request_type {
* a CopyObject request to S3 if the object size is not large enough for
* a multipart upload.
* Note: copy support is still in development and has the following limitations:
* - host header must use virtual host addressing style (path style is not
* 1. host header must use virtual host addressing style (path style is not
* supported) and both source and dest buckets must have dns compliant name
* - only {bucket}/{key} format is supported for source and passing arn as
* 2. only {bucket}/{key} format is supported for source and passing arn as
* source will not work
* - source bucket is assumed to be in the same region as dest
* - source bucket and dest bucket must both be either directory buckets or regular buckets.
* 3. source bucket is assumed to be in the same region as dest
* 4. source bucket and dest bucket must both be either directory buckets or regular buckets.
*
* Provide the `meta_request_options.copy_source_uri` to bypass limitations 1 and 2.
*/
AWS_S3_META_REQUEST_TYPE_COPY_OBJECT,

Expand Down Expand Up @@ -869,6 +871,13 @@ struct aws_s3_meta_request_options {
* This is just used as an estimate, so it's okay to provide an approximate value if the exact size is unknown.
*/
const uint64_t *object_size_hint;

/*
* (Optional)
* If performing a copy operation, provide the source URI here to bypass limitations 1 and 2 of the copy operation.
* This will be ignored for other operations.
*/
const struct aws_byte_cursor copy_source_uri;
};

/* Result details of a meta request.
Expand Down
15 changes: 14 additions & 1 deletion source/s3_copy_object.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,22 @@ struct aws_s3_meta_request *aws_s3_meta_request_copy_object_new(
copy_object->synced_data.content_length = UNKNOWN_CONTENT_LENGTH;
copy_object->synced_data.total_num_parts = UNKNOWN_NUM_PARTS;
copy_object->threaded_update_data.next_part_number = 1;
if (options->copy_source_uri.len != 0) {
if (aws_uri_init_parse(&copy_object->source_uri, allocator, &options->copy_source_uri)) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST,
"Unable to parse the copy_source_uri provided in the request: " PRInSTR "",
AWS_BYTE_CURSOR_PRI(options->copy_source_uri));
goto on_error;
}
}

AWS_LOGF_DEBUG(AWS_LS_S3_META_REQUEST, "id=%p Created new CopyObject Meta Request.", (void *)&copy_object->base);

return &copy_object->base;
on_error:
aws_s3_meta_request_release(&copy_object->base);
return NULL;
}

static void s_s3_meta_request_copy_object_destroy(struct aws_s3_meta_request *meta_request) {
Expand All @@ -105,6 +117,7 @@ static void s_s3_meta_request_copy_object_destroy(struct aws_s3_meta_request *me
struct aws_s3_copy_object *copy_object = meta_request->impl;

aws_string_destroy(copy_object->upload_id);
aws_uri_clean_up(&copy_object->source_uri);
copy_object->upload_id = NULL;

for (size_t part_index = 0; part_index < aws_array_list_length(&copy_object->synced_data.part_list); ++part_index) {
Expand Down Expand Up @@ -364,7 +377,7 @@ static struct aws_future_void *s_s3_copy_object_prepare_request(struct aws_s3_re
/* Prepares the GetObject HEAD request to get the source object size. */
case AWS_S3_COPY_OBJECT_REQUEST_TAG_GET_OBJECT_SIZE: {
message = aws_s3_get_source_object_size_message_new(
meta_request->allocator, meta_request->initial_request_message);
meta_request->allocator, meta_request->initial_request_message, &copy_object->source_uri);
break;
}

Expand Down
53 changes: 39 additions & 14 deletions source/s3_request_messages.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <aws/common/byte_buf.h>
#include <aws/common/encoding.h>
#include <aws/common/string.h>
#include <aws/common/uri.h>
#include <aws/http/request_response.h>
#include <aws/io/async_stream.h>
#include <aws/io/stream.h>
Expand Down Expand Up @@ -457,30 +458,63 @@ static const struct aws_byte_cursor s_slash_char = AWS_BYTE_CUR_INIT_FROM_STRING
*/
struct aws_http_message *aws_s3_get_source_object_size_message_new(
struct aws_allocator *allocator,
struct aws_http_message *base_message) {
struct aws_http_message *message = NULL;
struct aws_http_message *base_message,
struct aws_uri *source_uri) {

struct aws_http_message *message = aws_http_message_new_request(allocator);
struct aws_byte_buf head_object_host_header;
AWS_ZERO_STRUCT(head_object_host_header);

if (message == NULL) {
goto error_cleanup;
}

if (aws_http_message_set_request_method(message, g_head_method)) {
goto error_cleanup;
}
if (source_uri != NULL && source_uri->self_size > 0) {
/* Parse source host header and path from the provided URI */
struct aws_byte_cursor host = *aws_uri_host_name(source_uri);
struct aws_byte_cursor path = *aws_uri_path(source_uri);
if (host.len == 0 || path.len == 0) {
aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
goto error_cleanup;
}
struct aws_http_header host_header = {
.name = g_host_header_name,
.value = host,
};
if (aws_http_message_add_header(message, host_header)) {
goto error_cleanup;
}

if (aws_http_message_set_request_path(message, path)) {
goto error_cleanup;
}
return message;
}

/* Parse the source host header and path from the x-amz-copy-source header and the destination URI */

AWS_PRECONDITION(allocator);

/* Find the x-amz-copy-source header, to extract source bucket/key information. */
struct aws_http_headers *headers = aws_http_message_get_headers(base_message);
if (!headers) {
AWS_LOGF_ERROR(AWS_LS_S3_GENERAL, "CopyRequest is missing headers");
return NULL;
goto error_cleanup;
}

struct aws_byte_cursor source_header;
const struct aws_byte_cursor copy_source_header = AWS_BYTE_CUR_INIT_FROM_STRING_LITERAL("x-amz-copy-source");
if (aws_http_headers_get(headers, copy_source_header, &source_header) != AWS_OP_SUCCESS) {
AWS_LOGF_ERROR(AWS_LS_S3_GENERAL, "CopyRequest is missing the x-amz-copy-source header");
return NULL;
goto error_cleanup;
}
struct aws_byte_cursor host;
if (aws_http_headers_get(headers, g_host_header_name, &host) != AWS_OP_SUCCESS) {
AWS_LOGF_ERROR(AWS_LS_S3_GENERAL, "CopyRequest is missing the Host header");
return NULL;
goto error_cleanup;
}

struct aws_byte_cursor request_path = source_header;
Expand Down Expand Up @@ -529,15 +563,6 @@ struct aws_http_message *aws_s3_get_source_object_size_message_new(
goto error_cleanup;
}

message = aws_http_message_new_request(allocator);
if (message == NULL) {
goto error_cleanup;
}

if (aws_http_message_set_request_method(message, g_head_method)) {
goto error_cleanup;
}

struct aws_http_header host_header = {
.name = g_host_header_name,
.value = aws_byte_cursor_from_buf(&head_object_host_header),
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,7 @@ add_net_test_case(test_s3_multipart_copy_large_object_special_char)
add_net_test_case(test_s3_multipart_copy_large_object)
add_net_test_case(test_s3_copy_object_invalid_source_key)
add_net_test_case(test_s3_copy_source_prefixed_by_slash)
add_net_test_case(test_s3_copy_invalid_source_uri)
add_net_test_case(test_s3_copy_source_prefixed_by_slash_multipart)
add_net_test_case(test_s3_put_pause_resume_happy_path)
add_net_test_case(test_s3_put_pause_resume_all_parts_done)
Expand Down
Loading

0 comments on commit 1c80418

Please sign in to comment.