Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

s3_object - Rework list_objects handling #2328

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions changelogs/fragments/1953-max_tokens.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
deprecated_features:
- s3_object - Support for ``mode=list`` has been deprecated. ``amazon.aws.s3_object_info`` should be used instead (https://github.com/ansible-collections/amazon.aws/pull/2328).
bugfixes:
- s3_object - Fixed an issue where ``max_keys`` was not respected (https://github.com/ansible-collections/amazon.aws/pull/2328).
minor_changes:
- s3_object_info - Added support for the ``max_keys`` and ``marker`` parameters (https://github.com/ansible-collections/amazon.aws/pull/2328).
17 changes: 17 additions & 0 deletions plugins/module_utils/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,20 @@ def list_bucket_inventory_configurations(client, bucket_name):
entries.extend(response["InventoryConfigurationList"])
next_token = response.get("NextToken")
return entries


@AWSRetry.jittered_backoff()
def _list_objects_v2(client, **params):
    """List objects using ``list_objects_v2``, ignoring parameters set to None.

    When ``MaxKeys`` is supplied, a single ``client.list_objects_v2`` call is
    made and its response is returned. Otherwise the ``list_objects_v2``
    paginator is used and the result of ``build_full_result()`` is returned.
    """
    request = {name: value for name, value in params.items() if value is not None}

    # For practical purposes the paginator ignores MaxKeys, so when a limit has
    # been requested call list_objects_v2 directly instead of paginating.
    if "MaxKeys" in request:
        return client.list_objects_v2(**request)

    pages = client.get_paginator("list_objects_v2").paginate(**request)
    return pages.build_full_result()


def list_bucket_object_keys(client, bucket, prefix=None, max_keys=None, start_after=None):
    """Return the ``Key`` of every entry in the listing's ``Contents``.

    ``prefix``, ``max_keys`` and ``start_after`` are forwarded as ``Prefix``,
    ``MaxKeys`` and ``StartAfter``. An empty list is returned when the response
    has no ``Contents``.
    """
    listing = _list_objects_v2(
        client,
        Bucket=bucket,
        Prefix=prefix,
        StartAfter=start_after,
        MaxKeys=max_keys,
    )

    keys = []
    for entry in listing.get("Contents", []):
        keys.append(entry["Key"])
    return keys
41 changes: 18 additions & 23 deletions plugins/modules/s3_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,11 @@
- Specifies the key to start with when using list mode. Object keys are returned in
alphabetical order, starting with key after the marker in order.
type: str
default: ''
max_keys:
description:
- Max number of results to return when O(mode=list), set this if you want to retrieve fewer
than the default 1000 keys.
- Max number of results to return when O(mode=list).
- When not set, B(all) keys will be returned.
- Ignored when O(mode) is not V(list).
default: 1000
type: int
metadata:
description:
Expand All @@ -84,6 +82,8 @@
- 'V(copy): copy object that is already stored in another bucket'
- Support for creating and deleting buckets was removed in release 6.0.0.
To create and manage the bucket itself please use the M(amazon.aws.s3_bucket) module.
- Support for V(list) has been deprecated and will be removed in a release after 2026-11-01.
The M(amazon.aws.s3_object_info) module should be used instead of V(list).
required: true
choices: ['get', 'put', 'create', 'geturl', 'getstr', 'delobj', 'list', 'copy']
type: str
Expand Down Expand Up @@ -444,6 +444,7 @@
from ansible_collections.amazon.aws.plugins.module_utils.s3 import HAS_MD5
from ansible_collections.amazon.aws.plugins.module_utils.s3 import calculate_etag
from ansible_collections.amazon.aws.plugins.module_utils.s3 import calculate_etag_content
from ansible_collections.amazon.aws.plugins.module_utils.s3 import list_bucket_object_keys
from ansible_collections.amazon.aws.plugins.module_utils.s3 import s3_extra_params
from ansible_collections.amazon.aws.plugins.module_utils.s3 import validate_bucket_name
from ansible_collections.amazon.aws.plugins.module_utils.tagging import ansible_dict_to_boto3_tag_list
Expand Down Expand Up @@ -579,25 +580,9 @@ def bucket_check(module, s3, bucket, validate=True):
)


@AWSRetry.jittered_backoff()
def paginated_list(s3, **pagination_params):
    """Yield the ``Key`` of each object on every page of a ``list_objects_v2`` listing."""
    # NOTE(review): this is a generator, so the body only runs while the caller
    # iterates - confirm the retry decorator still covers the API calls.
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(**pagination_params):
        yield from (entry["Key"] for entry in page.get("Contents", []))


def list_keys(s3, bucket, prefix=None, marker=None, max_keys=None):
pagination_params = {
"Bucket": bucket,
"Prefix": prefix,
"StartAfter": marker,
"MaxKeys": max_keys,
}
pagination_params = {k: v for k, v in pagination_params.items() if v}

try:
return list(paginated_list(s3, **pagination_params))
return list_bucket_object_keys(s3, bucket, prefix=prefix, start_after=marker, max_keys=max_keys)
except (
botocore.exceptions.ClientError,
botocore.exceptions.BotoCoreError,
Expand Down Expand Up @@ -1487,8 +1472,8 @@ def main():
encryption_mode=dict(choices=["AES256", "aws:kms"], default="AES256"),
expiry=dict(default=600, type="int", aliases=["expiration"]),
headers=dict(type="dict"),
marker=dict(default=""),
max_keys=dict(default=1000, type="int", no_log=False),
marker=dict(),
max_keys=dict(type="int", no_log=False),
metadata=dict(type="dict"),
mode=dict(choices=valid_modes, required=True),
sig_v4=dict(default=True, type="bool"),
Expand Down Expand Up @@ -1538,6 +1523,16 @@ def main():
mutually_exclusive=[["content", "content_base64", "src"]],
)

if module.params.get("mode") == "list":
    # Warn users of the deprecated 'list' mode. The date quoted in the message
    # must agree with the ``date`` argument passed to deprecate().
    module.deprecate(
        (
            "Support for 'list' mode has been deprecated and will be removed in a release after "
            "2026-11-01. Please use the amazon.aws.s3_object_info module instead."
        ),
        date="2026-11-01",
        collection_name="amazon.aws",
    )

endpoint_url = module.params.get("endpoint_url")
dualstack = module.params.get("dualstack")

Expand Down
41 changes: 21 additions & 20 deletions plugins/modules/s3_object_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,17 @@
type: list
elements: str
choices: ['ETag', 'Checksum', 'ObjectParts', 'StorageClass', 'ObjectSize']
marker:
description:
- Specifies the object key to start after. Object keys are returned in alphabetical order, starting with the
key immediately after the marker.
type: str
version_added: 9.0.0
max_keys:
description:
- Max number of results to return. Set this if you want to retrieve only partial results.
type: int
version_added: 9.0.0
notes:
- Support for the E(S3_URL) environment variable has been
deprecated and will be removed in a release after 2024-12-01, please use the O(endpoint_url) parameter
Expand Down Expand Up @@ -441,6 +452,7 @@
from ansible_collections.amazon.aws.plugins.module_utils.botocore import is_boto3_error_code
from ansible_collections.amazon.aws.plugins.module_utils.modules import AnsibleAWSModule
from ansible_collections.amazon.aws.plugins.module_utils.retries import AWSRetry
from ansible_collections.amazon.aws.plugins.module_utils.s3 import list_bucket_object_keys
from ansible_collections.amazon.aws.plugins.module_utils.s3 import s3_extra_params
from ansible_collections.amazon.aws.plugins.module_utils.tagging import boto3_tag_list_to_ansible_dict

Expand Down Expand Up @@ -622,30 +634,17 @@ def get_object(connection, bucket_name, object_name):
return result


@AWSRetry.jittered_backoff(retries=10)
def _list_bucket_objects(connection, **params):
    """Return ``build_full_result()`` for a ``list_objects`` pagination using ``params``."""
    pages = connection.get_paginator("list_objects").paginate(**params)
    return pages.build_full_result()


def list_bucket_objects(connection, module, bucket_name):
params = {}
params["Bucket"] = bucket_name

result = []
list_objects_response = {}

try:
list_objects_response = _list_bucket_objects(connection, **params)
keys = list_bucket_object_keys(
connection,
bucket=bucket_name,
max_keys=module.params["max_keys"],
start_after=module.params["marker"],
)
except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
module.fail_json_aws(e, msg="Failed to list bucket objects.")

if len(list_objects_response) != 0:
# convert to snake_case
for response_list_item in list_objects_response.get("Contents", []):
result.append(response_list_item["Key"])

return result
return keys


def bucket_check(
Expand Down Expand Up @@ -691,6 +690,8 @@ def main():
object_name=dict(type="str"),
dualstack=dict(default=False, type="bool"),
ceph=dict(default=False, type="bool", aliases=["rgw"]),
marker=dict(),
max_keys=dict(type="int", no_log=False),
)

required_if = [
Expand Down
21 changes: 21 additions & 0 deletions tests/integration/targets/s3_object/tasks/copy_object.yml
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,27 @@
- copy_with_metadata is changed
- obj_info.object_info.0.object_data.metadata == another_metadata

- name: Get objects info - multiple items
amazon.aws.s3_object_info:
bucket_name: "{{ bucket_name }}"
register: obj_info

- name: Validate multiple objects returned
ansible.builtin.assert:
that:
- (obj_info.s3_keys | length) > 1

- name: Get objects info - 1 item
amazon.aws.s3_object_info:
bucket_name: "{{ bucket_name }}"
max_keys: 1
register: obj_info

- name: Validate just 1 object returned
ansible.builtin.assert:
that:
- (obj_info.s3_keys | length) == 1

always:
- ansible.builtin.include_tasks: delete_bucket.yml
with_items:
Expand Down
14 changes: 14 additions & 0 deletions tests/integration/targets/s3_object/tasks/copy_recursively.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,20 @@
- '"file2.txt" in _objects.s3_keys'
- '"file3.txt" in _objects.s3_keys'

- name: test list to get just 1 object from the bucket
amazon.aws.s3_object:
bucket: "{{ bucket_dst }}"
mode: list
max_keys: 1
retries: 3
delay: 3
register: result

- ansible.builtin.assert:
that:
- "(result.s3_keys | length) == 1"
- result.msg == "LIST operation complete"

# Test: Copy all objects from source bucket
- name: copy all objects from source bucket
amazon.aws.s3_object:
Expand Down
41 changes: 0 additions & 41 deletions tests/unit/plugins/modules/test_s3_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,6 @@
utils = "ansible_collections.amazon.aws.plugins.module_utils.ec2"


@patch(module_name + ".paginated_list")
def test_list_keys_success(m_paginated_list):
    """list_keys returns the keys produced by the mocked paginated_list helper."""
    m_paginated_list.return_value = ["delete.txt"]
    client = MagicMock()

    listed = s3_object.list_keys(client, "a987e6b6026ab04e4717", "", "", 1000)

    assert listed == ["delete.txt"]
    m_paginated_list.assert_called_once()


@patch(module_name + ".paginated_list")
def test_list_keys_failure(m_paginated_list):
    """list_keys is expected to raise S3ObjectFailure when paginated_list raises BotoCoreError."""
    m_paginated_list.side_effect = botocore.exceptions.BotoCoreError
    client = MagicMock()

    with pytest.raises(s3_object.S3ObjectFailure):
        s3_object.list_keys(client, "a987e6b6026ab04e4717", "", "", 1000)


@patch(module_name + ".delete_key")
def test_s3_object_do_delobj_success(m_delete_key):
module = MagicMock()
Expand Down Expand Up @@ -69,27 +49,6 @@ def test_s3_object_do_delobj_failure_noobj(m_delete_key):
module.fail_json.assert_called_with(msg="object parameter is required")


@patch(module_name + ".paginated_list")
@patch(module_name + ".list_keys")
def test_s3_object_do_list_success(m_list_keys, m_paginated_list):
    """s3_object_do_list calls list_keys exactly once.

    Stacked ``@patch`` decorators inject their mocks bottom-up, so the first
    parameter is the ``list_keys`` mock and the second is the
    ``paginated_list`` mock. The parameter names follow that order.
    """
    module = MagicMock()
    s3 = MagicMock()

    m_list_keys.return_value = ["delete.txt"]
    var_dict = {
        "bucket": "a987e6b6026ab04e4717",
        "prefix": "",
        "marker": "",
        "max_keys": 1000,
        "bucketrtn": True,
    }

    s3_object.s3_object_do_list(module, s3, s3, var_dict)
    assert m_list_keys.call_count == 1
    # TODO(review): also assert on module.exit_json once the expected payload
    # (msg="LIST operation complete", s3_keys=["delete.txt"]) is confirmed.


@patch(utils + ".get_aws_connection_info")
def test_populate_params(m_get_aws_connection_info):
module = MagicMock()
Expand Down
Loading