Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements to sync_folder_to_container speed, feature and verbose logging #355

Merged
merged 3 commits into from
Apr 25, 2014
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 50 additions & 13 deletions pyrax/cf_wrapper/client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import datetime
from functools import wraps
import hashlib
Expand Down Expand Up @@ -893,7 +894,8 @@ def _upload_folder_in_background(self, folder_path, container, ignore,


def sync_folder_to_container(self, folder_path, container, delete=False,
include_hidden=False, ignore=None, ignore_timestamps=False):
include_hidden=False, ignore=None, ignore_timestamps=False,
object_prefix="", verbose=False):
"""
Compares the contents of the specified folder, and checks to make sure
that the corresponding object is present in the specified container. If
Expand All @@ -916,22 +918,44 @@ def sync_folder_to_container(self, folder_path, container, delete=False,
file names, and any names that match any of the 'ignore' patterns will
not be uploaded. The patterns should be standard *nix-style shell
patterns; e.g., '*pyc' will ignore all files ending in 'pyc', such as
'program.pyc' and 'abcpyc'. """
'program.pyc' and 'abcpyc'.

If `object_prefix` is set it will be appended to the object name when
it is checked and uploaded to the container. For example, if you use
sync_folder_to_container("folderToSync/", myContainer,
object_prefix="imgFolder") it will upload the files to the
container/imgFolder/... instead of just container/...

Set `verbose` to True to make it print what is going on. It will
show which files are being uploaded and which ones are not and why.
"""
cont = self.get_container(container)
self._local_files = []
# Load a list of all the remote objects so we don't have to keep
# hitting the service
if verbose:
log = logging.getLogger("pyrax")
log.info("Loading remote object list (prefix=%s)\n",
object_prefix)
data = cont.get_objects(prefix=object_prefix, full_listing=True)
self._remote_files = dict((d.name, d) for d in data)
self._sync_folder_to_container(folder_path, cont, prefix="",
delete=delete, include_hidden=include_hidden, ignore=ignore,
ignore_timestamps=ignore_timestamps)
ignore_timestamps=ignore_timestamps,
object_prefix=object_prefix, verbose=verbose)
# Unset the _remote_files
self._remote_files = None


def _sync_folder_to_container(self, folder_path, cont, prefix, delete,
include_hidden, ignore, ignore_timestamps):
include_hidden, ignore, ignore_timestamps, object_prefix, verbose):
"""
This is the internal method that is called recursively to handle
nested folder structures.
"""
fnames = os.listdir(folder_path)
ignore = utils.coerce_string_to_list(ignore)
log = logging.getLogger("pyrax")
if not include_hidden:
ignore.append(".*")
for fname in fnames:
Expand All @@ -944,17 +968,20 @@ def _sync_folder_to_container(self, folder_path, cont, prefix, delete,
subprefix = "%s/%s" % (prefix, subprefix)
self._sync_folder_to_container(pth, cont, prefix=subprefix,
delete=delete, include_hidden=include_hidden,
ignore=ignore, ignore_timestamps=ignore_timestamps)
ignore=ignore, ignore_timestamps=ignore_timestamps,
object_prefix=object_prefix, verbose=verbose)
continue
self._local_files.append(os.path.join(prefix, fname))
self._local_files.append(os.path.join(object_prefix, prefix, fname))
local_etag = utils.get_checksum(pth)
fullname = fname
fullname_with_prefix = "%s/%s" % (object_prefix, fname)
if prefix:
fullname = "%s/%s" % (prefix, fname)
fullname_with_prefix = "%s/%s/%s" % (object_prefix, prefix, fname)
try:
obj = cont.get_object(fullname)
obj = self._remote_files[fullname_with_prefix]
obj_etag = obj.etag
except exc.NoSuchObject:
except KeyError:
obj = None
obj_etag = None
if local_etag != obj_etag:
Expand All @@ -968,19 +995,29 @@ def _sync_folder_to_container(self, folder_path, cont, prefix, delete,
local_mod_str = local_mod.isoformat()
if obj_time_str >= local_mod_str:
# Remote object is newer
if verbose:
log.info("%s NOT UPLOADED because remote object is "
"newer\n", fullname)
continue
cont.upload_file(pth, obj_name=fullname, etag=local_etag,
return_none=True)
cont.upload_file(pth, obj_name=fullname_with_prefix,
etag=local_etag, return_none=True)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Continuation lines should be indented two levels, as per the HACKING document.

if verbose:
log.info("%s UPLOADED\n", fullname)
else:
if verbose:
log.info("%s NOT UPLOADED because it already "
"exists\n", fullname)
if delete and not prefix:
self._delete_objects_not_in_list(cont)
self._delete_objects_not_in_list(cont, object_prefix)


def _delete_objects_not_in_list(self, cont):
def _delete_objects_not_in_list(self, cont, object_prefix=""):
"""
Finds all the objects in the specified container that are not present
in the self._local_files list, and deletes them.
"""
objnames = set(cont.get_object_names(full_listing=True))
objnames = set(cont.get_object_names(prefix=object_prefix,
full_listing=True))
localnames = set(self._local_files)
to_delete = list(objnames.difference(localnames))
# We don't need to wait around for this to complete. Store the thread
Expand Down