From 09f70f28257a23360d49c18bf5fd74ad633ab9a1 Mon Sep 17 00:00:00 2001
From: Jake Freck
Date: Fri, 3 Aug 2018 15:57:35 -0700
Subject: [PATCH 01/16] format all files, enforce formatting in travis build

---
 .travis.yml | 1 +
 .vscode/settings.json | 6 +-
 account_setup.py | 127 ++++++----
 .../base/helpers/create_user_on_node.py | 3 +-
 .../base/helpers/delete_user_on_cluster.py | 1 +
 .../base/helpers/get_application_log.py | 10 +-
 aztk/client/base/helpers/ssh_into_node.py | 9 +-
 aztk/client/client.py | 134 +++++++----
 aztk/client/cluster/helpers/copy.py | 13 +-
 aztk/client/cluster/helpers/create.py | 16 +-
 aztk/client/cluster/helpers/get.py | 2 -
 aztk/client/cluster/helpers/list.py | 6 +-
 aztk/client/job/helpers/submit.py | 17 +-
 aztk/core/models/fields.py | 15 +-
 aztk/core/models/validators.py | 10 +-
 aztk/error.py | 7 +
 aztk/internal/cluster_data/blob_data.py | 6 +-
 aztk/internal/cluster_data/node_data.py | 26 ++--
 aztk/internal/configuration_base.py | 2 +-
 aztk/internal/docker_cmd.py | 5 +-
 aztk/models/cluster.py | 6 +-
 aztk/models/cluster_configuration.py | 13 +-
 aztk/models/custom_script.py | 1 +
 aztk/models/file.py | 1 +
 aztk/models/file_share.py | 1 +
 aztk/models/node_output.py | 3 +-
 .../models/plugins/internal/plugin_manager.py | 13 +-
 aztk/models/plugins/plugin_configuration.py | 1 +
 aztk/models/plugins/plugin_file.py | 5 +-
 aztk/models/port_forward_specification.py | 1 +
 aztk/models/scheduling_target.py | 1 +
 aztk/models/secrets_configuration.py | 10 +-
 aztk/models/toolkit.py | 8 +-
 aztk/models/user_configuration.py | 1 +
 aztk/node_scripts/core/config.py | 32 ++--
 aztk/node_scripts/core/logger.py | 1 +
 aztk/node_scripts/install/create_user.py | 6 +-
 aztk/node_scripts/install/install.py | 1 +
 aztk/node_scripts/install/node_scheduling.py | 8 +-
 aztk/node_scripts/install/pick_master.py | 13 +-
 aztk/node_scripts/install/plugins.py | 28 ++--
 aztk/node_scripts/install/scripts.py | 3 +-
 aztk/node_scripts/install/spark.py | 19 +--
 aztk/node_scripts/install/spark_container.py | 25 ++-
 aztk/node_scripts/main.py | 1 -
 aztk/node_scripts/submit.py | 51 ++----
 .../wait_until_master_selected.py | 1 +
 .../node_scripts/wait_until_setup_complete.py | 1 -
 aztk/spark/client/base/operations.py | 8 +-
 aztk/spark/client/client.py | 11 +-
 aztk/spark/client/cluster/helpers/copy.py | 8 +-
 aztk/spark/client/cluster/helpers/create.py | 17 +-
 .../client/cluster/helpers/create_user.py | 7 +-
 .../client/cluster/helpers/diagnostics.py | 6 +-
 aztk/spark/client/cluster/helpers/download.py | 24 +--
 .../cluster/helpers/get_application_log.py | 9 +-
 aztk/spark/client/cluster/helpers/run.py | 9 +-
 .../client/cluster/helpers/ssh_into_master.py | 13 +-
 aztk/spark/client/cluster/helpers/submit.py | 13 +-
 aztk/spark/client/cluster/helpers/wait.py | 1 +
 aztk/spark/client/cluster/operations.py | 10 +-
 aztk/spark/client/job/helpers/get.py | 3 +-
 .../client/job/helpers/get_application.py | 3 +-
 .../client/job/helpers/stop_application.py | 1 +
 aztk/spark/client/job/helpers/submit.py | 11 +-
 aztk/spark/client/job/operations.py | 1 -
 aztk/spark/helpers/__init__.py | 1 -
 .../helpers/cluster_diagnostic_helper.py | 4 +-
 aztk/spark/helpers/create_cluster.py | 95 +++++------
 aztk/spark/helpers/get_log.py | 12 +-
 aztk/spark/helpers/job_submission.py | 35 +++--
 aztk/spark/helpers/submit.py | 57 ++++---
 aztk/spark/models/models.py | 72 ++++-----
 .../plugins/install/apt_get/configuration.py | 7 +-
 .../plugins/install/conda/configuration.py | 7 +-
 .../models/plugins/install/configuration.py | 8 +-
 .../plugins/install/pip/configuration.py | 7 +-
 .../models/plugins/jupyter/configuration.py | 1 +
 .../plugins/jupyter_lab/configuration.py | 1 +
 .../models/plugins/nvblas/configuration.py | 3 +-
 .../plugins/resource_monitor/configuration.py | 4 +-
 .../models/plugins/simple/configuration.py | 1 +
 .../plugins/spark_ui_proxy/configuration.py | 7 +-
 .../plugins/spark_ui_proxy/spark_ui_proxy.py | 10 +-
 aztk/spark/utils/util.py | 5 +-
 aztk/utils/azure_api.py | 13 +-
 aztk/utils/command_builder.py | 5 +-
 aztk/utils/deprecation.py | 13 +-
 aztk/utils/file_utils.py | 1 +
 aztk/utils/get_ssh_key.py | 7 +-
 aztk/utils/helpers.py | 79 +++-------
 aztk/utils/ssh.py | 147 ++++++++++--------
 aztk_cli/config.py | 62 +++-----
 aztk_cli/entrypoint.py | 18 +--
 aztk_cli/logger.py | 34 ++--
 aztk_cli/plugins.py | 2 +-
 aztk_cli/spark/endpoints/cluster/cluster.py | 33 ++--
 .../endpoints/cluster/cluster_add_user.py | 29 ++--
 .../endpoints/cluster/cluster_app_logs.py | 15 +-
 .../spark/endpoints/cluster/cluster_copy.py | 18 +--
 .../spark/endpoints/cluster/cluster_create.py | 68 ++++----
 .../spark/endpoints/cluster/cluster_debug.py | 6 +-
 .../spark/endpoints/cluster/cluster_delete.py | 31 ++--
 .../spark/endpoints/cluster/cluster_get.py | 10 +-
 .../spark/endpoints/cluster/cluster_list.py | 7 +-
 .../spark/endpoints/cluster/cluster_run.py | 32 ++--
 .../spark/endpoints/cluster/cluster_ssh.py | 25 +--
 .../spark/endpoints/cluster/cluster_submit.py | 94 ++++++-----
 aztk_cli/spark/endpoints/init.py | 7 +-
 aztk_cli/spark/endpoints/job/delete.py | 29 ++--
 aztk_cli/spark/endpoints/job/get.py | 5 +-
 aztk_cli/spark/endpoints/job/get_app.py | 10 +-
 aztk_cli/spark/endpoints/job/get_app_logs.py | 15 +-
 aztk_cli/spark/endpoints/job/job.py | 31 ++--
 aztk_cli/spark/endpoints/job/list.py | 1 +
 aztk_cli/spark/endpoints/job/list_apps.py | 5 +-
 aztk_cli/spark/endpoints/job/stop.py | 5 +-
 aztk_cli/spark/endpoints/job/stop_app.py | 10 +-
 aztk_cli/spark/endpoints/job/submit.py | 19 ++-
 aztk_cli/spark/endpoints/spark.py | 19 +--
 aztk_cli/toolkit.py | 3 +
 aztk_cli/utils.py | 144 +++++++----------
 setup.py | 2 +-
 123 files changed, 1006 insertions(+), 1211 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 54a8fb2c..1b69bf29 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,6 +8,7 @@ install:
   - pip install -e .
script: + - yapf -dr aztk/ aztk_cli/ - pylint -E aztk - pytest --ignore=tests/integration_tests diff --git a/.vscode/settings.json b/.vscode/settings.json index 7641f485..5aadcbdc 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -15,4 +15,8 @@ "python.venvPath": "${workspaceFolder}/.venv/", "python.pythonPath": "${workspaceFolder}/.venv/Scripts/python.exe", "python.unitTest.pyTestEnabled": true, -} + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": true + } +} \ No newline at end of file diff --git a/account_setup.py b/account_setup.py index 821d2b83..2aacbe42 100644 --- a/account_setup.py +++ b/account_setup.py @@ -56,12 +56,11 @@ def create_resource_group(credentials, subscription_id, **kwargs): resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group), parameters={ 'location': kwargs.get("region", DefaultSettings.region), - } - ) + }) except CloudError as e: if i == 2: - raise AccountSetupError( - "Unable to create resource group in region {}".format(kwargs.get("region", DefaultSettings.region))) + raise AccountSetupError("Unable to create resource group in region {}".format( + kwargs.get("region", DefaultSettings.region))) print(e.message) print("Please try again.") kwargs["resource_group"] = prompt_with_default("Azure Region", DefaultSettings.region) @@ -82,15 +81,10 @@ def create_storage_account(credentials, subscription_id, **kwargs): resource_group_name=kwargs.get("resource_group", DefaultSettings.resource_group), account_name=kwargs.get("storage_account", DefaultSettings.storage_account), parameters=StorageAccountCreateParameters( - sku=Sku(SkuName.standard_lrs), - kind=Kind.storage, - location=kwargs.get('region', DefaultSettings.region) - ) - ) + sku=Sku(SkuName.standard_lrs), kind=Kind.storage, location=kwargs.get('region', DefaultSettings.region))) return storage_account.result().id - def create_batch_account(credentials, subscription_id, **kwargs): """ Create a Batch account @@ -108,10 +102,7 @@ def create_batch_account(credentials, subscription_id, **kwargs): parameters=BatchAccountCreateParameters( location=kwargs.get('region', DefaultSettings.region), auto_storage=AutoStorageBaseProperties( - storage_account_id=kwargs.get('storage_account_id', DefaultSettings.region) - ) - ) - ) + storage_account_id=kwargs.get('storage_account_id', DefaultSettings.region)))) return batch_account.result().id @@ -151,19 +142,13 @@ def create_vnet(credentials, subscription_id, **kwargs): resource_group_name=resource_group_name, virtual_network_name=kwargs.get("virtual_network_name", DefaultSettings.virtual_network_name), parameters=VirtualNetwork( - location=kwargs.get("region", DefaultSettings.region), - address_space=AddressSpace(["10.0.0.0/24"]) - ) - ) + location=kwargs.get("region", DefaultSettings.region), address_space=AddressSpace(["10.0.0.0/24"]))) virtual_network = virtual_network.result() subnet = network_client.subnets.create_or_update( resource_group_name=resource_group_name, virtual_network_name=virtual_network_name, subnet_name=subnet_name, - subnet_parameters=Subnet( - address_prefix='10.0.0.0/24' - ) - ) + subnet_parameters=Subnet(address_prefix='10.0.0.0/24')) return subnet.result().id @@ -175,10 +160,7 @@ def create_aad_user(credentials, tenant_id, **kwargs): :param **application_name: str """ graph_rbac_client = GraphRbacManagementClient( - credentials, - tenant_id, - base_url=AZURE_PUBLIC_CLOUD.endpoints.active_directory_graph_resource_id - ) + credentials, tenant_id, 
base_url=AZURE_PUBLIC_CLOUD.endpoints.active_directory_graph_resource_id) application_credential = uuid.uuid4() try: display_name = kwargs.get("application_name", DefaultSettings.application_name) @@ -192,42 +174,32 @@ def create_aad_user(credentials, tenant_id, **kwargs): start_date=datetime(2000, 1, 1, 0, 0, 0, 0, tzinfo=timezone.utc), end_date=datetime(2299, 12, 31, 0, 0, 0, 0, tzinfo=timezone.utc), value=application_credential, - key_id=uuid.uuid4() - ) - ] - ) - ) + key_id=uuid.uuid4()) + ])) service_principal = graph_rbac_client.service_principals.create( - ServicePrincipalCreateParameters( - app_id=application.app_id, - account_enabled=True - ) - ) + ServicePrincipalCreateParameters(app_id=application.app_id, account_enabled=True)) except GraphErrorException as e: if e.inner_exception.code == "Request_BadRequest": - application = next(graph_rbac_client.applications.list( - filter="identifierUris/any(c:c eq 'http://{}.com')".format(display_name))) + application = next( + graph_rbac_client.applications.list( + filter="identifierUris/any(c:c eq 'http://{}.com')".format(display_name))) confirmation_prompt = "Previously created application with name {} found. "\ "Would you like to use it? (y/n): ".format(application.display_name) prompt_for_confirmation(confirmation_prompt, e, ValueError("Response not recognized. Please try again.")) - password_credentials = list(graph_rbac_client.applications.list_password_credentials(application_object_id=application.object_id)) + password_credentials = list( + graph_rbac_client.applications.list_password_credentials(application_object_id=application.object_id)) password_credentials.append( PasswordCredential( start_date=datetime(2000, 1, 1, 0, 0, 0, 0, tzinfo=timezone.utc), end_date=datetime(2299, 12, 31, 0, 0, 0, 0, tzinfo=timezone.utc), value=application_credential, - key_id=uuid.uuid4() - ) - ) + key_id=uuid.uuid4())) graph_rbac_client.applications.patch( application_object_id=application.object_id, - parameters=ApplicationUpdateParameters( - password_credentials=password_credentials - ) - ) - service_principal = next(graph_rbac_client.service_principals.list( - filter="appId eq '{}'".format(application.app_id))) + parameters=ApplicationUpdateParameters(password_credentials=password_credentials)) + service_principal = next( + graph_rbac_client.service_principals.list(filter="appId eq '{}'".format(application.app_id))) else: raise e @@ -244,21 +216,15 @@ def create_role_assignment(credentials, subscription_id, scope, principal_id): """ authorization_client = AuthorizationManagementClient(credentials, subscription_id) role_name = 'Contributor' - roles = list(authorization_client.role_definitions.list( - scope, - filter="roleName eq '{}'".format(role_name) - )) + roles = list(authorization_client.role_definitions.list(scope, filter="roleName eq '{}'".format(role_name))) contributor_role = roles[0] for i in range(10): try: - authorization_client.role_assignments.create( - scope, - uuid.uuid4(), - { - 'role_definition_id': contributor_role.id, - 'principal_id': principal_id - } - ) + authorization_client.role_assignments.create(scope, uuid.uuid4(), + { + 'role_definition_id': contributor_role.id, + 'principal_id': principal_id + }) break except CloudError as e: # ignore error if service principal has not yet been created @@ -321,7 +287,6 @@ def prompt_tenant_selection(tenant_ids): raise AccountSetupError("Tenant selection not recognized after 3 attempts.") - class Spinner: busy = False delay = 0.1 @@ -329,7 +294,8 @@ class Spinner: @staticmethod def 
spinning_cursor(): while 1: - for cursor in '|/-\\': yield cursor + for cursor in '|/-\\': + yield cursor def __init__(self, delay=None): self.spinner_generator = self.spinning_cursor() @@ -358,7 +324,6 @@ def stop(self): time.sleep(self.delay) - if __name__ == "__main__": print("\nGetting credentials.") # get credentials and tenant_id @@ -374,15 +339,22 @@ def stop(self): "Default values are provided in the brackets. "\ "Hit enter to use default.") kwargs = { - "region": prompt_with_default("Azure Region", DefaultSettings.region), - "resource_group": prompt_with_default("Resource Group Name", DefaultSettings.resource_group), - "storage_account": prompt_with_default("Storage Account Name", DefaultSettings.storage_account), - "batch_account": prompt_with_default("Batch Account Name", DefaultSettings.batch_account), - # "virtual_network_name": prompt_with_default("Virtual Network Name", DefaultSettings.virtual_network_name), - # "subnet_name": prompt_with_default("Subnet Name", DefaultSettings.subnet_name), - "application_name": prompt_with_default("Active Directory Application Name", DefaultSettings.application_name), - "application_credential_name": prompt_with_default("Active Directory Application Credential Name", DefaultSettings.resource_group), - "service_principal": prompt_with_default("Service Principal Name", DefaultSettings.service_principal) + "region": + prompt_with_default("Azure Region", DefaultSettings.region), + "resource_group": + prompt_with_default("Resource Group Name", DefaultSettings.resource_group), + "storage_account": + prompt_with_default("Storage Account Name", DefaultSettings.storage_account), + "batch_account": + prompt_with_default("Batch Account Name", DefaultSettings.batch_account), + # "virtual_network_name": prompt_with_default("Virtual Network Name", DefaultSettings.virtual_network_name), + # "subnet_name": prompt_with_default("Subnet Name", DefaultSettings.subnet_name), + "application_name": + prompt_with_default("Active Directory Application Name", DefaultSettings.application_name), + "application_credential_name": + prompt_with_default("Active Directory Application Credential Name", DefaultSettings.resource_group), + "service_principal": + prompt_with_default("Service Principal Name", DefaultSettings.service_principal) } print("Creating the Azure resources.") @@ -410,9 +382,9 @@ def stop(self): with Spinner(): profile = credentials.get_cli_profile() aad_cred, subscription_id, tenant_id = profile.get_login_credentials( - resource=AZURE_PUBLIC_CLOUD.endpoints.active_directory_graph_resource_id - ) - application_id, service_principal_object_id, application_credential = create_aad_user(aad_cred, tenant_id, **kwargs) + resource=AZURE_PUBLIC_CLOUD.endpoints.active_directory_graph_resource_id) + application_id, service_principal_object_id, application_credential = create_aad_user( + aad_cred, tenant_id, **kwargs) print("Created Azure Active Directory service principal.") @@ -425,10 +397,9 @@ def stop(self): "tenant_id": tenant_id, "client_id": application_id, "credential": application_credential, - # "subnet_id": subnet_id, + # "subnet_id": subnet_id, "batch_account_resource_id": batch_account_id, "storage_account_resource_id": storage_account_id - } - ) + }) print("\n# Copy the following into your .aztk/secrets.yaml file\n{}".format(secrets)) diff --git a/aztk/client/base/helpers/create_user_on_node.py b/aztk/client/base/helpers/create_user_on_node.py index 28c9a9c3..76c76625 100644 --- a/aztk/client/base/helpers/create_user_on_node.py +++ 
b/aztk/client/base/helpers/create_user_on_node.py @@ -32,8 +32,7 @@ def __create_user(self, id: str, node_id: str, username: str, password: str = No def create_user_on_node(base_client, id, node_id, username, ssh_key=None, password=None): try: - __create_user( - base_client, id=id, node_id=node_id, username=username, ssh_key=ssh_key, password=password) + __create_user(base_client, id=id, node_id=node_id, username=username, ssh_key=ssh_key, password=password) except batch_error.BatchErrorException as error: try: base_client.delete_user_on_node(id, node_id, username) diff --git a/aztk/client/base/helpers/delete_user_on_cluster.py b/aztk/client/base/helpers/delete_user_on_cluster.py index b20935e8..22d968cb 100644 --- a/aztk/client/base/helpers/delete_user_on_cluster.py +++ b/aztk/client/base/helpers/delete_user_on_cluster.py @@ -1,5 +1,6 @@ import concurrent.futures + #TODO: remove nodes param def delete_user_on_cluster(base_client, id, nodes, username): with concurrent.futures.ThreadPoolExecutor() as executor: diff --git a/aztk/client/base/helpers/get_application_log.py b/aztk/client/base/helpers/get_application_log.py index 46a70b5b..a5353a37 100644 --- a/aztk/client/base/helpers/get_application_log.py +++ b/aztk/client/base/helpers/get_application_log.py @@ -102,13 +102,9 @@ def get_log(batch_client, blob_client, cluster_id: str, application_name: str, t exit_code=task.execution_info.exit_code) -def get_application_log(base_operations, - cluster_id: str, - application_name: str, - tail=False, - current_bytes: int = 0): +def get_application_log(base_operations, cluster_id: str, application_name: str, tail=False, current_bytes: int = 0): try: - return get_log(base_operations.batch_client, base_operations.blob_client, cluster_id, - application_name, tail, current_bytes) + return get_log(base_operations.batch_client, base_operations.blob_client, cluster_id, application_name, tail, + current_bytes) except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) diff --git a/aztk/client/base/helpers/ssh_into_node.py b/aztk/client/base/helpers/ssh_into_node.py index 5e5a024d..a0a903c4 100644 --- a/aztk/client/base/helpers/ssh_into_node.py +++ b/aztk/client/base/helpers/ssh_into_node.py @@ -2,7 +2,14 @@ from aztk.utils import ssh as ssh_lib -def ssh_into_node(base_client, pool_id, node_id, username, ssh_key=None, password=None, port_forward_list=None, internal=False): +def ssh_into_node(base_client, + pool_id, + node_id, + username, + ssh_key=None, + password=None, + port_forward_list=None, + internal=False): if internal: result = base_client.batch_client.compute_node.get(pool_id=pool_id, node_id=node_id) rls = models.RemoteLogin(ip_address=result.ip_address, port="22") diff --git a/aztk/client/client.py b/aztk/client/client.py index 94257a7b..560392b2 100644 --- a/aztk/client/client.py +++ b/aztk/client/client.py @@ -26,6 +26,7 @@ class CoreClient: should be used.** """ + def _get_context(self, secrets_configuration: models.SecretsConfiguration): self.secrets_configuration = secrets_configuration @@ -86,7 +87,8 @@ def __delete_pool_and_job(self, pool_id: str, keep_logs: bool = False): return job_exists or pool_exists @deprecated("0.10.0") - def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, software_metadata_key: str, start_task, VmImageModel): + def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, software_metadata_key: str, start_task, + VmImageModel): """ Create a pool and job :param cluster_conf: the 
configuration object used to create the cluster @@ -108,8 +110,7 @@ def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, softw network_conf = None if cluster_conf.subnet_id is not None: - network_conf = batch_models.NetworkConfiguration( - subnet_id=cluster_conf.subnet_id) + network_conf = batch_models.NetworkConfiguration(subnet_id=cluster_conf.subnet_id) auto_scale_formula = "$TargetDedicatedNodes={0}; $TargetLowPriorityNodes={1}".format( cluster_conf.size, cluster_conf.size_low_priority) @@ -117,8 +118,7 @@ def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, softw pool = batch_models.PoolAddParameter( id=pool_id, virtual_machine_configuration=batch_models.VirtualMachineConfiguration( - image_reference=image_ref_to_use, - node_agent_sku_id=sku_to_use), + image_reference=image_ref_to_use, node_agent_sku_id=sku_to_use), vm_size=cluster_conf.vm_size, enable_auto_scale=True, auto_scale_formula=auto_scale_formula, @@ -128,8 +128,7 @@ def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, softw max_tasks_per_node=4, network_configuration=network_conf, metadata=[ - batch_models.MetadataItem( - name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key), + batch_models.MetadataItem(name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key), batch_models.MetadataItem( name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_CLUSTER_MODE_METADATA) ]) @@ -138,9 +137,7 @@ def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, softw helpers.create_pool_if_not_exist(pool, self.batch_client) # Create job - job = batch_models.JobAddParameter( - id=job_id, - pool_info=batch_models.PoolInformation(pool_id=pool_id)) + job = batch_models.JobAddParameter(id=job_id, pool_info=batch_models.PoolInformation(pool_id=pool_id)) # Add job to batch self.batch_client.job.add(job) @@ -164,10 +161,8 @@ def __list_clusters(self, software_metadata_key): List all the cluster on your account. 
""" pools = self.batch_client.pool.list() - software_metadata = ( - constants.AZTK_SOFTWARE_METADATA_KEY, software_metadata_key) - cluster_metadata = ( - constants.AZTK_MODE_METADATA_KEY, constants.AZTK_CLUSTER_MODE_METADATA) + software_metadata = (constants.AZTK_SOFTWARE_METADATA_KEY, software_metadata_key) + cluster_metadata = (constants.AZTK_MODE_METADATA_KEY, constants.AZTK_CLUSTER_MODE_METADATA) aztk_pools = [] for pool in [pool for pool in pools if pool.metadata]: @@ -177,7 +172,8 @@ def __list_clusters(self, software_metadata_key): return aztk_pools @deprecated("0.10.0") - def __create_user(self, pool_id: str, node_id: str, username: str, password: str = None, ssh_key: str = None) -> str: + def __create_user(self, pool_id: str, node_id: str, username: str, password: str = None, + ssh_key: str = None) -> str: """ Create a pool user :param pool: the pool to add the user to @@ -187,16 +183,14 @@ def __create_user(self, pool_id: str, node_id: str, username: str, password: str :param ssh_key: ssh_key of the user to add """ # Create new ssh user for the given node - self.batch_client.compute_node.add_user( - pool_id, - node_id, - batch_models.ComputeNodeUser( - name=username, - is_admin=True, - password=password, - ssh_public_key=get_ssh_key.get_user_public_key( - ssh_key, self.secrets_configuration), - expiry_time=datetime.now(timezone.utc) + timedelta(days=365))) + self.batch_client.compute_node.add_user(pool_id, node_id, + batch_models.ComputeNodeUser( + name=username, + is_admin=True, + password=password, + ssh_public_key=get_ssh_key.get_user_public_key( + ssh_key, self.secrets_configuration), + expiry_time=datetime.now(timezone.utc) + timedelta(days=365))) @deprecated("0.10.0") def __delete_user(self, pool_id: str, node_id: str, username: str) -> str: @@ -217,8 +211,7 @@ def __get_remote_login_settings(self, pool_id: str, node_id: str): :param node_id :returns aztk.models.RemoteLogin """ - result = self.batch_client.compute_node.get_remote_login_settings( - pool_id, node_id) + result = self.batch_client.compute_node.get_remote_login_settings(pool_id, node_id) return models.RemoteLogin(ip_address=result.remote_login_ip_address, port=str(result.remote_login_port)) @deprecated("0.10.0") @@ -246,11 +239,10 @@ def __generate_user_on_pool(self, pool_id, nodes): ssh_key = RSA.generate(2048) ssh_pub_key = ssh_key.publickey().exportKey('OpenSSH').decode('utf-8') with concurrent.futures.ThreadPoolExecutor() as executor: - futures = {executor.submit(self.__create_user_on_node, - generated_username, - pool_id, - node.id, - ssh_pub_key): node for node in nodes} + futures = { + executor.submit(self.__create_user_on_node, generated_username, pool_id, node.id, ssh_pub_key): node + for node in nodes + } concurrent.futures.wait(futures) return generated_username, ssh_key @@ -258,12 +250,10 @@ def __generate_user_on_pool(self, pool_id, nodes): @deprecated("0.10.0") def __create_user_on_pool(self, username, pool_id, nodes, ssh_pub_key=None, password=None): with concurrent.futures.ThreadPoolExecutor() as executor: - futures = {executor.submit(self.__create_user_on_node, - username, - pool_id, - node.id, - ssh_pub_key, - password): node for node in nodes} + futures = { + executor.submit(self.__create_user_on_node, username, pool_id, node.id, ssh_pub_key, password): node + for node in nodes + } concurrent.futures.wait(futures) @deprecated("0.10.0") @@ -295,8 +285,7 @@ def __node_run(self, cluster_id, node_id, command, internal, container_name=None node_rls.port, ssh_key=ssh_key.exportKey().decode('utf-8'), 
container_name=container_name, - timeout=timeout - ) + timeout=timeout) return output finally: self.__delete_user(cluster_id, node.id, generated_username) @@ -319,9 +308,7 @@ def __cluster_run(self, cluster_id, command, internal, container_name=None, time cluster_nodes, ssh_key=ssh_key.exportKey().decode('utf-8'), container_name=container_name, - timeout=timeout - ) - ) + timeout=timeout)) return output except OSError as exc: raise exc @@ -329,7 +316,14 @@ def __cluster_run(self, cluster_id, command, internal, container_name=None, time self.__delete_user_on_pool(generated_username, pool.id, nodes) @deprecated("0.10.0") - def __cluster_copy(self, cluster_id, source_path, destination_path=None, container_name=None, internal=False, get=False, timeout=None): + def __cluster_copy(self, + cluster_id, + source_path, + destination_path=None, + container_name=None, + internal=False, + get=False, + timeout=None): pool, nodes = self.__get_pool_details(cluster_id) nodes = list(nodes) if internal: @@ -348,9 +342,7 @@ def __cluster_copy(self, cluster_id, source_path, destination_path=None, contain destination_path=destination_path, ssh_key=ssh_key.exportKey().decode('utf-8'), get=get, - timeout=timeout - ) - ) + timeout=timeout)) return output except (OSError, batch_error.BatchErrorException) as exc: raise exc @@ -358,7 +350,14 @@ def __cluster_copy(self, cluster_id, source_path, destination_path=None, contain self.__delete_user_on_pool(generated_username, pool.id, nodes) @deprecated("0.10.0") - def __ssh_into_node(self, pool_id, node_id, username, ssh_key=None, password=None, port_forward_list=None, internal=False): + def __ssh_into_node(self, + pool_id, + node_id, + username, + ssh_key=None, + password=None, + port_forward_list=None, + internal=False): if internal: result = self.batch_client.compute_node.get(pool_id=pool_id, node_id=node_id) rls = models.RemoteLogin(ip_address=result.ip_address, port="22") @@ -376,14 +375,8 @@ def __ssh_into_node(self, pool_id, node_id, username, ssh_key=None, password=Non ) @deprecated("0.10.0") - def __submit_job(self, - job_configuration, - start_task, - job_manager_task, - autoscale_formula, - software_metadata_key: str, - vm_image_model, - application_metadata): + def __submit_job(self, job_configuration, start_task, job_manager_task, autoscale_formula, + software_metadata_key: str, vm_image_model, application_metadata): """ Job Submission :param job_configuration -> aztk_sdk.spark.models.JobConfiguration @@ -404,8 +397,7 @@ def __submit_job(self, # set up subnet if necessary network_conf = None if job_configuration.subnet_id: - network_conf = batch_models.NetworkConfiguration( - subnet_id=job_configuration.subnet_id) + network_conf = batch_models.NetworkConfiguration(subnet_id=job_configuration.subnet_id) # set up a schedule for a recurring job auto_pool_specification = batch_models.AutoPoolSpecification( @@ -415,8 +407,7 @@ def __submit_job(self, pool=batch_models.PoolSpecification( display_name=job_configuration.id, virtual_machine_configuration=batch_models.VirtualMachineConfiguration( - image_reference=image_ref_to_use, - node_agent_sku_id=sku_to_use), + image_reference=image_ref_to_use, node_agent_sku_id=sku_to_use), vm_size=job_configuration.vm_size, enable_auto_scale=True, auto_scale_formula=autoscale_formula, @@ -426,13 +417,10 @@ def __submit_job(self, network_configuration=network_conf, max_tasks_per_node=4, metadata=[ - batch_models.MetadataItem( - name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key), + 
batch_models.MetadataItem(name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key), batch_models.MetadataItem( name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_JOB_MODE_METADATA) - ] - ) - ) + ])) # define job specification job_spec = batch_models.JobSpecification( @@ -440,25 +428,15 @@ def __submit_job(self, display_name=job_configuration.id, on_all_tasks_complete=batch_models.OnAllTasksComplete.terminate_job, job_manager_task=job_manager_task, - metadata=[ - batch_models.MetadataItem( - name='applications', value=application_metadata) - ] - ) + metadata=[batch_models.MetadataItem(name='applications', value=application_metadata)]) # define schedule schedule = batch_models.Schedule( - do_not_run_until=None, - do_not_run_after=None, - start_window=None, - recurrence_interval=None - ) + do_not_run_until=None, do_not_run_after=None, start_window=None, recurrence_interval=None) # create job schedule and add task setup = batch_models.JobScheduleAddParameter( - id=job_configuration.id, - schedule=schedule, - job_specification=job_spec) + id=job_configuration.id, schedule=schedule, job_specification=job_spec) self.batch_client.job_schedule.add(setup) diff --git a/aztk/client/cluster/helpers/copy.py b/aztk/client/cluster/helpers/copy.py index bc97d8c1..4148dd28 100644 --- a/aztk/client/cluster/helpers/copy.py +++ b/aztk/client/cluster/helpers/copy.py @@ -8,7 +8,14 @@ from aztk.utils import helpers -def cluster_copy(cluster_operations, cluster_id, source_path, destination_path=None, container_name=None, internal=False, get=False, timeout=None): +def cluster_copy(cluster_operations, + cluster_id, + source_path, + destination_path=None, + container_name=None, + internal=False, + get=False, + timeout=None): cluster = cluster_operations.get(cluster_id) pool, nodes = cluster.pool, list(cluster.nodes) if internal: @@ -31,9 +38,7 @@ def cluster_copy(cluster_operations, cluster_id, source_path, destination_path=N destination_path=destination_path, ssh_key=ssh_key.exportKey().decode('utf-8'), get=get, - timeout=timeout - ) - ) + timeout=timeout)) return output except (OSError, batch_error.BatchErrorException) as exc: raise exc diff --git a/aztk/client/cluster/helpers/create.py b/aztk/client/cluster/helpers/create.py index eb0a6d3c..736c79fa 100644 --- a/aztk/client/cluster/helpers/create.py +++ b/aztk/client/cluster/helpers/create.py @@ -5,7 +5,8 @@ from aztk.utils import helpers, constants -def create_pool_and_job(core_cluster_operations, cluster_conf: models.ClusterConfiguration, software_metadata_key: str, start_task, VmImageModel): +def create_pool_and_job(core_cluster_operations, cluster_conf: models.ClusterConfiguration, software_metadata_key: str, + start_task, VmImageModel): """ Create a pool and job :param cluster_conf: the configuration object used to create the cluster @@ -27,8 +28,7 @@ def create_pool_and_job(core_cluster_operations, cluster_conf: models.ClusterCon network_conf = None if cluster_conf.subnet_id is not None: - network_conf = batch_models.NetworkConfiguration( - subnet_id=cluster_conf.subnet_id) + network_conf = batch_models.NetworkConfiguration(subnet_id=cluster_conf.subnet_id) auto_scale_formula = "$TargetDedicatedNodes={0}; $TargetLowPriorityNodes={1}".format( cluster_conf.size, cluster_conf.size_low_priority) @@ -36,8 +36,7 @@ def create_pool_and_job(core_cluster_operations, cluster_conf: models.ClusterCon pool = batch_models.PoolAddParameter( id=pool_id, virtual_machine_configuration=batch_models.VirtualMachineConfiguration( - 
image_reference=image_ref_to_use, - node_agent_sku_id=sku_to_use), + image_reference=image_ref_to_use, node_agent_sku_id=sku_to_use), vm_size=cluster_conf.vm_size, enable_auto_scale=True, auto_scale_formula=auto_scale_formula, @@ -47,8 +46,7 @@ def create_pool_and_job(core_cluster_operations, cluster_conf: models.ClusterCon max_tasks_per_node=4, network_configuration=network_conf, metadata=[ - batch_models.MetadataItem( - name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key), + batch_models.MetadataItem(name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key), batch_models.MetadataItem( name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_CLUSTER_MODE_METADATA) ]) @@ -57,9 +55,7 @@ def create_pool_and_job(core_cluster_operations, cluster_conf: models.ClusterCon helpers.create_pool_if_not_exist(pool, core_cluster_operations.batch_client) # Create job - job = batch_models.JobAddParameter( - id=job_id, - pool_info=batch_models.PoolInformation(pool_id=pool_id)) + job = batch_models.JobAddParameter(id=job_id, pool_info=batch_models.PoolInformation(pool_id=pool_id)) # Add job to batch core_cluster_operations.batch_client.job.add(job) diff --git a/aztk/client/cluster/helpers/get.py b/aztk/client/cluster/helpers/get.py index 41c25232..1965a296 100644 --- a/aztk/client/cluster/helpers/get.py +++ b/aztk/client/cluster/helpers/get.py @@ -1,5 +1,3 @@ - - #TODO: return Cluster instead of (pool, nodes) from aztk import models diff --git a/aztk/client/cluster/helpers/list.py b/aztk/client/cluster/helpers/list.py index e1f825a5..b439d93e 100644 --- a/aztk/client/cluster/helpers/list.py +++ b/aztk/client/cluster/helpers/list.py @@ -7,10 +7,8 @@ def list_clusters(cluster_client, software_metadata_key): List all the cluster on your account. 
""" pools = cluster_client.batch_client.pool.list() - software_metadata = ( - constants.AZTK_SOFTWARE_METADATA_KEY, software_metadata_key) - cluster_metadata = ( - constants.AZTK_MODE_METADATA_KEY, constants.AZTK_CLUSTER_MODE_METADATA) + software_metadata = (constants.AZTK_SOFTWARE_METADATA_KEY, software_metadata_key) + cluster_metadata = (constants.AZTK_MODE_METADATA_KEY, constants.AZTK_CLUSTER_MODE_METADATA) aztk_clusters = [] for pool in [pool for pool in pools if pool.metadata]: diff --git a/aztk/client/job/helpers/submit.py b/aztk/client/job/helpers/submit.py index 4c8ee7b8..1021201f 100644 --- a/aztk/client/job/helpers/submit.py +++ b/aztk/client/job/helpers/submit.py @@ -4,15 +4,8 @@ from aztk.utils import helpers, constants -def submit_job( - job_client, - job_configuration, - start_task, - job_manager_task, - autoscale_formula, - software_metadata_key: str, - vm_image_model, - application_metadata): +def submit_job(job_client, job_configuration, start_task, job_manager_task, autoscale_formula, + software_metadata_key: str, vm_image_model, application_metadata): """ Job Submission :param job_configuration -> aztk_sdk.spark.models.JobConfiguration @@ -54,7 +47,8 @@ def submit_job( max_tasks_per_node=4, metadata=[ batch_models.MetadataItem(name=constants.AZTK_SOFTWARE_METADATA_KEY, value=software_metadata_key), - batch_models.MetadataItem(name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_JOB_MODE_METADATA) + batch_models.MetadataItem( + name=constants.AZTK_MODE_METADATA_KEY, value=constants.AZTK_JOB_MODE_METADATA) ])) # define job specification @@ -66,7 +60,8 @@ def submit_job( metadata=[batch_models.MetadataItem(name='applications', value=application_metadata)]) # define schedule - schedule = batch_models.Schedule(do_not_run_until=None, do_not_run_after=None, start_window=None, recurrence_interval=None) + schedule = batch_models.Schedule( + do_not_run_until=None, do_not_run_after=None, start_window=None, recurrence_interval=None) # create job schedule and add task setup = batch_models.JobScheduleAddParameter(id=job_configuration.id, schedule=schedule, job_specification=job_spec) diff --git a/aztk/core/models/fields.py b/aztk/core/models/fields.py index 12d1719e..f7cf41c4 100644 --- a/aztk/core/models/fields.py +++ b/aztk/core/models/fields.py @@ -2,8 +2,10 @@ import enum from aztk.error import InvalidModelFieldError + from . 
import validators as aztk_validators + class ModelMergeStrategy(enum.Enum): Override = 1 """ @@ -14,6 +16,7 @@ class ModelMergeStrategy(enum.Enum): Try to merge value nested """ + class ListMergeStrategy(enum.Enum): Replace = 1 """ @@ -24,11 +27,13 @@ class ListMergeStrategy(enum.Enum): Append all the values of the new list """ + # pylint: disable=W0212 class Field: """ Base class for all model fields """ + def __init__(self, *validators, **kwargs): self.default = kwargs.get('default') self.required = 'default' not in kwargs @@ -99,6 +104,7 @@ class Integer(Field): """ Model Integer field """ + def __init__(self, *args, **kwargs): super().__init__(aztk_validators.Integer(), *args, **kwargs) @@ -132,8 +138,7 @@ def __init__(self, model=None, **kwargs): self.merge_strategy = kwargs.get('merge_strategy', ListMergeStrategy.Append) self.skip_none = kwargs.get('skip_none', True) - super().__init__( - aztk_validators.List(*kwargs.get('inner_validators', [])), **kwargs) + super().__init__(aztk_validators.List(*kwargs.get('inner_validators', [])), **kwargs) def __set__(self, instance, value): if isinstance(value, collections.MutableSequence): @@ -145,7 +150,7 @@ def __set__(self, instance, value): def _resolve(self, value): result = [] for item in value: - if item is None and self.skip_none: # Skip none values + if item is None and self.skip_none: # Skip none values continue if self.model and isinstance(item, collections.MutableMapping): @@ -176,6 +181,7 @@ def serialize(self, instance): output.append(item) return output + class Model(Field): """ Field is another model @@ -214,10 +220,12 @@ def serialize(self, instance): else: return None + class Enum(Field): """ Field that should be an enum """ + def __init__(self, model, *args, **kwargs): super().__init__(aztk_validators.InstanceOf(model), *args, **kwargs) @@ -232,7 +240,6 @@ def __set__(self, instance, value): raise InvalidModelFieldError("{0} is not a valid option. Use one of {1}".format(value, available)) super().__set__(instance, value) - def serialize(self, instance): val = super().serialize(instance) if val is not None: diff --git a/aztk/core/models/validators.py b/aztk/core/models/validators.py index 6ed9d8e9..3e050270 100644 --- a/aztk/core/models/validators.py +++ b/aztk/core/models/validators.py @@ -9,6 +9,7 @@ class Validator: To write your validator extend this class and implement the validate method. 
To raise an error raise InvalidModelFieldError """ + def __call__(self, value): self.validate(value) @@ -16,7 +17,6 @@ def validate(self, value): raise NotImplementedError() - class Required(Validator): """ Validate the field value is not `None` @@ -77,7 +77,6 @@ def validate(self, value): raise InvalidModelFieldError('{0} should be a boolean'.format(value)) - class In(Validator): """ Validate the field value is in the list of allowed choices @@ -93,6 +92,7 @@ def validate(self, value): if value not in self.choices: raise InvalidModelFieldError('{0} should be in {1}'.format(value, self.choices)) + class InstanceOf(Validator): """ Check if the field is an instance of the given type @@ -106,8 +106,7 @@ def validate(self, value): return if not isinstance(value, self.type): - raise InvalidModelFieldError( - "should be an instance of '{}'".format(self.type.__name__)) + raise InvalidModelFieldError("should be an instance of '{}'".format(self.type.__name__)) class Model(Validator): @@ -123,8 +122,7 @@ def validate(self, value): return if not isinstance(value, self.model): - raise InvalidModelFieldError( - "should be an instance of '{}'".format(self.model.__name__)) + raise InvalidModelFieldError("should be an instance of '{}'".format(self.model.__name__)) value.validate() diff --git a/aztk/error.py b/aztk/error.py index de110b82..1d68374b 100644 --- a/aztk/error.py +++ b/aztk/error.py @@ -11,15 +11,19 @@ class AztkError(Exception): class AztkAttributeError(AztkError): pass + class ClusterNotReadyError(AztkError): pass + class AzureApiInitError(AztkError): pass + class InvalidPluginConfigurationError(AztkError): pass + class InvalidModelError(AztkError): def __init__(self, message: str, model=None): super().__init__() @@ -34,12 +38,15 @@ def __str__(self): class MissingRequiredAttributeError(InvalidModelError): pass + class InvalidCustomScriptError(InvalidModelError): pass + class InvalidPluginReferenceError(InvalidModelError): pass + class InvalidModelFieldError(InvalidModelError): def __init__(self, message: str, model=None, field=None): super().__init__(message, model) diff --git a/aztk/internal/cluster_data/blob_data.py b/aztk/internal/cluster_data/blob_data.py index 098d63c1..ceaf5f06 100644 --- a/aztk/internal/cluster_data/blob_data.py +++ b/aztk/internal/cluster_data/blob_data.py @@ -2,17 +2,18 @@ import datetime from azure.storage.blob import BlockBlobService, BlobPermissions + class BlobData: """ Object mapping to a blob entry. 
Can generate resource files for batch """ + def __init__(self, blob_client: BlockBlobService, container: str, blob: str): self.container = container self.blob = blob self.dest = blob self.blob_client = blob_client - def to_resource_file(self, dest: str = None) -> batch_models.ResourceFile: sas_token = self.blob_client.generate_blob_shared_access_signature( self.container, @@ -20,7 +21,6 @@ def to_resource_file(self, dest: str = None) -> batch_models.ResourceFile: permission=BlobPermissions.READ, expiry=datetime.datetime.utcnow() + datetime.timedelta(days=365)) - sas_url = self.blob_client.make_blob_url( - self.container, self.blob, sas_token=sas_token) + sas_url = self.blob_client.make_blob_url(self.container, self.blob, sas_token=sas_token) return batch_models.ResourceFile(file_path=dest or self.dest, blob_source=sas_url) diff --git a/aztk/internal/cluster_data/node_data.py b/aztk/internal/cluster_data/node_data.py index e484b01b..06d37b10 100644 --- a/aztk/internal/cluster_data/node_data.py +++ b/aztk/internal/cluster_data/node_data.py @@ -70,7 +70,7 @@ def add_dir(self, path: str, dest: str = None, exclude: List[str] = None): relative_folder = os.path.relpath(base, path) for file in files: if self._includeFile(file, exclude): - self.add_file(os.path.join(base, file), os.path.join(dest, relative_folder), binary = False) + self.add_file(os.path.join(base, file), os.path.join(dest, relative_folder), binary=False) def _add_custom_scripts(self): data = [] @@ -90,7 +90,8 @@ def _add_custom_scripts(self): raise InvalidCustomScriptError("Custom script '{0}' doesn't exists.".format(custom_script.script)) elif isinstance(custom_script.script, models.File): new_file_name = str(index) + '_' + custom_script.script.name - self.zipf.writestr(os.path.join('custom-scripts', new_file_name), custom_script.script.payload.getvalue()) + self.zipf.writestr( + os.path.join('custom-scripts', new_file_name), custom_script.script.payload.getvalue()) self.zipf.writestr( os.path.join(CUSTOM_SCRIPT_FOLDER, CUSTOM_SCRIPT_METADATA_FILE), yaml.dump(data, default_flow_style=False)) @@ -108,8 +109,8 @@ def _add_spark_configuration(self): binary=False) # add ssh keys for passwordless ssh - self.zipf.writestr( 'id_rsa.pub', spark_configuration.ssh_key_pair['pub_key']) - self.zipf.writestr( 'id_rsa', spark_configuration.ssh_key_pair['priv_key']) + self.zipf.writestr('id_rsa.pub', spark_configuration.ssh_key_pair['pub_key']) + self.zipf.writestr('id_rsa', spark_configuration.ssh_key_pair['priv_key']) if spark_configuration.jars: for jar in spark_configuration.jars: @@ -141,14 +142,15 @@ def _add_plugins(self): for file in plugin.files: zipf = self.zipf.writestr('plugins/{0}/{1}'.format(plugin.name, file.target), file.content()) if plugin.execute: - data.append(dict( - name=plugin.name, - execute='{0}/{1}'.format(plugin.name, plugin.execute), - args=plugin.args, - env=plugin.env, - target=plugin.target.value, - target_role=plugin.target_role.value, - )) + data.append( + dict( + name=plugin.name, + execute='{0}/{1}'.format(plugin.name, plugin.execute), + args=plugin.args, + env=plugin.env, + target=plugin.target.value, + target_role=plugin.target_role.value, + )) self.zipf.writestr(os.path.join('plugins', 'plugins-manifest.yaml'), yaml.dump(data)) return zipf diff --git a/aztk/internal/configuration_base.py b/aztk/internal/configuration_base.py index 7b8e7766..ca4e977a 100644 --- a/aztk/internal/configuration_base.py +++ b/aztk/internal/configuration_base.py @@ -1,6 +1,7 @@ import yaml from aztk.error import AztkError, 
InvalidModelError + class ConfigurationBase: """ Base class for any configuration. @@ -19,7 +20,6 @@ def from_dict(cls, args: dict): pretty_args = yaml.dump(args, default_flow_style=False) raise AztkError("{0} {1}\n{2}".format(cls.__name__, str(e), pretty_args)) - @classmethod def _from_dict(cls, args: dict): clean = dict((k, v) for k, v in args.items() if v) diff --git a/aztk/internal/docker_cmd.py b/aztk/internal/docker_cmd.py index 7dc75e1e..818d6d96 100644 --- a/aztk/internal/docker_cmd.py +++ b/aztk/internal/docker_cmd.py @@ -1,6 +1,7 @@ import os from aztk.utils.command_builder import CommandBuilder + class DockerCmd: """ Class helping to write a docker command @@ -17,7 +18,6 @@ def __init__(self, name: str, docker_repo: str, cmd: str, gpu_enabled=False): self.cmd.add_argument(docker_repo) self.cmd.add_argument(cmd) - def add_env(self, env: str, value: str): self.cmd.add_option('-e', '{0}={1}'.format(env, value)) @@ -31,8 +31,7 @@ def share_folder(self, folder: str): self.cmd.add_option('-v', '{0}:{0}'.format(folder)) def open_port(self, port: int): - self.cmd.add_option('-p', '{0}:{0}'.format(port)) # Spark Master UI - + self.cmd.add_option('-p', '{0}:{0}'.format(port)) # Spark Master UI def to_str(self): return self.cmd.to_str() diff --git a/aztk/models/cluster.py b/aztk/models/cluster.py index 0683395b..24f78394 100644 --- a/aztk/models/cluster.py +++ b/aztk/models/cluster.py @@ -1,9 +1,8 @@ import azure.batch.models as batch_models + class Cluster: - def __init__(self, - pool: batch_models.CloudPool, - nodes: batch_models.ComputeNodePaged = None): + def __init__(self, pool: batch_models.CloudPool, nodes: batch_models.ComputeNodePaged = None): self.id = pool.id self.pool = pool self.nodes = nodes @@ -20,4 +19,3 @@ def __init__(self, self.current_low_pri_nodes = pool.current_low_priority_nodes self.target_dedicated_nodes = pool.target_dedicated_nodes self.target_low_pri_nodes = pool.target_low_priority_nodes - diff --git a/aztk/models/cluster_configuration.py b/aztk/models/cluster_configuration.py index 301deb56..fdc2166c 100644 --- a/aztk/models/cluster_configuration.py +++ b/aztk/models/cluster_configuration.py @@ -1,6 +1,6 @@ import aztk.error as error from aztk.core.models import Model, fields -from aztk.utils import deprecated,deprecate, helpers +from aztk.utils import deprecated, deprecate, helpers from .custom_script import CustomScript from .file_share import FileShare @@ -9,6 +9,7 @@ from .user_configuration import UserConfiguration from .scheduling_target import SchedulingTarget + class ClusterConfiguration(Model): """ Cluster configuration model @@ -45,7 +46,8 @@ def __init__(self, *args, **kwargs): kwargs['size'] = kwargs.pop('vm_count') if 'vm_low_pri_count' in kwargs: - deprecate("vm_low_pri_count is deprecated for ClusterConfiguration.", "Please use size_low_priority instead.") + deprecate("vm_low_pri_count is deprecated for ClusterConfiguration.", + "Please use size_low_priority instead.") kwargs['size_low_priority'] = kwargs.pop('vm_low_pri_count') super().__init__(*args, **kwargs) @@ -77,7 +79,6 @@ def mixed_mode(self) -> bool: """ return self.size > 0 and self.size_low_priority > 0 - def gpu_enabled(self): return helpers.is_gpu_enabled(self.vm_size) @@ -92,8 +93,7 @@ def __validate__(self) -> bool: if self.vm_size is None: raise error.InvalidModelError( - "Please supply a vm_size in either the cluster.yaml configuration file or with a parameter (--vm-size)" - ) + "Please supply a vm_size in either the cluster.yaml configuration file or with a parameter 
(--vm-size)") if self.mixed_mode() and not self.subnet_id: raise error.InvalidModelError( @@ -101,7 +101,8 @@ def __validate__(self) -> bool: ) if self.custom_scripts: - deprecate("0.9.0", "Custom scripts are DEPRECATED.", "Use plugins instead. See https://aztk.readthedocs.io/en/v0.7.0/15-plugins.html.") + deprecate("0.9.0", "Custom scripts are DEPRECATED.", + "Use plugins instead. See https://aztk.readthedocs.io/en/v0.7.0/15-plugins.html.") if self.scheduling_target == SchedulingTarget.Dedicated and self.size == 0: raise error.InvalidModelError("Scheduling target cannot be Dedicated if dedicated vm size is 0") diff --git a/aztk/models/custom_script.py b/aztk/models/custom_script.py index aaf1473b..ec6f236a 100644 --- a/aztk/models/custom_script.py +++ b/aztk/models/custom_script.py @@ -1,5 +1,6 @@ from aztk.core.models import Model, fields + class CustomScript(Model): name = fields.String() script = fields.String() diff --git a/aztk/models/file.py b/aztk/models/file.py index 4baf1cb8..74cddbee 100644 --- a/aztk/models/file.py +++ b/aztk/models/file.py @@ -1,5 +1,6 @@ import io + class File: def __init__(self, name: str, payload: io.StringIO): self.name = name diff --git a/aztk/models/file_share.py b/aztk/models/file_share.py index c94bdcbb..060f65c8 100644 --- a/aztk/models/file_share.py +++ b/aztk/models/file_share.py @@ -1,5 +1,6 @@ from aztk.core.models import Model, fields + class FileShare(Model): storage_account_name = fields.String() storage_account_key = fields.String() diff --git a/aztk/models/node_output.py b/aztk/models/node_output.py index 9d20de60..891e8224 100644 --- a/aztk/models/node_output.py +++ b/aztk/models/node_output.py @@ -1,5 +1,6 @@ from tempfile import SpooledTemporaryFile -from typing import Union +from typing import Union + class NodeOutput: def __init__(self, id: str, output: Union[SpooledTemporaryFile, str] = None, error: Exception = None): diff --git a/aztk/models/plugins/internal/plugin_manager.py b/aztk/models/plugins/internal/plugin_manager.py index 814c2e44..1b4c68c2 100644 --- a/aztk/models/plugins/internal/plugin_manager.py +++ b/aztk/models/plugins/internal/plugin_manager.py @@ -28,8 +28,7 @@ class PluginManager: nvblas=plugins.NvBLASPlugin, apt_get=plugins.AptGetPlugin, pip_install=plugins.PipPlugin, - conda_install=plugins.CondaPlugin - ) + conda_install=plugins.CondaPlugin) def __init__(self): self.loaded = False @@ -51,7 +50,8 @@ def get_args_for(self, cls): args = dict() for key, param in signature.parameters.items(): if param.kind == param.POSITIONAL_OR_KEYWORD or param.kind == param.KEYWORD_ONLY: - args[key] = PluginArgument(key, default=param.default, required=param.default is inspect.Parameter.empty) + args[key] = PluginArgument( + key, default=param.default, required=param.default is inspect.Parameter.empty) return args @@ -66,17 +66,14 @@ def _validate_args(self, plugin_cls, args: dict): for arg in plugin_args.values(): if args.get(arg.name) is None: if arg.required: - message = "Missing a required argument {0} for plugin {1}".format( - arg.name, plugin_cls.__name__) + message = "Missing a required argument {0} for plugin {1}".format(arg.name, plugin_cls.__name__) raise InvalidPluginReferenceError(message) args[arg.name] = arg.default - def _validate_no_extra_args(self, plugin_cls, plugin_args: dict, args: dict): for name in args: if not name in plugin_args: - message = "Plugin {0} doesn't have an argument called '{1}'".format( - plugin_cls.__name__, name) + message = "Plugin {0} doesn't have an argument called 
'{1}'".format(plugin_cls.__name__, name) raise InvalidPluginReferenceError(message) diff --git a/aztk/models/plugins/plugin_configuration.py b/aztk/models/plugins/plugin_configuration.py index 54561f90..72b2229b 100644 --- a/aztk/models/plugins/plugin_configuration.py +++ b/aztk/models/plugins/plugin_configuration.py @@ -42,6 +42,7 @@ def public_port(self): return self.public return None + class PluginConfiguration(Model): """ Plugin manifest that should be returned in the main.py of your plugin diff --git a/aztk/models/plugins/plugin_file.py b/aztk/models/plugins/plugin_file.py index 1f3fafb8..277281c0 100644 --- a/aztk/models/plugins/plugin_file.py +++ b/aztk/models/plugins/plugin_file.py @@ -2,6 +2,7 @@ from typing import Union from aztk.core.models import Model, fields + class PluginFile(Model): """ Reference to a file for a plugin. @@ -15,7 +16,7 @@ def __init__(self, target: str = None, local_path: str = None): def content(self): with open(self.local_path, "r", encoding='UTF-8') as f: - return f.read() + return f.read() class TextPluginFile(Model): @@ -29,7 +30,7 @@ class TextPluginFile(Model): target = fields.String() - def __init__(self, target: str, content: Union[str,io.StringIO]): + def __init__(self, target: str, content: Union[str, io.StringIO]): super().__init__(target=target) if isinstance(content, str): self._content = content diff --git a/aztk/models/port_forward_specification.py b/aztk/models/port_forward_specification.py index d34b1b63..d7807d78 100644 --- a/aztk/models/port_forward_specification.py +++ b/aztk/models/port_forward_specification.py @@ -1,5 +1,6 @@ from aztk.core.models import Model, fields + class PortForwardingSpecification(Model): remote_port = fields.Integer() local_port = fields.Integer() diff --git a/aztk/models/scheduling_target.py b/aztk/models/scheduling_target.py index 7f5886aa..f89a91f8 100644 --- a/aztk/models/scheduling_target.py +++ b/aztk/models/scheduling_target.py @@ -1,5 +1,6 @@ from enum import Enum + class SchedulingTarget(Enum): """ Target where task will get scheduled. 
diff --git a/aztk/models/secrets_configuration.py b/aztk/models/secrets_configuration.py index 71b2c6b9..4559d1cd 100644 --- a/aztk/models/secrets_configuration.py +++ b/aztk/models/secrets_configuration.py @@ -1,6 +1,7 @@ from aztk.core.models import Model, fields from aztk.error import InvalidModelError + class ServicePrincipalConfiguration(Model): """ Container class for AAD authentication @@ -11,6 +12,7 @@ class ServicePrincipalConfiguration(Model): batch_account_resource_id = fields.String() storage_account_resource_id = fields.String() + class SharedKeyConfiguration(Model): """ Container class for shared key authentication @@ -46,14 +48,10 @@ class SecretsConfiguration(Model): def __validate__(self): if self.service_principal and self.shared_key: - raise InvalidModelError( - "Both service_principal and shared_key auth are configured, must use only one" - ) + raise InvalidModelError("Both service_principal and shared_key auth are configured, must use only one") if not self.service_principal and not self.shared_key: - raise InvalidModelError( - "Neither service_principal and shared_key auth are configured, must use only one" - ) + raise InvalidModelError("Neither service_principal and shared_key auth are configured, must use only one") def is_aad(self): return self.service_principal is not None diff --git a/aztk/models/toolkit.py b/aztk/models/toolkit.py index 8285e65b..0f78d9e7 100644 --- a/aztk/models/toolkit.py +++ b/aztk/models/toolkit.py @@ -2,16 +2,19 @@ from aztk.utils import constants, deprecate from aztk.core.models import Model, fields + class ToolkitDefinition: def __init__(self, versions, environments): self.versions = versions self.environments = environments + class ToolkitEnvironmentDefinition: def __init__(self, versions=None, default=""): self.versions = versions or [""] self.default = default + TOOLKIT_MAP = dict( spark=ToolkitDefinition( versions=["1.6.3", "2.1.0", "2.2.0", "2.3.0"], @@ -20,8 +23,7 @@ def __init__(self, versions=None, default=""): r=ToolkitEnvironmentDefinition(), miniconda=ToolkitEnvironmentDefinition(), anaconda=ToolkitEnvironmentDefinition(), - ) - ), + )), ) @@ -69,7 +71,6 @@ def __validate__(self): "Environment '{0}' version '{1}' for toolkit '{2}' is not available. 
Use one of: {3}".format( self.environment, self.environment_version, self.software, env_def.versions)) - def get_docker_repo(self, gpu: bool): if self.docker_repo: return self.docker_repo @@ -97,7 +98,6 @@ def _get_docker_tag(self, gpu: bool): return '-'.join(array) - def _get_environment_definition(self) -> ToolkitEnvironmentDefinition: toolkit = TOOLKIT_MAP.get(self.software) diff --git a/aztk/models/user_configuration.py b/aztk/models/user_configuration.py index 92ed4256..6cf8934a 100644 --- a/aztk/models/user_configuration.py +++ b/aztk/models/user_configuration.py @@ -1,5 +1,6 @@ from aztk.core.models import Model, fields + class UserConfiguration(Model): username = fields.String() ssh_key = fields.String(default=None) diff --git a/aztk/node_scripts/core/config.py b/aztk/node_scripts/core/config.py index 8a1e6dcc..d4a8efe6 100644 --- a/aztk/node_scripts/core/config.py +++ b/aztk/node_scripts/core/config.py @@ -10,8 +10,6 @@ from azure.mgmt.storage import StorageManagementClient from azure.storage.common import CloudStorageAccount - - RESOURCE_ID_PATTERN = re.compile('^/subscriptions/(?P[^/]+)' '/resourceGroups/(?P[^/]+)' '/providers/[^/]+' @@ -39,52 +37,42 @@ storage_account_key = os.environ.get("STORAGE_ACCOUNT_KEY") storage_account_suffix = os.environ.get("STORAGE_ACCOUNT_SUFFIX") + def get_blob_client() -> blob.BlockBlobService: if not storage_resource_id: return blob.BlockBlobService( - account_name=storage_account_name, - account_key=storage_account_key, - endpoint_suffix=storage_account_suffix) + account_name=storage_account_name, account_key=storage_account_key, endpoint_suffix=storage_account_suffix) else: credentials = ServicePrincipalCredentials( - client_id=client_id, - secret=credential, - tenant=tenant_id, - resource='https://management.core.windows.net/') + client_id=client_id, secret=credential, tenant=tenant_id, resource='https://management.core.windows.net/') m = RESOURCE_ID_PATTERN.match(storage_resource_id) accountname = m.group('account') subscription = m.group('subscription') resourcegroup = m.group('resourcegroup') mgmt_client = StorageManagementClient(credentials, subscription) - key = mgmt_client.storage_accounts.list_keys(resource_group_name=resourcegroup, - account_name=accountname).keys[0].value + key = mgmt_client.storage_accounts.list_keys( + resource_group_name=resourcegroup, account_name=accountname).keys[0].value storage_client = CloudStorageAccount(accountname, key) return storage_client.create_block_blob_service() + def get_batch_client() -> batch.BatchServiceClient: if not batch_resource_id: base_url = batch_service_url - credentials = batchauth.SharedKeyCredentials( - batch_account_name, - batch_account_key) + credentials = batchauth.SharedKeyCredentials(batch_account_name, batch_account_key) else: credentials = ServicePrincipalCredentials( - client_id=client_id, - secret=credential, - tenant=tenant_id, - resource='https://management.core.windows.net/') + client_id=client_id, secret=credential, tenant=tenant_id, resource='https://management.core.windows.net/') m = RESOURCE_ID_PATTERN.match(batch_resource_id) batch_client = BatchManagementClient(credentials, m.group('subscription')) account = batch_client.batch_account.get(m.group('resourcegroup'), m.group('account')) base_url = 'https://%s/' % account.account_endpoint credentials = ServicePrincipalCredentials( - client_id=client_id, - secret=credential, - tenant=tenant_id, - resource='https://batch.core.windows.net/') + client_id=client_id, secret=credential, tenant=tenant_id, 
resource='https://batch.core.windows.net/') return batch.BatchServiceClient(credentials, base_url=base_url) + batch_client = get_batch_client() blob_client = get_blob_client() diff --git a/aztk/node_scripts/core/logger.py b/aztk/node_scripts/core/logger.py index f569cb89..9e75731d 100644 --- a/aztk/node_scripts/core/logger.py +++ b/aztk/node_scripts/core/logger.py @@ -5,6 +5,7 @@ DEFAULT_FORMAT = '%(message)s' + def setup_logging(): for handler in logging.root.handlers[:]: logging.root.removeHandler(handler) diff --git a/aztk/node_scripts/install/create_user.py b/aztk/node_scripts/install/create_user.py index 204fec8a..0e3f6762 100644 --- a/aztk/node_scripts/install/create_user.py +++ b/aztk/node_scripts/install/create_user.py @@ -9,6 +9,7 @@ Creates a user if the user configuration file at $AZTK_WORKING_DIR/user.yaml exists ''' + def create_user(batch_client): path = os.path.join(os.environ['AZTK_WORKING_DIR'], "user.yaml") @@ -30,12 +31,11 @@ def create_user(batch_client): is_admin=True, password=password, ssh_public_key=str(user_conf['ssh-key']), - expiry_time=datetime.now(timezone.utc) + timedelta(days=365) - ) - ) + expiry_time=datetime.now(timezone.utc) + timedelta(days=365))) except batch_error.BatchErrorException as e: print(e) + def decrypt_password(user_conf): cipher_text = user_conf['password'] encrypted_aes_session_key = user_conf['aes_session_key'] diff --git a/aztk/node_scripts/install/install.py b/aztk/node_scripts/install/install.py index 57957797..b8548f99 100644 --- a/aztk/node_scripts/install/install.py +++ b/aztk/node_scripts/install/install.py @@ -13,6 +13,7 @@ def read_cluster_config(): print("Got cluster config", cluster_config) return cluster_config + def setup_host(docker_repo: str): """ Code to be run on the node(NOT in a container) diff --git a/aztk/node_scripts/install/node_scheduling.py b/aztk/node_scripts/install/node_scheduling.py index 12237eeb..c5eecd48 100644 --- a/aztk/node_scripts/install/node_scheduling.py +++ b/aztk/node_scripts/install/node_scheduling.py @@ -2,6 +2,7 @@ from aztk.models import ClusterConfiguration, SchedulingTarget from core import config, log + def disable_scheduling(batch_client: batch.BatchServiceClient): """ Disable scheduling for the current node @@ -16,6 +17,7 @@ def disable_scheduling(batch_client: batch.BatchServiceClient): else: log.info("Task scheduling is already disabled for this node") + def enable_scheduling(batch_client: batch.BatchServiceClient): """ Disable scheduling for the current node @@ -31,10 +33,8 @@ def enable_scheduling(batch_client: batch.BatchServiceClient): log.info("Task scheduling is already enabled for this node") -def setup_node_scheduling( - batch_client: batch.BatchServiceClient, - cluster_config: ClusterConfiguration, - is_master: bool): +def setup_node_scheduling(batch_client: batch.BatchServiceClient, cluster_config: ClusterConfiguration, + is_master: bool): is_dedicated = config.is_dedicated enable = False diff --git a/aztk/node_scripts/install/pick_master.py b/aztk/node_scripts/install/pick_master.py index fecd104f..99388f0f 100644 --- a/aztk/node_scripts/install/pick_master.py +++ b/aztk/node_scripts/install/pick_master.py @@ -10,9 +10,11 @@ MASTER_NODE_METADATA_KEY = "_spark_master_node" + class CannotAllocateMasterError(Exception): pass + def get_master_node_id(pool: batchmodels.CloudPool): """ :returns: the id of the node that is the assigned master of this pool @@ -26,16 +28,17 @@ def get_master_node_id(pool: batchmodels.CloudPool): return None + def try_assign_self_as_master(client: 
batch.BatchServiceClient, pool: batchmodels.CloudPool): current_metadata = pool.metadata or [] new_metadata = current_metadata + [{"name": MASTER_NODE_METADATA_KEY, "value": config.node_id}] try: - client.pool.patch(config.pool_id, batchmodels.PoolPatchParameter( - metadata=new_metadata - ), batchmodels.PoolPatchOptions( - if_match=pool.e_tag, - )) + client.pool.patch( + config.pool_id, + batchmodels.PoolPatchParameter(metadata=new_metadata), + batchmodels.PoolPatchOptions(if_match=pool.e_tag, + )) return True except (batcherror.BatchErrorException, ClientRequestError): print("Couldn't assign itself as master the pool because the pool was modified since last get.") diff --git a/aztk/node_scripts/install/plugins.py b/aztk/node_scripts/install/plugins.py index ea218338..f9734511 100644 --- a/aztk/node_scripts/install/plugins.py +++ b/aztk/node_scripts/install/plugins.py @@ -5,8 +5,8 @@ from pathlib import Path from aztk.models.plugins import PluginTarget, PluginTargetRole +log_folder = os.path.join(os.environ['AZTK_WORKING_DIR'], 'logs', 'plugins') -log_folder = os.path.join(os.environ['AZTK_WORKING_DIR'], 'logs','plugins') def _read_manifest_file(path=None): if not os.path.isfile(path): @@ -19,12 +19,10 @@ def _read_manifest_file(path=None): print("Error in plugins manifest: {0}".format(err)) - def setup_plugins(target: PluginTarget, is_master: bool = False, is_worker: bool = False): plugins_dir = _plugins_dir() - plugins_manifest = _read_manifest_file( - os.path.join(plugins_dir, 'plugins-manifest.yaml')) + plugins_manifest = _read_manifest_file(os.path.join(plugins_dir, 'plugins-manifest.yaml')) if not os.path.exists(log_folder): os.makedirs(log_folder) @@ -39,14 +37,12 @@ def _plugins_dir(): def _run_on_this_node(plugin_obj, target: PluginTarget, is_master, is_worker): - print("Loading plugin {} in {} on {}".format( - plugin_obj["execute"], - plugin_obj['target'], - plugin_obj['target_role'] - )) + print("Loading plugin {} in {} on {}".format(plugin_obj["execute"], plugin_obj['target'], + plugin_obj['target_role'])) if plugin_obj['target'] != target.value: - print("Ignoring ", plugin_obj["execute"], "as target is for ", plugin_obj['target'], "but is currently running in ", target.value) + print("Ignoring ", plugin_obj["execute"], "as target is for ", plugin_obj['target'], + "but is currently running in ", target.value) return False if plugin_obj['target_role'] == PluginTargetRole.Master.value and is_master is True: @@ -56,7 +52,8 @@ def _run_on_this_node(plugin_obj, target: PluginTarget, is_master, is_worker): if plugin_obj['target_role'] == PluginTargetRole.All.value: return True - print("Ignoring plugin", plugin_obj["execute"], "as target role is ", plugin_obj['target_role'], "and node is master: ", is_master, is_worker) + print("Ignoring plugin", plugin_obj["execute"], "as target role is ", plugin_obj['target_role'], + "and node is master: ", is_master, is_worker) return False @@ -72,8 +69,7 @@ def _setup_plugins(plugins_manifest, target: PluginTarget, is_master, is_worker) def _run_script(name: str, script_path: str = None, args: dict = None, env: dict = None): if not os.path.isfile(script_path): - print("Cannot run plugin script: {0} file does not exist".format( - script_path)) + print("Cannot run plugin script: {0} file does not exist".format(script_path)) return file_stat = os.stat(script_path) os.chmod(script_path, file_stat.st_mode | 0o777) @@ -90,11 +86,7 @@ def _run_script(name: str, script_path: str = None, args: dict = None, env: dict out_file = open(os.path.join(log_folder, 
'{0}.txt'.format(name)), 'w', encoding='UTF-8') try: - subprocess.call( - [script_path] + args, - env=my_env, - stdout=out_file, - stderr=out_file) + subprocess.call([script_path] + args, env=my_env, stdout=out_file, stderr=out_file) print("Finished running") print("------------------------------------------------------------------") except Exception as e: diff --git a/aztk/node_scripts/install/scripts.py b/aztk/node_scripts/install/scripts.py index 5c1426e0..ae994e8c 100644 --- a/aztk/node_scripts/install/scripts.py +++ b/aztk/node_scripts/install/scripts.py @@ -47,10 +47,11 @@ def _run_script(script_path: str = None): os.chmod(script_path, file_stat.st_mode | 0o777) print("Running custom script:", script_path) try: - subprocess.call([script_path], shell = True) + subprocess.call([script_path], shell=True) except Exception as e: print(e) + def _run_scripts_dir(root: str = None): try: for path, subdirs, files in os.walk(root): diff --git a/aztk/node_scripts/install/spark.py b/aztk/node_scripts/install/spark.py index 37d912ab..4ce6967f 100644 --- a/aztk/node_scripts/install/spark.py +++ b/aztk/node_scripts/install/spark.py @@ -29,6 +29,7 @@ def setup_as_worker(): setup_connection() start_spark_worker() + def get_pool() -> batchmodels.CloudPool: return batch_client.pool.get(config.pool_id) @@ -50,15 +51,13 @@ def setup_connection(): """ This setup spark config with which nodes are slaves and which are master """ - master_node_id = pick_master.get_master_node_id( - batch_client.pool.get(config.pool_id)) + master_node_id = pick_master.get_master_node_id(batch_client.pool.get(config.pool_id)) master_node = get_node(master_node_id) master_config_file = os.path.join(spark_conf_folder, "master") master_file = open(master_config_file, 'w', encoding='UTF-8') - print("Adding master node ip {0} to config file '{1}'".format( - master_node.ip_address, master_config_file)) + print("Adding master node ip {0} to config file '{1}'".format(master_node.ip_address, master_config_file)) master_file.write("{0}\n".format(master_node.ip_address)) master_file.close() @@ -66,8 +65,7 @@ def setup_connection(): def wait_for_master(): print("Waiting for master to be ready.") - master_node_id = pick_master.get_master_node_id( - batch_client.pool.get(config.pool_id)) + master_node_id = pick_master.get_master_node_id(batch_client.pool.get(config.pool_id)) if master_node_id == config.node_id: return @@ -85,8 +83,7 @@ def wait_for_master(): def start_spark_master(): master_ip = get_node(config.node_id).ip_address exe = os.path.join(spark_home, "sbin", "start-master.sh") - cmd = [exe, "-h", master_ip, "--webui-port", - str(config.spark_web_ui_port)] + cmd = [exe, "-h", master_ip, "--webui-port", str(config.spark_web_ui_port)] print("Starting master with '{0}'".format(" ".join(cmd))) call(cmd) try: @@ -99,12 +96,10 @@ def start_spark_master(): def start_spark_worker(): wait_for_master() exe = os.path.join(spark_home, "sbin", "start-slave.sh") - master_node_id = pick_master.get_master_node_id( - batch_client.pool.get(config.pool_id)) + master_node_id = pick_master.get_master_node_id(batch_client.pool.get(config.pool_id)) master_node = get_node(master_node_id) - cmd = [exe, "spark://{0}:7077".format(master_node.ip_address), - "--webui-port", str(config.spark_worker_ui_port)] + cmd = [exe, "spark://{0}:7077".format(master_node.ip_address), "--webui-port", str(config.spark_worker_ui_port)] print("Connecting to master with '{0}'".format(" ".join(cmd))) call(cmd) diff --git a/aztk/node_scripts/install/spark_container.py 
b/aztk/node_scripts/install/spark_container.py index 405498ee..13120679 100644 --- a/aztk/node_scripts/install/spark_container.py +++ b/aztk/node_scripts/install/spark_container.py @@ -3,11 +3,8 @@ from aztk.internal import DockerCmd from aztk.utils import constants -def start_spark_container( - docker_repo: str=None, - gpu_enabled: bool=False, - file_mounts=None, - plugins=None): + +def start_spark_container(docker_repo: str = None, gpu_enabled: bool = False, file_mounts=None, plugins=None): cmd = DockerCmd( name=constants.DOCKER_SPARK_CONTAINER_NAME, @@ -50,22 +47,22 @@ def start_spark_container( cmd.pass_env('SPARK_SUBMIT_LOGS_FILE') cmd.pass_env('SPARK_JOB_UI_PORT') - cmd.open_port(8080) # Spark Master UI - cmd.open_port(7077) # Spark Master - cmd.open_port(7337) # Spark Shuffle Service - cmd.open_port(4040) # Job UI - cmd.open_port(18080) # Spark History Server UI - cmd.open_port(3022) # Docker SSH + cmd.open_port(8080) # Spark Master UI + cmd.open_port(7077) # Spark Master + cmd.open_port(7337) # Spark Shuffle Service + cmd.open_port(4040) # Job UI + cmd.open_port(18080) # Spark History Server UI + cmd.open_port(3022) # Docker SSH if plugins: for plugin in plugins: for port in plugin.ports: cmd.open_port(port.internal) - print("="*60) + print("=" * 60) print(" Starting docker container") - print("-"*60) + print("-" * 60) print(cmd.to_str()) - print("="*60) + print("=" * 60) subprocess.call(['/bin/bash', '-c', 'echo Is master?: $AZTK_IS_MASTER _ $AZTK_IS_WORKER']) subprocess.call(['/bin/bash', '-c', cmd.to_str()]) diff --git a/aztk/node_scripts/main.py b/aztk/node_scripts/main.py index 0d3e3cea..eb9079b1 100644 --- a/aztk/node_scripts/main.py +++ b/aztk/node_scripts/main.py @@ -3,7 +3,6 @@ from core import logger - def run(): if len(sys.argv) < 2: print("Error: Expected at least one argument") diff --git a/aztk/node_scripts/submit.py b/aztk/node_scripts/submit.py index d877cb74..730cca28 100644 --- a/aztk/node_scripts/submit.py +++ b/aztk/node_scripts/submit.py @@ -12,7 +12,6 @@ # limit azure.storage logging logging.getLogger("azure.storage").setLevel(logging.CRITICAL) - ''' Submit helper methods ''' @@ -46,12 +45,9 @@ def upload_file_to_container(container_name, if not node_path: node_path = blob_name - blob_client.create_container(container_name, - fail_on_exist=False) + blob_client.create_container(container_name, fail_on_exist=False) - blob_client.create_blob_from_path(container_name, - blob_path, - file_path) + blob_client.create_blob_from_path(container_name, blob_path, file_path) sas_token = blob_client.generate_blob_shared_access_signature( container_name, @@ -59,32 +55,17 @@ def upload_file_to_container(container_name, permission=blob.BlobPermissions.READ, expiry=datetime.datetime.utcnow() + datetime.timedelta(days=7)) - sas_url = blob_client.make_blob_url(container_name, - blob_path, - sas_token=sas_token) - - return batch_models.ResourceFile(file_path=node_path, - blob_source=sas_url) - - -def __app_submit_cmd( - name: str, - app: str, - app_args: List[str], - main_class: str, - jars: List[str], - py_files: List[str], - files: List[str], - driver_java_options: str, - driver_library_path: str, - driver_class_path: str, - driver_memory: str, - executor_memory: str, - driver_cores: int, - executor_cores: int): + sas_url = blob_client.make_blob_url(container_name, blob_path, sas_token=sas_token) + + return batch_models.ResourceFile(file_path=node_path, blob_source=sas_url) + + +def __app_submit_cmd(name: str, app: str, app_args: List[str], main_class: str, jars: List[str], 
py_files: List[str], + files: List[str], driver_java_options: str, driver_library_path: str, driver_class_path: str, + driver_memory: str, executor_memory: str, driver_cores: int, executor_cores: int): cluster_id = os.environ['AZ_BATCH_POOL_ID'] spark_home = os.environ['SPARK_HOME'] - with open (os.path.join(spark_home, 'conf', 'master')) as f: + with open(os.path.join(spark_home, 'conf', 'master')) as f: master_ip = f.read().rstrip() # set file paths to correct path on container @@ -94,10 +75,8 @@ def __app_submit_cmd( files = [os.path.join(files_path, os.path.basename(f)) for f in files] # 2>&1 redirect stdout and stderr to be in the same file - spark_submit_cmd = CommandBuilder( - '{0}/bin/spark-submit'.format(spark_home)) - spark_submit_cmd.add_option( - '--master', 'spark://{0}:7077'.format(master_ip)) + spark_submit_cmd = CommandBuilder('{0}/bin/spark-submit'.format(spark_home)) + spark_submit_cmd.add_option('--master', 'spark://{0}:7077'.format(master_ip)) spark_submit_cmd.add_option('--name', name) spark_submit_cmd.add_option('--class', main_class) spark_submit_cmd.add_option('--jars', jars and ','.join(jars)) @@ -114,8 +93,7 @@ def __app_submit_cmd( spark_submit_cmd.add_option('--executor-cores', str(executor_cores)) spark_submit_cmd.add_argument( - os.path.expandvars(app) + ' ' + - ' '.join(['\'' + str(app_arg) + '\'' for app_arg in (app_args or [])])) + os.path.expandvars(app) + ' ' + ' '.join(['\'' + str(app_arg) + '\'' for app_arg in (app_args or [])])) with open("spark-submit.txt", mode="w", encoding="UTF-8") as stream: stream.write(spark_submit_cmd.to_str()) @@ -146,7 +124,6 @@ def upload_log(blob_client, application): def receive_submit_request(application_file_path): - ''' Handle the request to submit a task ''' diff --git a/aztk/node_scripts/wait_until_master_selected.py b/aztk/node_scripts/wait_until_master_selected.py index f033dca9..295f5ad2 100644 --- a/aztk/node_scripts/wait_until_master_selected.py +++ b/aztk/node_scripts/wait_until_master_selected.py @@ -18,5 +18,6 @@ def main(): print(e) time.sleep(1) + if __name__ == "__main__": main() diff --git a/aztk/node_scripts/wait_until_setup_complete.py b/aztk/node_scripts/wait_until_setup_complete.py index 9bfd9e72..19b0ad71 100644 --- a/aztk/node_scripts/wait_until_setup_complete.py +++ b/aztk/node_scripts/wait_until_setup_complete.py @@ -1,7 +1,6 @@ import time import os - while not os.path.exists('/tmp/setup_complete'): time.sleep(1) diff --git a/aztk/spark/client/base/operations.py b/aztk/spark/client/base/operations.py index e922ff7c..7a020f5e 100644 --- a/aztk/spark/client/base/operations.py +++ b/aztk/spark/client/base/operations.py @@ -44,8 +44,9 @@ def _generate_cluster_start_task(self, Returns: :obj:`azure.batch.models.StartTask`: the StartTask definition to provision the cluster. """ - return generate_cluster_start_task.generate_cluster_start_task( - core_base_operations, zip_resource_file, id, gpu_enabled, docker_repo, file_shares, plugins, mixed_mode, worker_on_master) + return generate_cluster_start_task.generate_cluster_start_task(core_base_operations, zip_resource_file, id, + gpu_enabled, docker_repo, file_shares, plugins, + mixed_mode, worker_on_master) #TODO: make this private or otherwise not public def _generate_application_task(self, core_base_operations, container_id, application, remote=False): @@ -61,4 +62,5 @@ def _generate_application_task(self, core_base_operations, container_id, applica Returns: :obj:`azure.batch.models.TaskAddParameter`: the Task definition for the Application. 
""" - return generate_application_task.generate_application_task(core_base_operations, container_id, application, remote) + return generate_application_task.generate_application_task(core_base_operations, container_id, application, + remote) diff --git a/aztk/spark/client/client.py b/aztk/spark/client/client.py index 8db8c349..01743b6c 100644 --- a/aztk/spark/client/client.py +++ b/aztk/spark/client/client.py @@ -26,19 +26,21 @@ class Client(CoreClient): cluster (:obj:`aztk.spark.client.cluster.ClusterOperations`): Cluster job (:obj:`aztk.spark.client.job.JobOperations`): Job """ + def __init__(self, secrets_configuration: models.SecretsConfiguration = None, **kwargs): self.secrets_configuration = None context = None if kwargs.get("secrets_config"): - deprecate(version="0.10.0", message="secrets_config key is deprecated in secrets.yaml", - advice="Please use secrets_configuration key instead.") + deprecate( + version="0.10.0", + message="secrets_config key is deprecated in secrets.yaml", + advice="Please use secrets_configuration key instead.") context = self._get_context(kwargs.get("secrets_config")) else: context = self._get_context(secrets_configuration) self.cluster = ClusterOperations(context) self.job = JobOperations(context) - # ALL THE FOLLOWING METHODS ARE DEPRECATED AND WILL BE REMOVED IN 0.10.0 @deprecated("0.10.0") @@ -171,7 +173,8 @@ def cluster_ssh_into_master(self, password=None, port_forward_list=None, internal=False): - return self.cluster._core_cluster_operations.ssh_into_node(cluster_id, node_id, username, ssh_key, password, port_forward_list, internal) + return self.cluster._core_cluster_operations.ssh_into_node(cluster_id, node_id, username, ssh_key, password, + port_forward_list, internal) ''' job submission diff --git a/aztk/spark/client/cluster/helpers/copy.py b/aztk/spark/client/cluster/helpers/copy.py index 8795931b..2b9cdbc0 100644 --- a/aztk/spark/client/cluster/helpers/copy.py +++ b/aztk/spark/client/cluster/helpers/copy.py @@ -4,7 +4,13 @@ from aztk.utils import helpers -def cluster_copy(core_cluster_operations, cluster_id: str, source_path: str, destination_path: str, host: bool = False, internal: bool = False, timeout: int = None): +def cluster_copy(core_cluster_operations, + cluster_id: str, + source_path: str, + destination_path: str, + host: bool = False, + internal: bool = False, + timeout: int = None): try: container_name = None if host else 'spark' return core_cluster_operations.copy( diff --git a/aztk/spark/client/cluster/helpers/create.py b/aztk/spark/client/cluster/helpers/create.py index 2fa30c71..b2580a3e 100644 --- a/aztk/spark/client/cluster/helpers/create.py +++ b/aztk/spark/client/cluster/helpers/create.py @@ -12,6 +12,7 @@ auto_user=batch_models.AutoUserSpecification( scope=batch_models.AutoUserScope.pool, elevation_level=batch_models.ElevationLevel.admin)) + def _default_scheduling_target(vm_count: int): if vm_count == 0: return models.SchedulingTarget.Any @@ -27,7 +28,10 @@ def _apply_default_for_cluster_config(configuration: models.ClusterConfiguration return cluster_conf -def create_cluster(core_cluster_operations, spark_cluster_operations, cluster_conf: models.ClusterConfiguration, wait: bool = False): +def create_cluster(core_cluster_operations, + spark_cluster_operations, + cluster_conf: models.ClusterConfiguration, + wait: bool = False): """ Create a new aztk spark cluster @@ -47,14 +51,15 @@ def create_cluster(core_cluster_operations, spark_cluster_operations, cluster_co node_data = NodeData(cluster_conf).add_core().done() 
zip_resource_files = cluster_data.upload_node_data(node_data).to_resource_file() - start_task = spark_cluster_operations._generate_cluster_start_task(core_cluster_operations, zip_resource_files, cluster_conf.cluster_id, - cluster_conf.gpu_enabled(), cluster_conf.get_docker_repo(), - cluster_conf.file_shares, cluster_conf.plugins, - cluster_conf.mixed_mode(), cluster_conf.worker_on_master) + start_task = spark_cluster_operations._generate_cluster_start_task( + core_cluster_operations, zip_resource_files, cluster_conf.cluster_id, cluster_conf.gpu_enabled(), + cluster_conf.get_docker_repo(), cluster_conf.file_shares, cluster_conf.plugins, cluster_conf.mixed_mode(), + cluster_conf.worker_on_master) software_metadata_key = base_models.Software.spark - cluster = core_cluster_operations.create(cluster_conf, software_metadata_key, start_task, constants.SPARK_VM_IMAGE) + cluster = core_cluster_operations.create(cluster_conf, software_metadata_key, start_task, + constants.SPARK_VM_IMAGE) # Wait for the master to be ready if wait: diff --git a/aztk/spark/client/cluster/helpers/create_user.py b/aztk/spark/client/cluster/helpers/create_user.py index 48ea22f6..452bb640 100644 --- a/aztk/spark/client/cluster/helpers/create_user.py +++ b/aztk/spark/client/cluster/helpers/create_user.py @@ -4,7 +4,12 @@ from aztk.utils import helpers -def create_user(core_cluster_operations, spark_cluster_operations, cluster_id: str, username: str, password: str = None, ssh_key: str = None) -> str: +def create_user(core_cluster_operations, + spark_cluster_operations, + cluster_id: str, + username: str, + password: str = None, + ssh_key: str = None) -> str: try: cluster = spark_cluster_operations.get(cluster_id) master_node_id = cluster.master_node_id diff --git a/aztk/spark/client/cluster/helpers/diagnostics.py b/aztk/spark/client/cluster/helpers/diagnostics.py index de26b06d..fbd6ef70 100644 --- a/aztk/spark/client/cluster/helpers/diagnostics.py +++ b/aztk/spark/client/cluster/helpers/diagnostics.py @@ -1,6 +1,3 @@ - - - import os from azure.batch.models import batch_error @@ -11,7 +8,8 @@ def _run(spark_cluster_operations, cluster_id, output_directory=None): # copy debug program to each node - output = spark_cluster_operations.copy(cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True) + output = spark_cluster_operations.copy( + cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True) ssh_cmd = _build_diagnostic_ssh_command() run_output = spark_cluster_operations.run(cluster_id, ssh_cmd, host=True) remote_path = "/tmp/debug.zip" diff --git a/aztk/spark/client/cluster/helpers/download.py b/aztk/spark/client/cluster/helpers/download.py index 3ecb5dfd..a4fca666 100644 --- a/aztk/spark/client/cluster/helpers/download.py +++ b/aztk/spark/client/cluster/helpers/download.py @@ -1,19 +1,25 @@ - import azure.batch.models.batch_error as batch_error from aztk import error from aztk.utils import helpers -def cluster_download(core_cluster_operations, cluster_id: str, source_path: str, destination_path: str = None, host: bool = False, internal: bool = False, timeout: int = None): +def cluster_download(core_cluster_operations, + cluster_id: str, + source_path: str, + destination_path: str = None, + host: bool = False, + internal: bool = False, + timeout: int = None): try: container_name = None if host else 'spark' - return core_cluster_operations.copy(cluster_id, - source_path, - destination_path=destination_path, - container_name=container_name, - get=True, - 
internal=internal, - timeout=timeout) + return core_cluster_operations.copy( + cluster_id, + source_path, + destination_path=destination_path, + container_name=container_name, + get=True, + internal=internal, + timeout=timeout) except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) diff --git a/aztk/spark/client/cluster/helpers/get_application_log.py b/aztk/spark/client/cluster/helpers/get_application_log.py index 4ec73fe8..4043fc6f 100644 --- a/aztk/spark/client/cluster/helpers/get_application_log.py +++ b/aztk/spark/client/cluster/helpers/get_application_log.py @@ -1,7 +1,10 @@ from aztk.spark import models -def get_application_log(core_base_operations, cluster_id: str, application_name: str, tail=False, current_bytes: int = 0): - base_application_log = core_base_operations.get_application_log( - cluster_id, application_name, tail, current_bytes) +def get_application_log(core_base_operations, + cluster_id: str, + application_name: str, + tail=False, + current_bytes: int = 0): + base_application_log = core_base_operations.get_application_log(cluster_id, application_name, tail, current_bytes) return models.ApplicationLog(base_application_log) diff --git a/aztk/spark/client/cluster/helpers/run.py b/aztk/spark/client/cluster/helpers/run.py index a3677b83..0cdc820c 100644 --- a/aztk/spark/client/cluster/helpers/run.py +++ b/aztk/spark/client/cluster/helpers/run.py @@ -4,9 +4,14 @@ from aztk.utils import helpers -def cluster_run(core_cluster_operations, cluster_id: str, command: str, host=False, internal: bool = False, timeout=None): +def cluster_run(core_cluster_operations, + cluster_id: str, + command: str, + host=False, + internal: bool = False, + timeout=None): try: - return core_cluster_operations.run( + return core_cluster_operations.run( cluster_id, command, internal, container_name='spark' if not host else None, timeout=timeout) except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) diff --git a/aztk/spark/client/cluster/helpers/ssh_into_master.py b/aztk/spark/client/cluster/helpers/ssh_into_master.py index e0b64d65..029bc81d 100644 --- a/aztk/spark/client/cluster/helpers/ssh_into_master.py +++ b/aztk/spark/client/cluster/helpers/ssh_into_master.py @@ -1,12 +1,19 @@ - import azure.batch.models.batch_error as batch_error from aztk import error from aztk.utils import helpers -def cluster_ssh_into_master(spark_cluster_operations, cluster_id, node_id, username, ssh_key=None, password=None, port_forward_list=None, internal=False): +def cluster_ssh_into_master(spark_cluster_operations, + cluster_id, + node_id, + username, + ssh_key=None, + password=None, + port_forward_list=None, + internal=False): try: - spark_cluster_operations.ssh_into_node(cluster_id, node_id, username, ssh_key, password, port_forward_list, internal) + spark_cluster_operations.ssh_into_node(cluster_id, node_id, username, ssh_key, password, port_forward_list, + internal) except batch_error.BatchErrorException as e: raise error.AztkError(helpers.format_batch_exception(e)) diff --git a/aztk/spark/client/cluster/helpers/submit.py b/aztk/spark/client/cluster/helpers/submit.py index 0da03eff..ea1d3f04 100644 --- a/aztk/spark/client/cluster/helpers/submit.py +++ b/aztk/spark/client/cluster/helpers/submit.py @@ -15,12 +15,18 @@ def affinitize_task_to_master(core_cluster_operations, spark_cluster_operations, cluster = spark_cluster_operations.get(cluster_id) if cluster.master_node_id is None: raise AztkError("Master has not yet been 
selected. Please wait until the cluster is finished provisioning.") - master_node = core_cluster_operations.batch_client.compute_node.get(pool_id=cluster_id, node_id=cluster.master_node_id) + master_node = core_cluster_operations.batch_client.compute_node.get( + pool_id=cluster_id, node_id=cluster.master_node_id) task.affinity_info = batch_models.AffinityInformation(affinity_id=master_node.affinity_id) return task -def submit_application(core_cluster_operations, spark_cluster_operations, cluster_id, application, remote: bool = False, wait: bool = False): +def submit_application(core_cluster_operations, + spark_cluster_operations, + cluster_id, + application, + remote: bool = False, + wait: bool = False): """ Submit a spark app """ @@ -32,7 +38,8 @@ def submit_application(core_cluster_operations, spark_cluster_operations, cluste core_cluster_operations.batch_client.task.add(job_id=job_id, task=task) if wait: - helpers.wait_for_task_to_complete(job_id=job_id, task_id=task.id, batch_client=core_cluster_operations.batch_client) + helpers.wait_for_task_to_complete( + job_id=job_id, task_id=task.id, batch_client=core_cluster_operations.batch_client) def submit(core_cluster_operations, diff --git a/aztk/spark/client/cluster/helpers/wait.py b/aztk/spark/client/cluster/helpers/wait.py index 5d9e3cff..00aafe9c 100644 --- a/aztk/spark/client/cluster/helpers/wait.py +++ b/aztk/spark/client/cluster/helpers/wait.py @@ -3,6 +3,7 @@ from aztk import error from aztk.utils import helpers + def wait_for_application_to_complete(core_cluster_operations, id, application_name): try: return core_cluster_operations.wait(id, application_name) diff --git a/aztk/spark/client/cluster/operations.py b/aztk/spark/client/cluster/operations.py index 75bde904..cd7140ea 100644 --- a/aztk/spark/client/cluster/operations.py +++ b/aztk/spark/client/cluster/operations.py @@ -163,7 +163,8 @@ def copy(self, Returns: :obj:`List[aztk.spark.models.NodeOutput]`: A list of NodeOutput objects representing the output of the copy command. """ - return copy.cluster_copy(self._core_cluster_operations, id, source_path, destination_path, host, internal, timeout) + return copy.cluster_copy(self._core_cluster_operations, id, source_path, destination_path, host, internal, + timeout) def download(self, id: str, @@ -190,8 +191,8 @@ def download(self, Returns: :obj:`List[aztk.spark.models.NodeOutput]`: A list of NodeOutput objects representing the output of the copy command. """ - return download.cluster_download(self._core_cluster_operations, id, source_path, destination_path, host, internal, - timeout) + return download.cluster_download(self._core_cluster_operations, id, source_path, destination_path, host, + internal, timeout) def diagnostics(self, id, output_directory=None): """Download a file from every node in a cluster. @@ -221,7 +222,8 @@ def get_application_log(self, id: str, application_name: str, tail=False, curren Returns: :obj:`aztk.spark.models.ApplicationLog`: a model representing the output of the application. 
""" - return get_application_log.get_application_log(self._core_cluster_operations, id, application_name, tail, current_bytes) + return get_application_log.get_application_log(self._core_cluster_operations, id, application_name, tail, + current_bytes) def get_remote_login_settings(self, id: str, node_id: str): """Get the remote login information for a node in a cluster diff --git a/aztk/spark/client/job/helpers/get.py b/aztk/spark/client/job/helpers/get.py index 2be9b55e..b34557da 100644 --- a/aztk/spark/client/job/helpers/get.py +++ b/aztk/spark/client/job/helpers/get.py @@ -10,7 +10,8 @@ def _get_job(core_job_operations, job_id): job = core_job_operations.batch_client.job_schedule.get(job_id) job_apps = [ - app for app in core_job_operations.batch_client.task.list(job_id=job.execution_info.recent_job.id) if app.id != job_id + app for app in core_job_operations.batch_client.task.list(job_id=job.execution_info.recent_job.id) + if app.id != job_id ] recent_run_job = get_recent_job(core_job_operations, job_id) pool_prefix = recent_run_job.pool_info.auto_pool_specification.auto_pool_id_prefix diff --git a/aztk/spark/client/job/helpers/get_application.py b/aztk/spark/client/job/helpers/get_application.py index cbee81d9..e5d1663c 100644 --- a/aztk/spark/client/job/helpers/get_application.py +++ b/aztk/spark/client/job/helpers/get_application.py @@ -12,7 +12,8 @@ def _get_application(spark_job_operations, job_id, application_name): # info about the app recent_run_job = get_recent_job(spark_job_operations._core_job_operations, job_id) try: - return spark_job_operations._core_job_operations.batch_client.task.get(job_id=recent_run_job.id, task_id=application_name) + return spark_job_operations._core_job_operations.batch_client.task.get( + job_id=recent_run_job.id, task_id=application_name) except batch_models.batch_error.BatchErrorException: raise error.AztkError( "The Spark application {0} is still being provisioned or does not exist.".format(application_name)) diff --git a/aztk/spark/client/job/helpers/stop_application.py b/aztk/spark/client/job/helpers/stop_application.py index bc9c9611..b2a72fcf 100644 --- a/aztk/spark/client/job/helpers/stop_application.py +++ b/aztk/spark/client/job/helpers/stop_application.py @@ -5,6 +5,7 @@ from aztk.utils import helpers from .get_recent_job import get_recent_job + def stop_app(core_job_operations, job_id, application_name): recent_run_job = get_recent_job(core_job_operations, job_id) diff --git a/aztk/spark/client/job/helpers/submit.py b/aztk/spark/client/job/helpers/submit.py index 09480c53..01b2965a 100644 --- a/aztk/spark/client/job/helpers/submit.py +++ b/aztk/spark/client/job/helpers/submit.py @@ -64,7 +64,10 @@ def _apply_default_for_job_config(job_conf: models.JobConfiguration): return job_conf -def submit_job(core_job_operations, spark_job_operations, job_configuration: models.JobConfiguration, wait: bool = False): +def submit_job(core_job_operations, + spark_job_operations, + job_configuration: models.JobConfiguration, + wait: bool = False): try: job_configuration = _apply_default_for_job_config(job_configuration) job_configuration.validate() @@ -84,8 +87,8 @@ def submit_job(core_job_operations, spark_job_operations, job_configuration: mod application_tasks = [] for application in job_configuration.applications: application_tasks.append((application, - spark_job_operations._generate_application_task(core_job_operations, job_configuration.id, - application))) + spark_job_operations._generate_application_task( + core_job_operations, 
job_configuration.id, application))) job_manager_task = generate_job_manager_task(core_job_operations, job_configuration, application_tasks) @@ -106,7 +109,7 @@ def submit_job(core_job_operations, spark_job_operations, job_configuration: mod software_metadata_key=software_metadata_key, vm_image_model=vm_image, application_metadata='\n'.join(application.name for application in (job_configuration.applications or []))) - + if wait: spark_job_operations.wait(id=job_configuration.id) diff --git a/aztk/spark/client/job/operations.py b/aztk/spark/client/job/operations.py index c639795c..ea3bd971 100644 --- a/aztk/spark/client/job/operations.py +++ b/aztk/spark/client/job/operations.py @@ -17,7 +17,6 @@ def __init__(self, context): self._core_job_operations = CoreJobOperations(context) # self._spark_base_cluster_operations = SparkBaseOperations() - def list(self): """List all jobs. diff --git a/aztk/spark/helpers/__init__.py b/aztk/spark/helpers/__init__.py index 1880b509..18aad865 100644 --- a/aztk/spark/helpers/__init__.py +++ b/aztk/spark/helpers/__init__.py @@ -1,2 +1 @@ # ALL FILES IN THIS DIRECTORY ARE DEPRECATED, WILL BE REMOTE IN v0.9.0 - diff --git a/aztk/spark/helpers/cluster_diagnostic_helper.py b/aztk/spark/helpers/cluster_diagnostic_helper.py index dd57aeef..9dca8c58 100644 --- a/aztk/spark/helpers/cluster_diagnostic_helper.py +++ b/aztk/spark/helpers/cluster_diagnostic_helper.py @@ -4,9 +4,11 @@ from aztk import models as aztk_models import azure.batch.models as batch_models + def run(spark_client, cluster_id, output_directory=None): # copy debug program to each node - output = spark_client.cluster_copy(cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True) + output = spark_client.cluster_copy( + cluster_id, os.path.abspath("./aztk/spark/utils/debug.py"), "/tmp/debug.py", host=True) ssh_cmd = _build_diagnostic_ssh_command() run_output = spark_client.cluster_run(cluster_id, ssh_cmd, host=True) remote_path = "/tmp/debug.zip" diff --git a/aztk/spark/helpers/create_cluster.py b/aztk/spark/helpers/create_cluster.py index 283692e9..e917e687 100644 --- a/aztk/spark/helpers/create_cluster.py +++ b/aztk/spark/helpers/create_cluster.py @@ -8,30 +8,27 @@ POOL_ADMIN_USER_IDENTITY = batch_models.UserIdentity( auto_user=batch_models.AutoUserSpecification( - scope=batch_models.AutoUserScope.pool, - elevation_level=batch_models.ElevationLevel.admin)) + scope=batch_models.AutoUserScope.pool, elevation_level=batch_models.ElevationLevel.admin)) + def _get_aztk_environment(cluster_id, worker_on_master, mixed_mode): envs = [] envs.append(batch_models.EnvironmentSetting(name="AZTK_MIXED_MODE", value=helpers.bool_env(mixed_mode))) - envs.append(batch_models.EnvironmentSetting( - name="AZTK_WORKER_ON_MASTER", value=helpers.bool_env(worker_on_master))) + envs.append(batch_models.EnvironmentSetting(name="AZTK_WORKER_ON_MASTER", value=helpers.bool_env(worker_on_master))) envs.append(batch_models.EnvironmentSetting(name="AZTK_CLUSTER_ID", value=cluster_id)) return envs + def __get_docker_credentials(spark_client): creds = [] docker = spark_client.secrets_config.docker if docker: if docker.endpoint: - creds.append(batch_models.EnvironmentSetting( - name="DOCKER_ENDPOINT", value=docker.endpoint)) + creds.append(batch_models.EnvironmentSetting(name="DOCKER_ENDPOINT", value=docker.endpoint)) if docker.username: - creds.append(batch_models.EnvironmentSetting( - name="DOCKER_USERNAME", value=docker.username)) + creds.append(batch_models.EnvironmentSetting(name="DOCKER_USERNAME", 
value=docker.username)) if docker.password: - creds.append(batch_models.EnvironmentSetting( - name="DOCKER_PASSWORD", value=docker.password)) + creds.append(batch_models.EnvironmentSetting(name="DOCKER_PASSWORD", value=docker.password)) return creds @@ -41,25 +38,17 @@ def __get_secrets_env(spark_client): service_principal = spark_client.secrets_config.service_principal if shared_key: return [ - batch_models.EnvironmentSetting( - name="BATCH_SERVICE_URL", value=shared_key.batch_service_url), - batch_models.EnvironmentSetting( - name="BATCH_ACCOUNT_KEY", value=shared_key.batch_account_key), - batch_models.EnvironmentSetting( - name="STORAGE_ACCOUNT_NAME", value=shared_key.storage_account_name), - batch_models.EnvironmentSetting( - name="STORAGE_ACCOUNT_KEY", value=shared_key.storage_account_key), - batch_models.EnvironmentSetting( - name="STORAGE_ACCOUNT_SUFFIX", value=shared_key.storage_account_suffix), + batch_models.EnvironmentSetting(name="BATCH_SERVICE_URL", value=shared_key.batch_service_url), + batch_models.EnvironmentSetting(name="BATCH_ACCOUNT_KEY", value=shared_key.batch_account_key), + batch_models.EnvironmentSetting(name="STORAGE_ACCOUNT_NAME", value=shared_key.storage_account_name), + batch_models.EnvironmentSetting(name="STORAGE_ACCOUNT_KEY", value=shared_key.storage_account_key), + batch_models.EnvironmentSetting(name="STORAGE_ACCOUNT_SUFFIX", value=shared_key.storage_account_suffix), ] else: return [ - batch_models.EnvironmentSetting( - name="SP_TENANT_ID", value=service_principal.tenant_id), - batch_models.EnvironmentSetting( - name="SP_CLIENT_ID", value=service_principal.client_id), - batch_models.EnvironmentSetting( - name="SP_CREDENTIAL", value=service_principal.credential), + batch_models.EnvironmentSetting(name="SP_TENANT_ID", value=service_principal.tenant_id), + batch_models.EnvironmentSetting(name="SP_CLIENT_ID", value=service_principal.client_id), + batch_models.EnvironmentSetting(name="SP_CREDENTIAL", value=service_principal.credential), batch_models.EnvironmentSetting( name="SP_BATCH_RESOURCE_ID", value=service_principal.batch_account_resource_id), batch_models.EnvironmentSetting( @@ -70,9 +59,9 @@ def __get_secrets_env(spark_client): def __cluster_install_cmd(zip_resource_file: batch_models.ResourceFile, gpu_enabled: bool, docker_repo: str = None, - plugins = None, + plugins=None, worker_on_master: bool = True, - file_mounts = None, + file_mounts=None, mixed_mode: bool = False): """ For Docker on ubuntu 16.04 - return the command line @@ -89,12 +78,9 @@ def __cluster_install_cmd(zip_resource_file: batch_models.ResourceFile, shares.append('mkdir -p {0}'.format(mount.mount_path)) # Mount the file share - shares.append('mount -t cifs //{0}.file.core.windows.net/{2} {3} -o vers=3.0,username={0},password={1},dir_mode=0777,file_mode=0777,sec=ntlmssp'.format( - mount.storage_account_name, - mount.storage_account_key, - mount.file_share_path, - mount.mount_path - )) + shares.append( + 'mount -t cifs //{0}.file.core.windows.net/{2} {3} -o vers=3.0,username={0},password={1},dir_mode=0777,file_mode=0777,sec=ntlmssp'. 
+ format(mount.storage_account_name, mount.storage_account_key, mount.file_share_path, mount.mount_path)) setup = [ 'time('\ @@ -112,16 +98,16 @@ def __cluster_install_cmd(zip_resource_file: batch_models.ResourceFile, commands = shares + setup return commands -def generate_cluster_start_task( - spark_client, - zip_resource_file: batch_models.ResourceFile, - cluster_id: str, - gpu_enabled: bool, - docker_repo: str = None, - file_shares: List[aztk_models.FileShare] = None, - plugins: List[aztk_models.PluginConfiguration] = None, - mixed_mode: bool = False, - worker_on_master: bool = True): + +def generate_cluster_start_task(spark_client, + zip_resource_file: batch_models.ResourceFile, + cluster_id: str, + gpu_enabled: bool, + docker_repo: str = None, + file_shares: List[aztk_models.FileShare] = None, + plugins: List[aztk_models.PluginConfiguration] = None, + mixed_mode: bool = False, + worker_on_master: bool = True): """ This will return the start task object for the pool to be created. :param cluster_id str: Id of the cluster(Used for uploading the resource files) @@ -138,22 +124,17 @@ def generate_cluster_start_task( # TODO use certificate environment_settings = __get_secrets_env(spark_client) + [ - batch_models.EnvironmentSetting( - name="SPARK_WEB_UI_PORT", value=spark_web_ui_port), - batch_models.EnvironmentSetting( - name="SPARK_WORKER_UI_PORT", value=spark_worker_ui_port), - batch_models.EnvironmentSetting( - name="SPARK_JOB_UI_PORT", value=spark_job_ui_port), - batch_models.EnvironmentSetting( - name="SPARK_CONTAINER_NAME", value=spark_container_name), - batch_models.EnvironmentSetting( - name="SPARK_SUBMIT_LOGS_FILE", value=spark_submit_logs_file), - batch_models.EnvironmentSetting( - name="AZTK_GPU_ENABLED", value=helpers.bool_env(gpu_enabled)), + batch_models.EnvironmentSetting(name="SPARK_WEB_UI_PORT", value=spark_web_ui_port), + batch_models.EnvironmentSetting(name="SPARK_WORKER_UI_PORT", value=spark_worker_ui_port), + batch_models.EnvironmentSetting(name="SPARK_JOB_UI_PORT", value=spark_job_ui_port), + batch_models.EnvironmentSetting(name="SPARK_CONTAINER_NAME", value=spark_container_name), + batch_models.EnvironmentSetting(name="SPARK_SUBMIT_LOGS_FILE", value=spark_submit_logs_file), + batch_models.EnvironmentSetting(name="AZTK_GPU_ENABLED", value=helpers.bool_env(gpu_enabled)), ] + __get_docker_credentials(spark_client) + _get_aztk_environment(cluster_id, worker_on_master, mixed_mode) # start task command - command = __cluster_install_cmd(zip_resource_file, gpu_enabled, docker_repo, plugins, worker_on_master, file_shares, mixed_mode) + command = __cluster_install_cmd(zip_resource_file, gpu_enabled, docker_repo, plugins, worker_on_master, file_shares, + mixed_mode) return batch_models.StartTask( command_line=helpers.wrap_commands_in_shell(command), diff --git a/aztk/spark/helpers/get_log.py b/aztk/spark/helpers/get_log.py index 032e64a3..2f786509 100644 --- a/aztk/spark/helpers/get_log.py +++ b/aztk/spark/helpers/get_log.py @@ -12,14 +12,15 @@ output_file = constants.TASK_WORKING_DIR + \ "/" + constants.SPARK_SUBMIT_LOGS_FILE + def __check_task_node_exist(batch_client, cluster_id: str, task: batch_models.CloudTask) -> bool: try: - batch_client.compute_node.get( - cluster_id, task.node_info.node_id) + batch_client.compute_node.get(cluster_id, task.node_info.node_id) return True except batch_error.BatchErrorException: return False + def __wait_for_app_to_be_running(batch_client, cluster_id: str, application_name: str) -> batch_models.CloudTask: """ Wait for the batch task to 
leave the waiting state into running(or completed if it was fast enough) @@ -33,11 +34,11 @@ def __wait_for_app_to_be_running(batch_client, cluster_id: str, application_name else: return task + def __get_output_file_properties(batch_client, cluster_id: str, application_name: str): while True: try: - file = helpers.get_file_properties( - cluster_id, application_name, output_file, batch_client) + file = helpers.get_file_properties(cluster_id, application_name, output_file, batch_client) return file except batch_error.BatchErrorException as e: if e.response.status_code == 404: @@ -79,8 +80,7 @@ def get_log(batch_client, blob_client, cluster_id: str, application_name: str, t ocp_range = None if tail: - ocp_range = "bytes={0}-{1}".format( - current_bytes, target_bytes - 1) + ocp_range = "bytes={0}-{1}".format(current_bytes, target_bytes - 1) stream = batch_client.file.get_from_task( job_id, task_id, output_file, batch_models.FileGetFromTaskOptions(ocp_range=ocp_range)) diff --git a/aztk/spark/helpers/job_submission.py b/aztk/spark/helpers/job_submission.py index 65cb3b54..eb7ac1be 100644 --- a/aztk/spark/helpers/job_submission.py +++ b/aztk/spark/helpers/job_submission.py @@ -9,11 +9,11 @@ import aztk.error as error from aztk.utils import constants, helpers from aztk.utils.command_builder import CommandBuilder - - ''' Job Submission helper methods ''' + + def __app_cmd(): docker_exec = CommandBuilder("sudo docker exec") docker_exec.add_argument("-i") @@ -30,11 +30,12 @@ def __app_cmd(): def generate_task(spark_client, job, application_tasks): resource_files = [] for application, task in application_tasks: - task_definition_resource_file = helpers.upload_text_to_container(container_name=job.id, - application_name=application.name + '.yaml', - file_path=application.name + '.yaml', - content=yaml.dump(task), - blob_client=spark_client.blob_client) + task_definition_resource_file = helpers.upload_text_to_container( + container_name=job.id, + application_name=application.name + '.yaml', + file_path=application.name + '.yaml', + content=yaml.dump(task), + blob_client=spark_client.blob_client) resource_files.append(task_definition_resource_file) task_cmd = __app_cmd() @@ -48,9 +49,7 @@ def generate_task(spark_client, job, application_tasks): allow_low_priority_node=True, user_identity=batch_models.UserIdentity( auto_user=batch_models.AutoUserSpecification( - scope=batch_models.AutoUserScope.task, - elevation_level=batch_models.ElevationLevel.admin)) - ) + scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin))) return task @@ -83,8 +82,9 @@ def list_applications(spark_client, job_id): def get_job(spark_client, job_id): job = spark_client.batch_client.job_schedule.get(job_id) - job_apps = [app for app in - spark_client.batch_client.task.list(job_id=job.execution_info.recent_job.id) if app.id != job_id] + job_apps = [ + app for app in spark_client.batch_client.task.list(job_id=job.execution_info.recent_job.id) if app.id != job_id + ] recent_run_job = __get_recent_job(spark_client, job_id) pool_prefix = recent_run_job.pool_info.auto_pool_specification.auto_pool_id_prefix pool = nodes = None @@ -101,8 +101,8 @@ def disable(spark_client, job_id): # disable the currently running job from the job schedule if exists recent_run_job = __get_recent_job(spark_client, job_id) if recent_run_job.id and recent_run_job.state == batch_models.JobState.active: - spark_client.batch_client.job.disable(job_id=recent_run_job.id, - disable_tasks=batch_models.DisableJobOption.requeue) + 
spark_client.batch_client.job.disable( + job_id=recent_run_job.id, disable_tasks=batch_models.DisableJobOption.requeue) # disable the job_schedule spark_client.batch_client.job_schedule.disable(job_id) @@ -156,7 +156,8 @@ def get_application(spark_client, job_id, application_name): try: return spark_client.batch_client.task.get(job_id=recent_run_job.id, task_id=application_name) except batch_models.batch_error.BatchErrorException: - raise error.AztkError("The Spark application {0} is still being provisioned or does not exist.".format(application_name)) + raise error.AztkError( + "The Spark application {0} is still being provisioned or does not exist.".format(application_name)) def get_application_log(spark_client, job_id, application_name): @@ -176,7 +177,8 @@ def get_application_log(spark_client, job_id, application_name): raise error.AztkError("The application {0} has not yet been created.".format(application)) raise error.AztkError("The application {0} does not exist".format(application_name)) else: - if task.state in (batch_models.TaskState.active, batch_models.TaskState.running, batch_models.TaskState.preparing): + if task.state in (batch_models.TaskState.active, batch_models.TaskState.running, + batch_models.TaskState.preparing): raise error.AztkError("The application {0} has not yet finished executing.".format(application_name)) return spark_client.get_application_log(job_id, application_name) @@ -192,6 +194,7 @@ def stop_app(spark_client, job_id, application_name): except batch_models.batch_error.BatchErrorException: return False + def wait_until_job_finished(spark_client, job_id): job_state = spark_client.batch_client.job_schedule.get(job_id).state diff --git a/aztk/spark/helpers/submit.py b/aztk/spark/helpers/submit.py index 2e021977..49c5370c 100644 --- a/aztk/spark/helpers/submit.py +++ b/aztk/spark/helpers/submit.py @@ -6,8 +6,6 @@ from aztk.error import AztkError from aztk.utils import constants, helpers from aztk.utils.command_builder import CommandBuilder - - ''' Submit helper methods ''' @@ -22,11 +20,12 @@ def generate_task(spark_client, container_id, application, remote=False): # The application provided is not hosted remotely and therefore must be uploaded if not remote: - app_resource_file = helpers.upload_file_to_container(container_name=container_id, - application_name=application.name, - file_path=application.application, - blob_client=spark_client.blob_client, - use_full_path=False) + app_resource_file = helpers.upload_file_to_container( + container_name=container_id, + application_name=application.name, + file_path=application.application, + blob_client=spark_client.blob_client, + use_full_path=False) # Upload application file resource_files.append(app_resource_file) @@ -36,34 +35,36 @@ def generate_task(spark_client, container_id, application, remote=False): # Upload dependent JARS jar_resource_file_paths = [] for jar in application.jars: - current_jar_resource_file_path = helpers.upload_file_to_container(container_name=container_id, - application_name=application.name, - file_path=jar, - blob_client=spark_client.blob_client, - use_full_path=False) + current_jar_resource_file_path = helpers.upload_file_to_container( + container_name=container_id, + application_name=application.name, + file_path=jar, + blob_client=spark_client.blob_client, + use_full_path=False) jar_resource_file_paths.append(current_jar_resource_file_path) resource_files.append(current_jar_resource_file_path) # Upload dependent python files py_files_resource_file_paths = [] for py_file in 
application.py_files: - current_py_files_resource_file_path = helpers.upload_file_to_container(container_name=container_id, - application_name=application.name, - file_path=py_file, - blob_client=spark_client.blob_client, - use_full_path=False) - py_files_resource_file_paths.append( - current_py_files_resource_file_path) + current_py_files_resource_file_path = helpers.upload_file_to_container( + container_name=container_id, + application_name=application.name, + file_path=py_file, + blob_client=spark_client.blob_client, + use_full_path=False) + py_files_resource_file_paths.append(current_py_files_resource_file_path) resource_files.append(current_py_files_resource_file_path) # Upload other dependent files files_resource_file_paths = [] for file in application.files: - files_resource_file_path = helpers.upload_file_to_container(container_name=container_id, - application_name=application.name, - file_path=file, - blob_client=spark_client.blob_client, - use_full_path=False) + files_resource_file_path = helpers.upload_file_to_container( + container_name=container_id, + application_name=application.name, + file_path=file, + blob_client=spark_client.blob_client, + use_full_path=False) files_resource_file_paths.append(files_resource_file_path) resource_files.append(files_resource_file_path) @@ -95,13 +96,10 @@ def generate_task(spark_client, container_id, application, remote=False): id=application.name, command_line=helpers.wrap_commands_in_shell([task_cmd.to_str()]), resource_files=resource_files, - constraints=batch_models.TaskConstraints( - max_task_retry_count=application.max_retry_count), + constraints=batch_models.TaskConstraints(max_task_retry_count=application.max_retry_count), user_identity=batch_models.UserIdentity( auto_user=batch_models.AutoUserSpecification( - scope=batch_models.AutoUserScope.task, - elevation_level=batch_models.ElevationLevel.admin)) - ) + scope=batch_models.AutoUserScope.task, elevation_level=batch_models.ElevationLevel.admin))) return task @@ -122,7 +120,6 @@ def submit_application(spark_client, cluster_id, application, remote: bool = Fal task = generate_task(spark_client, cluster_id, application, remote) task = affinitize_task_to_master(spark_client, cluster_id, task) - # Add task to batch job (which has the same name as cluster_id) job_id = cluster_id spark_client.batch_client.task.add(job_id=job_id, task=task) diff --git a/aztk/spark/models/models.py b/aztk/spark/models/models.py index 26c44b8d..87e0d83a 100644 --- a/aztk/spark/models/models.py +++ b/aztk/spark/models/models.py @@ -6,6 +6,7 @@ from aztk.utils import constants, helpers from aztk.core.models import Model, fields + class SparkToolkit(aztk.models.Toolkit): def __init__(self, version: str, environment: str = None, environment_version: str = None): super().__init__( @@ -54,6 +55,7 @@ def __init__(self, remote_login: aztk.models.RemoteLogin): class PortForwardingSpecification(aztk.models.PortForwardingSpecification): pass + class File(aztk.models.File): pass @@ -105,10 +107,12 @@ class PluginConfiguration(aztk.models.PluginConfiguration): SchedulingTarget = aztk.models.SchedulingTarget + class ClusterConfiguration(aztk.models.ClusterConfiguration): spark_configuration = fields.Model(SparkConfiguration, default=None) worker_on_master = fields.Boolean(default=True) + class SecretsConfiguration(aztk.models.SecretsConfiguration): pass @@ -118,23 +122,22 @@ class VmImage(aztk.models.VmImage): class ApplicationConfiguration: - def __init__( - self, - name=None, - application=None, - application_args=None, - 
main_class=None, - jars=None, - py_files=None, - files=None, - driver_java_options=None, - driver_library_path=None, - driver_class_path=None, - driver_memory=None, - executor_memory=None, - driver_cores=None, - executor_cores=None, - max_retry_count=None): + def __init__(self, + name=None, + application=None, + application_args=None, + main_class=None, + jars=None, + py_files=None, + files=None, + driver_java_options=None, + driver_library_path=None, + driver_class_path=None, + driver_memory=None, + executor_memory=None, + driver_cores=None, + executor_cores=None, + max_retry_count=None): self.name = name self.application = application self.application_args = application_args @@ -185,19 +188,18 @@ def __init__(self, cloud_task: batch_models.CloudTask): class JobConfiguration: - def __init__( - self, - id = None, - applications = None, - vm_size = None, - custom_scripts=None, - spark_configuration=None, - toolkit=None, - max_dedicated_nodes=0, - max_low_pri_nodes=0, - subnet_id=None, - scheduling_target: SchedulingTarget = None, - worker_on_master=None): + def __init__(self, + id=None, + applications=None, + vm_size=None, + custom_scripts=None, + spark_configuration=None, + toolkit=None, + max_dedicated_nodes=0, + max_low_pri_nodes=0, + subnet_id=None, + scheduling_target: SchedulingTarget = None, + worker_on_master=None): self.id = id self.applications = applications @@ -240,8 +242,7 @@ def validate(self) -> bool: Raises: Error if invalid """ if self.toolkit is None: - raise error.InvalidModelError( - "Please supply a toolkit in the cluster configuration") + raise error.InvalidModelError("Please supply a toolkit in the cluster configuration") self.toolkit.validate() @@ -254,9 +255,7 @@ def validate(self) -> bool: ) if self.vm_size is None: - raise error.AztkError( - "Please supply a vm_size in your configuration." 
- ) + raise error.AztkError("Please supply a vm_size in your configuration.") if self.mixed_mode() and not self.subnet_id: raise error.AztkError( @@ -277,7 +276,8 @@ class JobState(): class Job(): - def __init__(self, cloud_job_schedule: batch_models.CloudJobSchedule, + def __init__(self, + cloud_job_schedule: batch_models.CloudJobSchedule, cloud_tasks: List[batch_models.CloudTask] = None, pool: batch_models.CloudPool = None, nodes: batch_models.ComputeNodePaged = None): diff --git a/aztk/spark/models/plugins/install/apt_get/configuration.py b/aztk/spark/models/plugins/install/apt_get/configuration.py index fc1c16f3..c5487f9c 100644 --- a/aztk/spark/models/plugins/install/apt_get/configuration.py +++ b/aztk/spark/models/plugins/install/apt_get/configuration.py @@ -6,9 +6,6 @@ dir_path = os.path.dirname(os.path.realpath(__file__)) + def AptGetPlugin(packages=None): - return InstallPlugin( - name="apt-get", - command="apt-get update && apt-get install -y", - packages=packages - ) + return InstallPlugin(name="apt-get", command="apt-get update && apt-get install -y", packages=packages) diff --git a/aztk/spark/models/plugins/install/conda/configuration.py b/aztk/spark/models/plugins/install/conda/configuration.py index b10bdb31..d39bfbf8 100644 --- a/aztk/spark/models/plugins/install/conda/configuration.py +++ b/aztk/spark/models/plugins/install/conda/configuration.py @@ -6,9 +6,6 @@ dir_path = os.path.dirname(os.path.realpath(__file__)) + def CondaPlugin(packages=None): - return InstallPlugin( - name="conda", - command="conda install -y", - packages=packages - ) + return InstallPlugin(name="conda", command="conda install -y", packages=packages) diff --git a/aztk/spark/models/plugins/install/configuration.py b/aztk/spark/models/plugins/install/configuration.py index a57b63e8..54140a94 100644 --- a/aztk/spark/models/plugins/install/configuration.py +++ b/aztk/spark/models/plugins/install/configuration.py @@ -5,14 +5,12 @@ dir_path = os.path.dirname(os.path.realpath(__file__)) + def InstallPlugin(name, command, packages=None): return PluginConfiguration( name=name, target_role=PluginTargetRole.All, execute="install.sh", - files=[ - PluginFile("install.sh", os.path.join(dir_path, "install.sh")) - ], + files=[PluginFile("install.sh", os.path.join(dir_path, "install.sh"))], args=packages, - env=dict(COMMAND=command) - ) + env=dict(COMMAND=command)) diff --git a/aztk/spark/models/plugins/install/pip/configuration.py b/aztk/spark/models/plugins/install/pip/configuration.py index ac0531ee..dab12e4e 100644 --- a/aztk/spark/models/plugins/install/pip/configuration.py +++ b/aztk/spark/models/plugins/install/pip/configuration.py @@ -6,9 +6,6 @@ dir_path = os.path.dirname(os.path.realpath(__file__)) + def PipPlugin(packages=None): - return InstallPlugin( - name="pip", - command="pip install", - packages=packages - ) + return InstallPlugin(name="pip", command="pip install", packages=packages) diff --git a/aztk/spark/models/plugins/jupyter/configuration.py b/aztk/spark/models/plugins/jupyter/configuration.py index 71095a3d..e3df3bc2 100644 --- a/aztk/spark/models/plugins/jupyter/configuration.py +++ b/aztk/spark/models/plugins/jupyter/configuration.py @@ -4,6 +4,7 @@ dir_path = os.path.dirname(os.path.realpath(__file__)) + def JupyterPlugin(): return PluginConfiguration( name="jupyter", diff --git a/aztk/spark/models/plugins/jupyter_lab/configuration.py b/aztk/spark/models/plugins/jupyter_lab/configuration.py index f6387904..a14910be 100644 --- a/aztk/spark/models/plugins/jupyter_lab/configuration.py +++ 
b/aztk/spark/models/plugins/jupyter_lab/configuration.py @@ -5,6 +5,7 @@ dir_path = os.path.dirname(os.path.realpath(__file__)) + def JupyterLabPlugin(): return PluginConfiguration( name="jupyterlab", diff --git a/aztk/spark/models/plugins/nvblas/configuration.py b/aztk/spark/models/plugins/nvblas/configuration.py index 40af5003..6ec047f7 100644 --- a/aztk/spark/models/plugins/nvblas/configuration.py +++ b/aztk/spark/models/plugins/nvblas/configuration.py @@ -14,5 +14,4 @@ def NvBLASPlugin(): execute="nvblas.sh", files=[ PluginFile("nvblas.sh", os.path.join(dir_path, "nvblas.sh")), - ] - ) + ]) diff --git a/aztk/spark/models/plugins/resource_monitor/configuration.py b/aztk/spark/models/plugins/resource_monitor/configuration.py index db819c87..09c535da 100644 --- a/aztk/spark/models/plugins/resource_monitor/configuration.py +++ b/aztk/spark/models/plugins/resource_monitor/configuration.py @@ -5,6 +5,7 @@ dir_path = os.path.dirname(os.path.realpath(__file__)) + class ResourceMonitorPlugin(PluginConfiguration): def __init__(self): super().__init__( @@ -22,5 +23,4 @@ def __init__(self): PluginFile("start_monitor.sh", os.path.join(dir_path, "start_monitor.sh")), PluginFile("etc/telegraf.conf", os.path.join(dir_path, "telegraf.conf")), PluginFile("docker-compose.yml", os.path.join(dir_path, "docker-compose.yml")), - ] - ) + ]) diff --git a/aztk/spark/models/plugins/simple/configuration.py b/aztk/spark/models/plugins/simple/configuration.py index f0ea6622..d58e9f98 100644 --- a/aztk/spark/models/plugins/simple/configuration.py +++ b/aztk/spark/models/plugins/simple/configuration.py @@ -5,6 +5,7 @@ dir_path = os.path.dirname(os.path.realpath(__file__)) + class SimplePlugin(PluginConfiguration): def __init__(self): super().__init__( diff --git a/aztk/spark/models/plugins/spark_ui_proxy/configuration.py b/aztk/spark/models/plugins/spark_ui_proxy/configuration.py index ceb71e67..623a7bda 100644 --- a/aztk/spark/models/plugins/spark_ui_proxy/configuration.py +++ b/aztk/spark/models/plugins/spark_ui_proxy/configuration.py @@ -10,12 +10,7 @@ class SparkUIProxyPlugin(PluginConfiguration): def __init__(self): super().__init__( name="spark_ui_proxy", - ports=[ - PluginPort( - internal=9999, - public=True - ) - ], + ports=[PluginPort(internal=9999, public=True)], target_role=PluginTargetRole.Master, execute="spark_ui_proxy.sh", args=["localhost:8080", "9999"], diff --git a/aztk/spark/models/plugins/spark_ui_proxy/spark_ui_proxy.py b/aztk/spark/models/plugins/spark_ui_proxy/spark_ui_proxy.py index e87eaa01..332e7e0a 100644 --- a/aztk/spark/models/plugins/spark_ui_proxy/spark_ui_proxy.py +++ b/aztk/spark/models/plugins/spark_ui_proxy/spark_ui_proxy.py @@ -1,18 +1,18 @@ #!/usr/bin/env python # MIT License -# +# # Copyright (c) 2016 Alexis Seigneurin -# +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. 
-# +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -72,7 +72,7 @@ def proxyRequest(self, data): def extractUrlDetails(self, path): if path.startswith(URL_PREFIX + "proxy:"): - start_idx = len(URL_PREFIX) + 6 # len('proxy:') == 6 + start_idx = len(URL_PREFIX) + 6 # len('proxy:') == 6 idx = path.find("/", start_idx) targetHost = path[start_idx:] if idx == -1 else path[start_idx:idx] path = "" if idx == -1 else path[idx:] diff --git a/aztk/spark/utils/util.py b/aztk/spark/utils/util.py index 7d72239c..0df0c17d 100644 --- a/aztk/spark/utils/util.py +++ b/aztk/spark/utils/util.py @@ -30,7 +30,7 @@ def wait_for_master_to_be_ready(core_operations, spark_operations, cluster_id: s master_node = core_operations.batch_client.compute_node.get(cluster_id, master_node_id) - if master_node.state in [batch_models.ComputeNodeState.idle, batch_models.ComputeNodeState.running]: + if master_node.state in [batch_models.ComputeNodeState.idle, batch_models.ComputeNodeState.running]: break elif master_node.state is batch_models.ComputeNodeState.start_task_failed: raise MasterInvalidStateError("Start task failed on master") @@ -41,7 +41,6 @@ def wait_for_master_to_be_ready(core_operations, spark_operations, cluster_id: s delta = now - start_time if delta.total_seconds() > constants.WAIT_FOR_MASTER_TIMEOUT: - raise MasterInvalidStateError( - "Master didn't become ready before timeout.") + raise MasterInvalidStateError("Master didn't become ready before timeout.") time.sleep(10) diff --git a/aztk/utils/azure_api.py b/aztk/utils/azure_api.py index 7b61a7e0..10a4fa7a 100644 --- a/aztk/utils/azure_api.py +++ b/aztk/utils/azure_api.py @@ -10,7 +10,6 @@ from azure.storage.common import CloudStorageAccount from typing import Optional - RESOURCE_ID_PATTERN = re.compile('^/subscriptions/(?P[^/]+)' '/resourceGroups/(?P[^/]+)' '/providers/[^/]+' @@ -39,9 +38,8 @@ def make_batch_client(secrets): if secrets.shared_key: # Set up SharedKeyCredentials base_url = secrets.shared_key.batch_service_url - credentials = batch_auth.SharedKeyCredentials( - secrets.shared_key.batch_account_name, - secrets.shared_key.batch_account_key) + credentials = batch_auth.SharedKeyCredentials(secrets.shared_key.batch_account_name, + secrets.shared_key.batch_account_key) else: # Set up ServicePrincipalCredentials arm_credentials = ServicePrincipalCredentials( @@ -60,9 +58,7 @@ def make_batch_client(secrets): resource='https://batch.core.windows.net/') # Set up Batch Client - batch_client = batch.BatchServiceClient( - credentials, - base_url=base_url) + batch_client = batch.BatchServiceClient(credentials, base_url=base_url) # Set retry policy batch_client.config.retry_policy.retries = 5 @@ -97,7 +93,8 @@ def make_blob_client(secrets): subscription = m.group('subscription') resourcegroup = m.group('resourcegroup') mgmt_client = StorageManagementClient(arm_credentials, subscription) - key = mgmt_client.storage_accounts.list_keys(resource_group_name=resourcegroup, account_name=accountname).keys[0].value + key = mgmt_client.storage_accounts.list_keys( + resource_group_name=resourcegroup, account_name=accountname).keys[0].value storage_client = CloudStorageAccount(accountname, key) blob_client = storage_client.create_block_blob_service() diff --git a/aztk/utils/command_builder.py b/aztk/utils/command_builder.py index bd0d3b8f..a55cf625 100644 --- 
a/aztk/utils/command_builder.py +++ b/aztk/utils/command_builder.py @@ -1,8 +1,9 @@ class CommandOption(): - def __init__(self, name:str, value: str): + def __init__(self, name: str, value: str): self.name = name self.value = value + class CommandBuilder: """ Helper class to build a command line @@ -16,7 +17,7 @@ def __init__(self, executable: str): self.options = [] self.arguments = [] - def add_option(self, name: str, value: str = None, enable: bool=None): + def add_option(self, name: str, value: str = None, enable: bool = None): """ Add an option to the command line. diff --git a/aztk/utils/deprecation.py b/aztk/utils/deprecation.py index 8ce27bba..5bb14f05 100644 --- a/aztk/utils/deprecation.py +++ b/aztk/utils/deprecation.py @@ -2,6 +2,7 @@ import functools import inspect + def deprecated(version: str, advice: str = None): """ This is a decorator which can be used to mark functions @@ -23,6 +24,7 @@ def decorator(func): def new_func(*args, **kwargs): deprecate(version=version, message=msg.format(name=func.__name__, advice=advice), advice=advice) return func(*args, **kwargs) + return new_func return decorator @@ -37,8 +39,9 @@ def deprecate(version: str, message: str, advice: str = ""): advice (str): Sentence explaining alternatives to the deprecated functionality. """ - warnings.simplefilter('always', DeprecationWarning) # turn off filter - warnings.warn("{0} It will be removed in Aztk version {1}. {2}".format(message, version, advice), - category=DeprecationWarning, - stacklevel=2) - warnings.simplefilter('default', DeprecationWarning) # reset filter + warnings.simplefilter('always', DeprecationWarning) # turn off filter + warnings.warn( + "{0} It will be removed in Aztk version {1}. {2}".format(message, version, advice), + category=DeprecationWarning, + stacklevel=2) + warnings.simplefilter('default', DeprecationWarning) # reset filter diff --git a/aztk/utils/file_utils.py b/aztk/utils/file_utils.py index 3245a4a0..1325ebc0 100644 --- a/aztk/utils/file_utils.py +++ b/aztk/utils/file_utils.py @@ -1,5 +1,6 @@ import os + def ensure_dir(file_path): directory = os.path.dirname(file_path) if not os.path.exists(directory): diff --git a/aztk/utils/get_ssh_key.py b/aztk/utils/get_ssh_key.py index 601d005e..4df781fa 100644 --- a/aztk/utils/get_ssh_key.py +++ b/aztk/utils/get_ssh_key.py @@ -1,5 +1,6 @@ import os + def get_user_public_key(key_or_path: str = None, secrets_config=None): """ Return the ssh key. @@ -7,10 +8,10 @@ def get_user_public_key(key_or_path: str = None, secrets_config=None): otherwise will check the configuration file. 
""" if not key_or_path: - if not secrets_config.ssh_pub_key: - return None + if not secrets_config.ssh_pub_key: + return None - key_or_path = secrets_config.ssh_pub_key + key_or_path = secrets_config.ssh_pub_key if not key_or_path: return None diff --git a/aztk/utils/helpers.py b/aztk/utils/helpers.py index 972f2f9f..baa6c847 100644 --- a/aztk/utils/helpers.py +++ b/aztk/utils/helpers.py @@ -41,10 +41,7 @@ def wait_for_tasks_to_complete(job_id, batch_client): while True: tasks = batch_client.task.list(job_id) - incomplete_tasks = [ - task for task in tasks - if task.state != batch_models.TaskState.completed - ] + incomplete_tasks = [task for task in tasks if task.state != batch_models.TaskState.completed] if not incomplete_tasks: return time.sleep(5) @@ -66,13 +63,10 @@ def wait_for_task_to_complete(job_id: str, task_id: str, batch_client): return -def upload_text_to_container(container_name: str, - application_name: str, - content: str, - file_path: str, +def upload_text_to_container(container_name: str, application_name: str, content: str, file_path: str, blob_client=None) -> batch_models.ResourceFile: blob_name = file_path - blob_path = application_name + '/' + blob_name # + '/' + time_stamp + '/' + blob_name + blob_path = application_name + '/' + blob_name # + '/' + time_stamp + '/' + blob_name blob_client.create_container(container_name, fail_on_exist=False) blob_client.create_blob_from_text(container_name, blob_path, content) @@ -82,8 +76,7 @@ def upload_text_to_container(container_name: str, permission=blob.BlobPermissions.READ, expiry=datetime.datetime.utcnow() + datetime.timedelta(days=365)) - sas_url = blob_client.make_blob_url( - container_name, blob_path, sas_token=sas_token) + sas_url = blob_client.make_blob_url(container_name, blob_path, sas_token=sas_token) return batch_models.ResourceFile(file_path=blob_name, blob_source=sas_url) @@ -126,8 +119,7 @@ def upload_file_to_container(container_name, permission=blob.BlobPermissions.READ, expiry=datetime.datetime.utcnow() + datetime.timedelta(days=7)) - sas_url = blob_client.make_blob_url( - container_name, blob_path, sas_token=sas_token) + sas_url = blob_client.make_blob_url(container_name, blob_path, sas_token=sas_token) return batch_models.ResourceFile(file_path=node_path, blob_source=sas_url) @@ -145,8 +137,7 @@ def create_pool_if_not_exist(pool, batch_client): except batch_models.BatchErrorException as e: if e.error.code == "PoolExists": raise error.AztkError( - "A cluster with the same id already exists. Use a different id or delete the existing cluster" - ) + "A cluster with the same id already exists. 
Use a different id or delete the existing cluster") else: raise return True @@ -167,20 +158,16 @@ def wait_for_all_nodes_state(pool, node_state, batch_client): # refresh pool to ensure that there is no resize error pool = batch_client.pool.get(pool.id) if pool.resize_errors is not None: - raise RuntimeError( - 'resize error encountered for pool {}: {!r}'.format( - pool.id, pool.resize_errors)) + raise RuntimeError('resize error encountered for pool {}: {!r}'.format(pool.id, pool.resize_errors)) nodes = list(batch_client.compute_node.list(pool.id)) totalNodes = pool.target_dedicated_nodes + pool.target_low_priority_nodes - if (len(nodes) >= totalNodes - and all(node.state in node_state for node in nodes)): + if (len(nodes) >= totalNodes and all(node.state in node_state for node in nodes)): return nodes time.sleep(1) -def select_latest_verified_vm_image_with_node_agent_sku( - publisher, offer, sku_starts_with, batch_client): +def select_latest_verified_vm_image_with_node_agent_sku(publisher, offer, sku_starts_with, batch_client): """ Select the latest verified image that Azure Batch supports given a publisher, offer and sku (starts with filter). @@ -196,25 +183,17 @@ def select_latest_verified_vm_image_with_node_agent_sku( node_agent_skus = batch_client.account.list_node_agent_skus() # pick the latest supported sku - skus_to_use = [ - (sku, image_ref) for sku in node_agent_skus for image_ref in sorted( - sku.verified_image_references, key=lambda item: item.sku) - if image_ref.publisher.lower() == publisher.lower() - and image_ref.offer.lower() == offer.lower() - and image_ref.sku.startswith(sku_starts_with) - ] + skus_to_use = [(sku, image_ref) for sku in node_agent_skus + for image_ref in sorted(sku.verified_image_references, key=lambda item: item.sku) + if image_ref.publisher.lower() == publisher.lower() and image_ref.offer.lower() == offer.lower() + and image_ref.sku.startswith(sku_starts_with)] # skus are listed in reverse order, pick first for latest sku_to_use, image_ref_to_use = skus_to_use[0] return (sku_to_use.id, image_ref_to_use) -def create_sas_token(container_name, - blob_name, - permission, - blob_client, - expiry=None, - timeout=None): +def create_sas_token(container_name, blob_name, permission, blob_client, expiry=None, timeout=None): """ Create a blob sas token :param blob_client: The storage block blob client to use. @@ -231,18 +210,12 @@ def create_sas_token(container_name, if expiry is None: if timeout is None: timeout = 30 - expiry = datetime.datetime.utcnow() + datetime.timedelta( - minutes=timeout) + expiry = datetime.datetime.utcnow() + datetime.timedelta(minutes=timeout) return blob_client.generate_blob_shared_access_signature( container_name, blob_name, permission=permission, expiry=expiry) -def upload_blob_and_create_sas(container_name, - blob_name, - file_name, - expiry, - blob_client, - timeout=None): +def upload_blob_and_create_sas(container_name, blob_name, file_name, expiry, blob_client, timeout=None): """ Uploads a file from local disk to Azure Storage and creates a SAS for it. :param blob_client: The storage block blob client to use. 
@@ -269,8 +242,7 @@ def upload_blob_and_create_sas(container_name, expiry=expiry, timeout=timeout) - sas_url = blob_client.make_blob_url( - container_name, blob_name, sas_token=sas_token) + sas_url = blob_client.make_blob_url(container_name, blob_name, sas_token=sas_token) return sas_url @@ -283,8 +255,7 @@ def wrap_commands_in_shell(commands): :rtype: str :return: a shell wrapping commands """ - return '/bin/bash -c \'set -e; set -o pipefail; {}; wait\''.format( - ';'.join(commands)) + return '/bin/bash -c \'set -e; set -o pipefail; {}; wait\''.format(';'.join(commands)) def get_connection_info(pool_id, node_id, batch_client): @@ -328,10 +299,8 @@ def normalize_path(path: str) -> str: return path -def get_file_properties(job_id: str, task_id: str, file_path: str, - batch_client): - raw = batch_client.file.get_properties_from_task( - job_id, task_id, file_path, raw=True) +def get_file_properties(job_id: str, task_id: str, file_path: str, batch_client): + raw = batch_client.file.get_properties_from_task(job_id, task_id, file_path, raw=True) return batch_models.FileProperties( content_length=raw.headers["Content-Length"], @@ -393,13 +362,9 @@ def read_cluster_config(cluster_id: str, blob_client: blob.BlockBlobService): result = blob_client.get_blob_to_text(cluster_id, blob_path) return yaml.load(result.content) except azure.common.AzureMissingResourceHttpError: - logging.warn( - "Cluster %s doesn't have cluster configuration in storage", - cluster_id) + logging.warn("Cluster %s doesn't have cluster configuration in storage", cluster_id) except yaml.YAMLError: - logging.warn( - "Cluster %s contains invalid cluster configuration in blob", - cluster_id) + logging.warn("Cluster %s contains invalid cluster configuration in blob", cluster_id) def bool_env(value: bool): diff --git a/aztk/utils/ssh.py b/aztk/utils/ssh.py index a3e38388..9b28cec0 100644 --- a/aztk/utils/ssh.py +++ b/aztk/utils/ssh.py @@ -14,27 +14,27 @@ from aztk.error import AztkError from aztk.models import NodeOutput + class ForwardServer(SocketServer.ThreadingTCPServer): daemon_threads = True allow_reuse_address = True + # pylint: disable=no-member class Handler(SocketServer.BaseRequestHandler): def handle(self): try: channel = self.ssh_transport.open_channel('direct-tcpip', - (self.chain_host, self.chain_port), - self.request.getpeername()) + (self.chain_host, self.chain_port), self.request.getpeername()) except Exception as e: - logging.debug('Incoming request to %s:%d failed: %s', self.chain_host, - self.chain_port, - repr(e)) + logging.debug('Incoming request to %s:%d failed: %s', self.chain_host, self.chain_port, repr(e)) return if channel is None: logging.debug('Incoming request to %s:%d was rejected by the SSH server.', self.chain_host, self.chain_port) return - logging.debug('Connected! Tunnel open %r -> %r -> %r', self.request.getpeername(), channel.getpeername(), (self.chain_host, self.chain_port)) + logging.debug('Connected! 
Tunnel open %r -> %r -> %r', self.request.getpeername(), channel.getpeername(), + (self.chain_host, self.chain_port)) while True: r, w, x = select.select([self.request, channel], [], []) if self.request in r: @@ -59,17 +59,13 @@ class SubHandler(Handler): chain_host = remote_host chain_port = remote_port ssh_transport = transport + thread = threading.Thread(target=ForwardServer(('', local_port), SubHandler).serve_forever, daemon=True) thread.start() return thread -def connect(hostname, - port=22, - username=None, - password=None, - pkey=None, - timeout=None): +def connect(hostname, port=22, username=None, password=None, pkey=None, timeout=None): import paramiko client = paramiko.SSHClient() @@ -96,23 +92,28 @@ def forward_ports(client, port_forward_list): for port_forwarding_specification in port_forward_list: threads.append( - forward_tunnel( - port_forwarding_specification.remote_port, - "127.0.0.1", - port_forwarding_specification.local_port, - client.get_transport() - ) - ) + forward_tunnel(port_forwarding_specification.remote_port, "127.0.0.1", + port_forwarding_specification.local_port, client.get_transport())) return threads -def node_exec_command(node_id, command, username, hostname, port, ssh_key=None, password=None, container_name=None, timeout=None): +def node_exec_command(node_id, + command, + username, + hostname, + port, + ssh_key=None, + password=None, + container_name=None, + timeout=None): try: - client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key, timeout=timeout) + client = connect( + hostname=hostname, port=port, username=username, password=password, pkey=ssh_key, timeout=timeout) except AztkError as e: return NodeOutput(node_id, e) if container_name: - cmd = 'sudo docker exec 2>&1 -t {0} /bin/bash -c \'set -e; set -o pipefail; {1}; wait\''.format(container_name, command) + cmd = 'sudo docker exec 2>&1 -t {0} /bin/bash -c \'set -e; set -o pipefail; {1}; wait\''.format( + container_name, command) else: cmd = '/bin/bash 2>&1 -c \'set -e; set -o pipefail; {0}; wait\''.format(command) stdin, stdout, stderr = client.exec_command(cmd, get_pty=True) @@ -121,38 +122,48 @@ def node_exec_command(node_id, command, username, hostname, port, ssh_key=None, return NodeOutput(node_id, output, None) -async def clus_exec_command(command, username, nodes, ports=None, ssh_key=None, password=None, container_name=None, timeout=None): - return await asyncio.gather( - *[asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), - node_exec_command, - node.id, - command, - username, - node_rls.ip_address, - node_rls.port, - ssh_key, - password, - container_name, - timeout) for node, node_rls in nodes] - ) - - -def copy_from_node(node_id, source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None, timeout=None): +async def clus_exec_command(command, + username, + nodes, + ports=None, + ssh_key=None, + password=None, + container_name=None, + timeout=None): + return await asyncio.gather(*[ + asyncio.get_event_loop() + .run_in_executor(ThreadPoolExecutor(), node_exec_command, node.id, command, username, node_rls.ip_address, + node_rls.port, ssh_key, password, container_name, timeout) for node, node_rls in nodes + ]) + + +def copy_from_node(node_id, + source_path, + destination_path, + username, + hostname, + port, + ssh_key=None, + password=None, + container_name=None, + timeout=None): try: - client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key, 
timeout=timeout) + client = connect( + hostname=hostname, port=port, username=username, password=password, pkey=ssh_key, timeout=timeout) except AztkError as e: return NodeOutput(node_id, False, e) sftp_client = client.open_sftp() try: if destination_path: - destination_path = os.path.join(os.path.dirname(destination_path), node_id, os.path.basename(destination_path)) + destination_path = os.path.join( + os.path.dirname(destination_path), node_id, os.path.basename(destination_path)) os.makedirs(os.path.dirname(destination_path), exist_ok=True) with open(destination_path, 'wb') as f: sftp_client.getfo(source_path, f) else: import tempfile # create 2mb temporary file - f = tempfile.SpooledTemporaryFile(2*1024**3) + f = tempfile.SpooledTemporaryFile(2 * 1024**3) sftp_client.getfo(source_path, f) return NodeOutput(node_id, f, None) @@ -163,9 +174,19 @@ def copy_from_node(node_id, source_path, destination_path, username, hostname, p client.close() -def node_copy(node_id, source_path, destination_path, username, hostname, port, ssh_key=None, password=None, container_name=None, timeout=None): +def node_copy(node_id, + source_path, + destination_path, + username, + hostname, + port, + ssh_key=None, + password=None, + container_name=None, + timeout=None): try: - client = connect(hostname=hostname, port=port, username=username, password=password, pkey=ssh_key, timeout=timeout) + client = connect( + hostname=hostname, port=port, username=username, password=password, pkey=ssh_key, timeout=timeout) except AztkError as e: return NodeOutput(node_id, None, e) sftp_client = client.open_sftp() @@ -193,33 +214,27 @@ def node_copy(node_id, source_path, destination_path, username, hostname, port, #TODO: progress bar -async def clus_copy(username, nodes, source_path, destination_path, ssh_key=None, password=None, container_name=None, get=False, timeout=None): - return await asyncio.gather( - *[asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), - copy_from_node if get else node_copy, - node.id, - source_path, - destination_path, - username, - node_rls.ip_address, - node_rls.port, - ssh_key, - password, - container_name, - timeout) for node, node_rls in nodes] - ) +async def clus_copy(username, + nodes, + source_path, + destination_path, + ssh_key=None, + password=None, + container_name=None, + get=False, + timeout=None): + return await asyncio.gather(*[ + asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), copy_from_node if get else node_copy, node.id, + source_path, destination_path, username, node_rls.ip_address, + node_rls.port, ssh_key, password, container_name, timeout) + for node, node_rls in nodes + ]) def node_ssh(username, hostname, port, ssh_key=None, password=None, port_forward_list=None, timeout=None): try: client = connect( - hostname=hostname, - port=port, - username=username, - password=password, - pkey=ssh_key, - timeout=timeout - ) + hostname=hostname, port=port, username=username, password=password, pkey=ssh_key, timeout=timeout) threads = forward_ports(client=client, port_forward_list=port_forward_list) except AztkError as e: raise e diff --git a/aztk_cli/config.py b/aztk_cli/config.py index 9f521057..e8fdccc3 100644 --- a/aztk_cli/config.py +++ b/aztk_cli/config.py @@ -10,6 +10,7 @@ from aztk.models import Toolkit from aztk.models.plugins.internal import PluginReference + def load_aztk_secrets() -> SecretsConfiguration: """ Loads aztk from .aztk/secrets.yaml files(local and global) @@ -23,7 +24,7 @@ def load_aztk_secrets() -> SecretsConfiguration: if not global_config 
and not local_config: raise aztk.error.AztkError("There is no secrets.yaml in either ./.aztk/secrets.yaml or .aztk/secrets.yaml") - if global_config: # Global config is optional + if global_config: # Global config is optional _merge_secrets_dict(secrets, global_config) if local_config: _merge_secrets_dict(secrets, local_config) @@ -32,6 +33,7 @@ def load_aztk_secrets() -> SecretsConfiguration: secrets.validate() return secrets + def _load_config_file(path: str): if not os.path.isfile(path): return None @@ -40,27 +42,27 @@ def _load_config_file(path: str): try: return yaml.load(stream) except yaml.YAMLError as err: - raise aztk.error.AztkError( - "Error in {0}:\n {1}".format(path, err)) + raise aztk.error.AztkError("Error in {0}:\n {1}".format(path, err)) def _merge_secrets_dict(secrets: SecretsConfiguration, secrets_config): if 'default' in secrets_config: - deprecate("0.9.0", "default key in secrets.yaml is deprecated.", "Place all child parameters directly at the root") + deprecate("0.9.0", "default key in secrets.yaml is deprecated.", + "Place all child parameters directly at the root") secrets_config = dict(**secrets_config, **secrets_config.pop('default')) other = SecretsConfiguration.from_dict(secrets_config) secrets.merge(other) -def read_cluster_config( - path: str = aztk.utils.constants.DEFAULT_CLUSTER_CONFIG_PATH -) -> ClusterConfiguration: + +def read_cluster_config(path: str = aztk.utils.constants.DEFAULT_CLUSTER_CONFIG_PATH) -> ClusterConfiguration: """ Reads the config file in the .aztk/ directory (.aztk/cluster.yaml) """ config_dict = _load_config_file(path) return cluster_config_from_dict(config_dict) + def cluster_config_from_dict(config: dict): wait = False if config.get('plugins') not in [[None], None]: @@ -92,8 +94,7 @@ def __init__(self): self.job_history_ui_port = '18080' self.web_ui_port = '8080' - def _read_config_file( - self, path: str = aztk.utils.constants.DEFAULT_SSH_CONFIG_PATH): + def _read_config_file(self, path: str = aztk.utils.constants.DEFAULT_SSH_CONFIG_PATH): """ Reads the config file in the .aztk/ directory (.aztk/ssh.yaml) """ @@ -104,8 +105,7 @@ def _read_config_file( try: config = yaml.load(stream) except yaml.YAMLError as err: - raise aztk.error.AztkError( - "Error in ssh.yaml: {0}".format(err)) + raise aztk.error.AztkError("Error in ssh.yaml: {0}".format(err)) if config is None: return @@ -137,14 +137,11 @@ def _merge_dict(self, config): if config.get('internal') is not None: self.internal = config['internal'] - def merge(self, cluster_id, username, job_ui_port, job_history_ui_port, - web_ui_port, host, connect, internal): + def merge(self, cluster_id, username, job_ui_port, job_history_ui_port, web_ui_port, host, connect, internal): """ Merges fields with args object """ - self._read_config_file( - os.path.join(aztk.utils.constants.HOME_DIRECTORY_PATH, '.aztk', - 'ssh.yaml')) + self._read_config_file(os.path.join(aztk.utils.constants.HOME_DIRECTORY_PATH, '.aztk', 'ssh.yaml')) self._read_config_file() self._merge_dict( dict( @@ -164,8 +161,7 @@ def merge(self, cluster_id, username, job_ui_port, job_history_ui_port, if self.username is None: raise aztk.error.AztkError( - "Please supply a username either in the ssh.yaml configuration file or with a parameter (--username)" - ) + "Please supply a username either in the ssh.yaml configuration file or with a parameter (--username)") class JobConfig(): @@ -206,7 +202,6 @@ def _merge_dict(self, config): if scheduling_target: self.scheduling_target = SchedulingTarget(scheduling_target) - applications = 
config.get('applications') if applications: self.applications = [] @@ -226,9 +221,7 @@ def _merge_dict(self, config): driver_memory=application.get('driver_memory'), executor_memory=application.get('executor_memory'), driver_cores=application.get('driver_cores'), - executor_cores=application.get('executor_cores') - ) - ) + executor_cores=application.get('executor_cores'))) spark_configuration = config.get('spark_configuration') if spark_configuration: @@ -241,13 +234,10 @@ def __convert_to_path(self, str_path): if str_path: abs_path = os.path.abspath(os.path.expanduser(str_path)) if not os.path.exists(abs_path): - raise aztk.error.AztkError( - "Could not find file: {0}\nCheck your configuration file". - format(str_path)) + raise aztk.error.AztkError("Could not find file: {0}\nCheck your configuration file".format(str_path)) return abs_path - def _read_config_file( - self, path: str = aztk.utils.constants.DEFAULT_SPARK_JOB_CONFIG): + def _read_config_file(self, path: str = aztk.utils.constants.DEFAULT_SPARK_JOB_CONFIG): """ Reads the Job config file in the .aztk/ directory (.aztk/job.yaml) """ @@ -258,8 +248,7 @@ def _read_config_file( try: config = yaml.load(stream) except yaml.YAMLError as err: - raise aztk.error.AztkError( - "Error in job.yaml: {0}".format(err)) + raise aztk.error.AztkError("Error in job.yaml: {0}".format(err)) if config is None: return @@ -278,15 +267,12 @@ def merge(self, id, job_config_yaml=None): raise aztk.error.AztkError( "Application specified with no name. Please verify your configuration in job.yaml") if entry.application is None: - raise aztk.error.AztkError( - "No path to application specified for {} in job.yaml".format(entry.name)) + raise aztk.error.AztkError("No path to application specified for {} in job.yaml".format(entry.name)) def get_file_if_exists(file): - local_conf_file = os.path.join( - aztk.utils.constants.DEFAULT_SPARK_CONF_SOURCE, file) - global_conf_file = os.path.join(aztk.utils.constants.GLOBAL_CONFIG_PATH, - file) + local_conf_file = os.path.join(aztk.utils.constants.DEFAULT_SPARK_CONF_SOURCE, file) + global_conf_file = os.path.join(aztk.utils.constants.GLOBAL_CONFIG_PATH, file) if os.path.exists(local_conf_file): return local_conf_file @@ -309,16 +295,14 @@ def load_jars(): # try load global try: - jars_src = os.path.join(aztk.utils.constants.GLOBAL_CONFIG_PATH, - 'jars') + jars_src = os.path.join(aztk.utils.constants.GLOBAL_CONFIG_PATH, 'jars') jars = [os.path.join(jars_src, jar) for jar in os.listdir(jars_src)] except FileNotFoundError: pass # try load local, overwrite if found try: - jars_src = os.path.join(aztk.utils.constants.DEFAULT_SPARK_CONF_SOURCE, - 'jars') + jars_src = os.path.join(aztk.utils.constants.DEFAULT_SPARK_CONF_SOURCE, 'jars') jars = [os.path.join(jars_src, jar) for jar in os.listdir(jars_src)] except FileNotFoundError: pass diff --git a/aztk_cli/entrypoint.py b/aztk_cli/entrypoint.py index 8accb267..3044a797 100644 --- a/aztk_cli/entrypoint.py +++ b/aztk_cli/entrypoint.py @@ -24,15 +24,11 @@ def main(): setup_common_args(parser) - subparsers = parser.add_subparsers( - title="Available Softwares", dest="software", metavar="") + subparsers = parser.add_subparsers(title="Available Softwares", dest="software", metavar="") subparsers.required = True - spark_parser = subparsers.add_parser( - "spark", help="Commands to run spark jobs") - plugins_parser = subparsers.add_parser( - "plugins", help="Commands to list and view plugins") - toolkit_parser = subparsers.add_parser( - "toolkit", help="List current toolkit information 
and browse available ones") + spark_parser = subparsers.add_parser("spark", help="Commands to run spark jobs") + plugins_parser = subparsers.add_parser("plugins", help="Commands to list and view plugins") + toolkit_parser = subparsers.add_parser("toolkit", help="List current toolkit information and browse available ones") spark.setup_parser(spark_parser) plugins.setup_parser(plugins_parser) @@ -50,10 +46,8 @@ def main(): def setup_common_args(parser: argparse.ArgumentParser): - parser.add_argument('--version', action='version', - version=aztk.version.__version__) - parser.add_argument("--verbose", action='store_true', - help="Enable verbose logging.") + parser.add_argument('--version', action='version', version=aztk.version.__version__) + parser.add_argument("--verbose", action='store_true', help="Enable verbose logging.") def parse_common_args(args: NamedTuple): diff --git a/aztk_cli/logger.py b/aztk_cli/logger.py index 9b7b5ff5..24c94bf1 100644 --- a/aztk_cli/logger.py +++ b/aztk_cli/logger.py @@ -9,7 +9,7 @@ def add_coloring_to_emit_windows(fn): - # add methods we need to the class + # add methods we need to the class def _set_color(self, code): import ctypes @@ -21,10 +21,10 @@ def _set_color(self, code): setattr(logging.StreamHandler, '_set_color', _set_color) def new(*args): - FOREGROUND_BLUE = 0x0001 # text color contains blue. - FOREGROUND_GREEN = 0x0002 # text color contains green. - FOREGROUND_RED = 0x0004 # text color contains red. - FOREGROUND_INTENSITY = 0x0008 # text color is intensified. + FOREGROUND_BLUE = 0x0001 # text color contains blue. + FOREGROUND_GREEN = 0x0002 # text color contains green. + FOREGROUND_RED = 0x0004 # text color contains red. + FOREGROUND_INTENSITY = 0x0008 # text color is intensified. FOREGROUND_WHITE = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED # winbase.h @@ -41,7 +41,7 @@ def new(*args): FOREGROUND_MAGENTA = 0x0005 FOREGROUND_YELLOW = 0x0006 FOREGROUND_GREY = 0x0007 - FOREGROUND_INTENSITY = 0x0008 # foreground color is intensified. + FOREGROUND_INTENSITY = 0x0008 # foreground color is intensified. BACKGROUND_BLACK = 0x0000 BACKGROUND_BLUE = 0x0010 @@ -51,7 +51,7 @@ def new(*args): BACKGROUND_MAGENTA = 0x0050 BACKGROUND_YELLOW = 0x0060 BACKGROUND_GREY = 0x0070 - BACKGROUND_INTENSITY = 0x0080 # background color is intensified. + BACKGROUND_INTENSITY = 0x0080 # background color is intensified. 
levelno = args[1].levelno if levelno >= 50: @@ -74,6 +74,7 @@ def new(*args): args[0]._set_color(FOREGROUND_WHITE) # print "after" return ret + return new @@ -82,22 +83,23 @@ def add_coloring_to_emit_ansi(fn): def new(*args): levelno = args[1].levelno if levelno >= 50: - color = '\x1b[31m' # red + color = '\x1b[31m' # red elif levelno >= 40: - color = '\x1b[31m' # red + color = '\x1b[31m' # red elif levelno >= 30: - color = '\x1b[33m' # yellow + color = '\x1b[33m' # yellow elif levelno >= 20: - color = '\x1b[32m' # green + color = '\x1b[32m' # green elif levelno >= 19: - color = '\x1b[0m' # normal + color = '\x1b[0m' # normal elif levelno >= 10: - color = '\x1b[35m' # pink + color = '\x1b[35m' # pink else: - color = '\x1b[0m' # normal - args[1].msg = color + args[1].msg + '\x1b[0m' # normal + color = '\x1b[0m' # normal + args[1].msg = color + args[1].msg + '\x1b[0m' # normal # print "after" return fn(*args) + return new @@ -108,10 +110,10 @@ def new(*args): # all non-Windows platforms are supporting ANSI escapes so we use them logging.StreamHandler.emit = add_coloring_to_emit_ansi(logging.StreamHandler.emit) - logging.PRINT = 19 logging.addLevelName(logging.PRINT, "PRINT") + def print_level(self, message, *args, **kwargs): self._log(logging.PRINT, message, args, **kwargs) diff --git a/aztk_cli/plugins.py b/aztk_cli/plugins.py index 4a0c21d8..7a66f02a 100644 --- a/aztk_cli/plugins.py +++ b/aztk_cli/plugins.py @@ -11,7 +11,7 @@ def setup_parser(parser: argparse.ArgumentParser): def execute(args: typing.NamedTuple): plugins = plugin_manager.plugins log.info("------------------------------------------------------") - log.info(" Plugins (%i available)",len(plugins)) + log.info(" Plugins (%i available)", len(plugins)) log.info("------------------------------------------------------") for name, plugin in plugins.items(): log.info("- %s", name) diff --git a/aztk_cli/spark/endpoints/cluster/cluster.py b/aztk_cli/spark/endpoints/cluster/cluster.py index b90b094b..69af3712 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster.py +++ b/aztk_cli/spark/endpoints/cluster/cluster.py @@ -28,30 +28,19 @@ class ClusterAction: def setup_parser(parser: argparse.ArgumentParser): - subparsers = parser.add_subparsers( - title="Actions", dest="cluster_action", metavar="") + subparsers = parser.add_subparsers(title="Actions", dest="cluster_action", metavar="") subparsers.required = True - create_parser = subparsers.add_parser( - ClusterAction.create, help="Create a new cluster") - add_user_parser = subparsers.add_parser( - ClusterAction.add_user, help="Add a user to the given cluster") - delete_parser = subparsers.add_parser( - ClusterAction.delete, help="Delete a cluster") - get_parser = subparsers.add_parser( - ClusterAction.get, help="Get information about a cluster") - list_parser = subparsers.add_parser( - ClusterAction.list, help="List clusters in your account") - app_logs_parser = subparsers.add_parser( - "app-logs", help="Get the logs from a submitted app") - ssh_parser = subparsers.add_parser( - ClusterAction.ssh, help="SSH into the master node of a cluster") - submit_parser = subparsers.add_parser( - "submit", help="Submit a new spark job to a cluster") - run_parser = subparsers.add_parser( - ClusterAction.run, help="Run a command on all nodes in your spark cluster") - copy_parser = subparsers.add_parser( - ClusterAction.copy, help="Copy files to all nodes in your spark cluster") + create_parser = subparsers.add_parser(ClusterAction.create, help="Create a new cluster") + add_user_parser = 
subparsers.add_parser(ClusterAction.add_user, help="Add a user to the given cluster") + delete_parser = subparsers.add_parser(ClusterAction.delete, help="Delete a cluster") + get_parser = subparsers.add_parser(ClusterAction.get, help="Get information about a cluster") + list_parser = subparsers.add_parser(ClusterAction.list, help="List clusters in your account") + app_logs_parser = subparsers.add_parser("app-logs", help="Get the logs from a submitted app") + ssh_parser = subparsers.add_parser(ClusterAction.ssh, help="SSH into the master node of a cluster") + submit_parser = subparsers.add_parser("submit", help="Submit a new spark job to a cluster") + run_parser = subparsers.add_parser(ClusterAction.run, help="Run a command on all nodes in your spark cluster") + copy_parser = subparsers.add_parser(ClusterAction.copy, help="Copy files to all nodes in your spark cluster") debug_parser = subparsers.add_parser( ClusterAction.debug, help="Debugging tool that aggregates logs and output from the cluster.") diff --git a/aztk_cli/spark/endpoints/cluster/cluster_add_user.py b/aztk_cli/spark/endpoints/cluster/cluster_add_user.py index c59bdd5a..a3837cc8 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_add_user.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_add_user.py @@ -6,16 +6,19 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', dest='cluster_id', required=True, - help='The unique id of your spark cluster') - parser.add_argument('-u', '--username', - help='The username to access your spark cluster\'s head node') + parser.add_argument('--id', dest='cluster_id', required=True, help='The unique id of your spark cluster') + parser.add_argument('-u', '--username', help='The username to access your spark cluster\'s head node') auth_group = parser.add_mutually_exclusive_group() - auth_group.add_argument('-p', '--password', - help="The password to access your spark cluster's master node. If not provided will use ssh public key.") - auth_group.add_argument('--ssh-key', - help="The ssh public key to access your spark cluster's master node. You can also set the ssh-key in the configuration file.") + auth_group.add_argument( + '-p', + '--password', + help="The password to access your spark cluster's master node. If not provided will use ssh public key.") + auth_group.add_argument( + '--ssh-key', + help= + "The ssh public key to access your spark cluster's master node. You can also set the ssh-key in the configuration file." 
+ ) parser.set_defaults(username="admin") @@ -32,14 +35,10 @@ def execute(args: typing.NamedTuple): else: ssh_key = spark_client.secrets_configuration.ssh_pub_key - ssh_key, password = utils.get_ssh_key_or_prompt(ssh_key, args.username, args.password, spark_client.secrets_configuration) + ssh_key, password = utils.get_ssh_key_or_prompt(ssh_key, args.username, args.password, + spark_client.secrets_configuration) - spark_client.cluster.create_user( - id=args.cluster_id, - username=args.username, - password=password, - ssh_key=ssh_key - ) + spark_client.cluster.create_user(id=args.cluster_id, username=args.username, password=password, ssh_key=ssh_key) if password: log.info('password: %s', '*' * len(password)) diff --git a/aztk_cli/spark/endpoints/cluster/cluster_app_logs.py b/aztk_cli/spark/endpoints/cluster/cluster_app_logs.py index fb25cbd4..03207633 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_app_logs.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_app_logs.py @@ -7,19 +7,14 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='cluster_id', - required=True, - help='The unique id of your spark cluster') - parser.add_argument('--name', - dest='app_name', - required=True, - help='The unique id of your job name') + parser.add_argument('--id', dest='cluster_id', required=True, help='The unique id of your spark cluster') + parser.add_argument('--name', dest='app_name', required=True, help='The unique id of your job name') output_group = parser.add_mutually_exclusive_group() - output_group.add_argument('--output', - help='Path to the file you wish to output to. If not \ + output_group.add_argument( + '--output', + help='Path to the file you wish to output to. If not \ specified, output is printed to stdout') output_group.add_argument('--tail', dest='tail', action='store_true') diff --git a/aztk_cli/spark/endpoints/cluster/cluster_copy.py b/aztk_cli/spark/endpoints/cluster/cluster_copy.py index 455ae49e..ba17bbda 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_copy.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_copy.py @@ -7,17 +7,17 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', dest='cluster_id', required=True, - help='The unique id of your spark cluster') + parser.add_argument('--id', dest='cluster_id', required=True, help='The unique id of your spark cluster') - parser.add_argument('--source-path', required=True, - help='the local file you wish to copy to the cluster') + parser.add_argument('--source-path', required=True, help='the local file you wish to copy to the cluster') parser.add_argument('--dest-path', required=True, help='the path the file will be copied to on each node in the cluster.'\ 'Note that this must include the file name.') - parser.add_argument('--internal', action='store_true', - help='Connect using the local IP of the master node. Only use if using a VPN.') + parser.add_argument( + '--internal', + action='store_true', + help='Connect using the local IP of the master node. 
Only use if using a VPN.') parser.set_defaults(internal=False) @@ -25,10 +25,6 @@ def execute(args: typing.NamedTuple): spark_client = aztk.spark.Client(config.load_aztk_secrets()) with utils.Spinner(): copy_output = spark_client.cluster.copy( - id=args.cluster_id, - source_path=args.source_path, - destination_path=args.dest_path, - internal=args.internal - ) + id=args.cluster_id, source_path=args.source_path, destination_path=args.dest_path, internal=args.internal) [utils.log_node_copy_output(node_output) for node_output in copy_output] sys.exit(0 if not any([node_output.error for node_output in copy_output]) else 1) diff --git a/aztk_cli/spark/endpoints/cluster/cluster_create.py b/aztk_cli/spark/endpoints/cluster/cluster_create.py index 410330e8..c0e37bc2 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_create.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_create.py @@ -9,26 +9,24 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', dest='cluster_id', - help='The unique id of your spark cluster') - parser.add_argument('--size', type=int, - help='Number of vms in your cluster') - parser.add_argument('--size-low-pri', type=int, - help='Number of low priority vms in your cluster (Deprecated, use --size-low-priority)') - parser.add_argument('--size-low-priority', type=int, - help='Number of low priority vms in your cluster') - parser.add_argument('--vm-size', - help='VM size for nodes in your cluster') - parser.add_argument('--username', - help='Username to access your cluster (required: --wait flag)') - parser.add_argument('--password', - help="The password to access your spark cluster's head \ + parser.add_argument('--id', dest='cluster_id', help='The unique id of your spark cluster') + parser.add_argument('--size', type=int, help='Number of vms in your cluster') + parser.add_argument( + '--size-low-pri', + type=int, + help='Number of low priority vms in your cluster (Deprecated, use --size-low-priority)') + parser.add_argument('--size-low-priority', type=int, help='Number of low priority vms in your cluster') + parser.add_argument('--vm-size', help='VM size for nodes in your cluster') + parser.add_argument('--username', help='Username to access your cluster (required: --wait flag)') + parser.add_argument( + '--password', + help="The password to access your spark cluster's head \ node. 
If not provided will use ssh public key.") - parser.add_argument('--docker-repo', - help='The location of the public docker image you want to use \ + parser.add_argument( + '--docker-repo', + help='The location of the public docker image you want to use \ (/:)') - parser.add_argument('--subnet-id', - help='The subnet in which to create the cluster.') + parser.add_argument('--subnet-id', help='The subnet in which to create the cluster.') parser.add_argument('--no-wait', dest='wait', action='store_false') parser.add_argument('--wait', dest='wait', action='store_true') @@ -47,16 +45,17 @@ def execute(args: typing.NamedTuple): deprecate("0.9.0", "--size-low-pri has been deprecated.", "Please use --size-low-priority.") args.size_low_priority = args.size_low_pri - cluster_conf.merge(ClusterConfiguration( - cluster_id=args.cluster_id, - size=args.size, - size_low_priority=args.size_low_priority, - vm_size=args.vm_size, - subnet_id=args.subnet_id, - user_configuration=UserConfiguration( - username=args.username, - password=args.password, - ))) + cluster_conf.merge( + ClusterConfiguration( + cluster_id=args.cluster_id, + size=args.size, + size_low_priority=args.size_low_priority, + vm_size=args.vm_size, + subnet_id=args.subnet_id, + user_configuration=UserConfiguration( + username=args.username, + password=args.password, + ))) if args.docker_repo and cluster_conf.toolkit: cluster_conf.toolkit.docker_repo = args.docker_repo @@ -67,14 +66,10 @@ def execute(args: typing.NamedTuple): if user_configuration and user_configuration.username: ssh_key, password = utils.get_ssh_key_or_prompt(spark_client.secrets_configuration.ssh_pub_key, - user_configuration.username, - user_configuration.password, + user_configuration.username, user_configuration.password, spark_client.secrets_configuration) cluster_conf.user_configuration = aztk.spark.models.UserConfiguration( - username=user_configuration.username, - password=password, - ssh_key=ssh_key - ) + username=user_configuration.username, password=password, ssh_key=ssh_key) else: cluster_conf.user_configuration = None @@ -82,10 +77,7 @@ def execute(args: typing.NamedTuple): utils.print_cluster_conf(cluster_conf, wait) with utils.Spinner(): # create spark cluster - cluster = spark_client.cluster.create( - cluster_configuration=cluster_conf, - wait=wait - ) + cluster = spark_client.cluster.create(cluster_configuration=cluster_conf, wait=wait) if wait: log.info("Cluster %s created successfully.", cluster.id) diff --git a/aztk_cli/spark/endpoints/cluster/cluster_debug.py b/aztk_cli/spark/endpoints/cluster/cluster_debug.py index 21a16c16..3675a0f0 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_debug.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_debug.py @@ -8,11 +8,9 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', dest='cluster_id', required=True, - help='The unique id of your spark cluster') + parser.add_argument('--id', dest='cluster_id', required=True, help='The unique id of your spark cluster') - parser.add_argument('--output', '-o', required=False, - help='the directory for the output folder') + parser.add_argument('--output', '-o', required=False, help='the directory for the output folder') def execute(args: typing.NamedTuple): diff --git a/aztk_cli/spark/endpoints/cluster/cluster_delete.py b/aztk_cli/spark/endpoints/cluster/cluster_delete.py index 48c9b0f5..82df04f2 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_delete.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_delete.py @@ -6,21 +6,22 @@ def 
setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='cluster_ids', - nargs='*', - required=True, - help='The unique id of your spark cluster') - parser.add_argument('--force', '-f', - dest='force', - required=False, - action='store_true', - help='Do not prompt for confirmation, force deletion of cluster.') - parser.add_argument('--keep-logs', '-k', - dest='keep_logs', - action='store_true', - required=False, - help='Prevent logs in storage from being deleted.') + parser.add_argument( + '--id', dest='cluster_ids', nargs='*', required=True, help='The unique id of your spark cluster') + parser.add_argument( + '--force', + '-f', + dest='force', + required=False, + action='store_true', + help='Do not prompt for confirmation, force deletion of cluster.') + parser.add_argument( + '--keep-logs', + '-k', + dest='keep_logs', + action='store_true', + required=False, + help='Prevent logs in storage from being deleted.') parser.set_defaults(force=False, keep_logs=False) diff --git a/aztk_cli/spark/endpoints/cluster/cluster_get.py b/aztk_cli/spark/endpoints/cluster/cluster_get.py index 97bfd184..f66cee4d 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_get.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_get.py @@ -6,14 +6,8 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='cluster_id', - required=True, - help='The unique id of your spark cluster') - parser.add_argument('--show-config', - dest='show_config', - action='store_true', - help='Show the cluster configuration') + parser.add_argument('--id', dest='cluster_id', required=True, help='The unique id of your spark cluster') + parser.add_argument('--show-config', dest='show_config', action='store_true', help='Show the cluster configuration') parser.add_argument('--internal', action='store_true', help="Show the local IP of the nodes. "\ "Only use if using connecting with a VPN.") diff --git a/aztk_cli/spark/endpoints/cluster/cluster_list.py b/aztk_cli/spark/endpoints/cluster/cluster_list.py index 85b42139..9f8dafbd 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_list.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_list.py @@ -6,11 +6,8 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('-q', '--quiet', - dest='quiet', - required=False, - action='store_true', - help='The unique id of your spark cluster') + parser.add_argument( + '-q', '--quiet', dest='quiet', required=False, action='store_true', help='The unique id of your spark cluster') parser.set_defaults(quiet=False) diff --git a/aztk_cli/spark/endpoints/cluster/cluster_run.py b/aztk_cli/spark/endpoints/cluster/cluster_run.py index 7306e0fd..9db3df84 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_run.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_run.py @@ -6,20 +6,20 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='cluster_id', - required=True, - help='The unique id of your spark cluster') - parser.add_argument('--node-id', '-n', - dest='node_id', - required=False, - help='The unique id of the node in the cluster to run the command on') - parser.add_argument('command', - help='The command to run on your spark cluster') - parser.add_argument('--internal', action='store_true', - help='Connect using the local IP of the master node. 
Only use if using a VPN') - parser.add_argument('--host', action='store_true', - help='Run the command on the host instead of the Spark Docker container') + parser.add_argument('--id', dest='cluster_id', required=True, help='The unique id of your spark cluster') + parser.add_argument( + '--node-id', + '-n', + dest='node_id', + required=False, + help='The unique id of the node in the cluster to run the command on') + parser.add_argument('command', help='The command to run on your spark cluster') + parser.add_argument( + '--internal', + action='store_true', + help='Connect using the local IP of the master node. Only use if using a VPN') + parser.add_argument( + '--host', action='store_true', help='Run the command on the host instead of the Spark Docker container') parser.set_defaults(internal=False, host=False) @@ -27,7 +27,9 @@ def execute(args: typing.NamedTuple): spark_client = aztk.spark.Client(config.load_aztk_secrets()) with utils.Spinner(): if args.node_id: - results = [spark_client.cluster.node_run(args.cluster_id, args.node_id, args.command, args.host, args.internal)] + results = [ + spark_client.cluster.node_run(args.cluster_id, args.node_id, args.command, args.host, args.internal) + ] else: results = spark_client.cluster.run(args.cluster_id, args.command, args.host, args.internal) diff --git a/aztk_cli/spark/endpoints/cluster/cluster_ssh.py b/aztk_cli/spark/endpoints/cluster/cluster_ssh.py index 885cd7b8..8e66fbf6 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_ssh.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_ssh.py @@ -19,10 +19,15 @@ def setup_parser(parser: argparse.ArgumentParser): parser.add_argument('-u', '--username', help='Username to spark cluster') parser.add_argument('--password', help='Password for the specified ssh user') parser.add_argument('--host', dest="host", action='store_true', help='Connect to the host of the Spark container') - parser.add_argument('--no-connect', dest="connect", action='store_false', - help='Do not create the ssh session. Only print out the command to run.') - parser.add_argument('--internal', action='store_true', - help='Connect using the local IP of the master node. Only use if using a VPN.') + parser.add_argument( + '--no-connect', + dest="connect", + action='store_false', + help='Do not create the ssh session. Only print out the command to run.') + parser.add_argument( + '--internal', + action='store_true', + help='Connect using the local IP of the master node. 
Only use if using a VPN.') parser.set_defaults(connect=True, internal=False) @@ -97,9 +102,8 @@ def native_python_ssh_into_master(spark_client, cluster, ssh_conf, password): plugin_ports = [] if configuration and configuration.plugins: ports = [ - PortForwardingSpecification( - port.internal, - port.public_port) for plugin in configuration.plugins for port in plugin.ports if port.expose_publicly + PortForwardingSpecification(port.internal, port.public_port) for plugin in configuration.plugins + for port in plugin.ports if port.expose_publicly ] plugin_ports.extend(ports) @@ -111,12 +115,11 @@ def native_python_ssh_into_master(spark_client, cluster, ssh_conf, password): ssh_key=None, password=password, port_forward_list=[ - PortForwardingSpecification(remote_port=8080, local_port=8080), # web ui - PortForwardingSpecification(remote_port=4040, local_port=4040), # job ui + PortForwardingSpecification(remote_port=8080, local_port=8080), # web ui + PortForwardingSpecification(remote_port=4040, local_port=4040), # job ui PortForwardingSpecification(remote_port=18080, local_port=18080), # job history ui ] + plugin_ports, - internal=ssh_conf.internal - ) + internal=ssh_conf.internal) def shell_out_ssh(spark_client, ssh_conf): diff --git a/aztk_cli/spark/endpoints/cluster/cluster_submit.py b/aztk_cli/spark/endpoints/cluster/cluster_submit.py index 927d7571..2669b6de 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_submit.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_submit.py @@ -8,81 +8,80 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', dest='cluster_id', required=True, - help='The unique id of your spark cluster') + parser.add_argument('--id', dest='cluster_id', required=True, help='The unique id of your spark cluster') - parser.add_argument('--name', required=True, - help='a name for your application') + parser.add_argument('--name', required=True, help='a name for your application') - parser.add_argument('--wait', dest='wait', action='store_true', - help='Wait for app to complete') - parser.add_argument('--no-wait', dest='wait', action='store_false', - help='Do not wait for app to complete') + parser.add_argument('--wait', dest='wait', action='store_true', help='Wait for app to complete') + parser.add_argument('--no-wait', dest='wait', action='store_false', help='Do not wait for app to complete') parser.set_defaults(wait=True) - parser.add_argument('--class', dest='main_class', - help='Your application\'s main class (for Java only).') + parser.add_argument('--class', dest='main_class', help='Your application\'s main class (for Java only).') - parser.add_argument('--jars', - help='Comma-separated list of local jars to include \ + parser.add_argument( + '--jars', + help='Comma-separated list of local jars to include \ on the driver and executor classpaths. Use \ absolute path to reference files.') - parser.add_argument('--py-files', - help='Comma-separated list of .zip, .egg, or .py files \ + parser.add_argument( + '--py-files', + help='Comma-separated list of .zip, .egg, or .py files \ to place on the PYTHONPATH for Python apps. Use \ absolute path to reference files.') - parser.add_argument('--files', - help='Comma-separated list of .zip, .egg, or .py files \ + parser.add_argument( + '--files', + help='Comma-separated list of .zip, .egg, or .py files \ to place on the PYTHONPATH for Python apps. 
Use \ absolute path ot reference files.') - parser.add_argument('--driver-java-options', - help='Extra Java options to pass to the driver.') + parser.add_argument('--driver-java-options', help='Extra Java options to pass to the driver.') - parser.add_argument('--driver-library-path', - help='Extra library path entries to pass to the driver.') + parser.add_argument('--driver-library-path', help='Extra library path entries to pass to the driver.') - parser.add_argument('--driver-class-path', - help='Extra class path entries to pass to the driver. \ + parser.add_argument( + '--driver-class-path', + help='Extra class path entries to pass to the driver. \ Note that jars added with --jars are automatically \ included in the classpath.') - parser.add_argument('--driver-memory', - help="Memory for driver (e.g. 1000M, 2G) (Default: 1024M).") + parser.add_argument('--driver-memory', help="Memory for driver (e.g. 1000M, 2G) (Default: 1024M).") - parser.add_argument('--executor-memory', - help='Memory per executor (e.g. 1000M, 2G) (Default: 1G).') + parser.add_argument('--executor-memory', help='Memory per executor (e.g. 1000M, 2G) (Default: 1G).') - parser.add_argument('--driver-cores', - help='Cores for driver (Default: 1).') + parser.add_argument('--driver-cores', help='Cores for driver (Default: 1).') - parser.add_argument('--executor-cores', - help='Number of cores per executor. (Default: All \ + parser.add_argument( + '--executor-cores', + help='Number of cores per executor. (Default: All \ available cores on the worker)') - parser.add_argument('--max-retry-count', - help='Number of times the Spark job may be retried \ + parser.add_argument( + '--max-retry-count', + help='Number of times the Spark job may be retried \ if there is a failure') - parser.add_argument('--output', - help='Path to the file you wish to output to. If not \ + parser.add_argument( + '--output', + help='Path to the file you wish to output to. If not \ specified, output is printed to stdout') - parser.add_argument('--remote', action='store_true', - help='Do not upload the app to the cluster, assume it is \ + parser.add_argument( + '--remote', + action='store_true', + help='Do not upload the app to the cluster, assume it is \ already accessible at the given path') - parser.add_argument('app', - help='App jar OR python file to execute. A path to a local \ + parser.add_argument( + 'app', + help='App jar OR python file to execute. A path to a local \ file is expected, unless used in conjunction with \ the --remote flag. When the --remote flag is set, a \ remote path that is accessible from the cluster is \ expected. 
Remote paths are not validated up-front.') - parser.add_argument('app_args', nargs='*', - help='Arguments for the application') + parser.add_argument('app_args', nargs='*', help='Arguments for the application') def execute(args: typing.NamedTuple): @@ -133,10 +132,9 @@ def execute(args: typing.NamedTuple): log.info("Application arguments: %s", args.app_args) log.info("-------------------------------------------") - spark_client.cluster.submit( id=args.cluster_id, - application = aztk.spark.models.ApplicationConfiguration( + application=aztk.spark.models.ApplicationConfiguration( name=args.name, application=args.app, application_args=args.app_args, @@ -151,19 +149,19 @@ def execute(args: typing.NamedTuple): executor_memory=args.executor_memory, driver_cores=args.driver_cores, executor_cores=args.executor_cores, - max_retry_count=args.max_retry_count - ), + max_retry_count=args.max_retry_count), remote=args.remote, - wait=False - ) + wait=False) if args.wait: if not args.output: exit_code = utils.stream_logs(client=spark_client, cluster_id=args.cluster_id, application_name=args.name) else: with utils.Spinner(): - spark_client.cluster.wait(id=args.cluster_id, application_name=args.name) # TODO: replace wait_until_application_done - application_log = spark_client.cluster.get_application_log(id=args.cluster_id, application_name=args.name) + spark_client.cluster.wait( + id=args.cluster_id, application_name=args.name) # TODO: replace wait_until_application_done + application_log = spark_client.cluster.get_application_log( + id=args.cluster_id, application_name=args.name) with open(os.path.abspath(os.path.expanduser(args.output)), "w", encoding="UTF-8") as f: f.write(application_log.log) exit_code = application_log.exit_code diff --git a/aztk_cli/spark/endpoints/init.py b/aztk_cli/spark/endpoints/init.py index f93b9908..e9fdabc2 100644 --- a/aztk_cli/spark/endpoints/init.py +++ b/aztk_cli/spark/endpoints/init.py @@ -7,8 +7,11 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--global', dest='global_flag', action='store_true', - help="Create a .aztk/ folder in your home directory for global configurations.") + parser.add_argument( + '--global', + dest='global_flag', + action='store_true', + help="Create a .aztk/ folder in your home directory for global configurations.") software_parser = parser.add_mutually_exclusive_group() software_parser.add_argument('--miniconda', action="store_true", required=False) software_parser.add_argument('--anaconda', action="store_true", required=False) diff --git a/aztk_cli/spark/endpoints/job/delete.py b/aztk_cli/spark/endpoints/job/delete.py index 8e5bf232..70b2384d 100644 --- a/aztk_cli/spark/endpoints/job/delete.py +++ b/aztk_cli/spark/endpoints/job/delete.py @@ -6,20 +6,21 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='job_id', - required=True, - help='The unique id of your AZTK Job') - parser.add_argument('--force', '-f', - dest='force', - required=False, - action='store_true', - help='Do not prompt for confirmation, force deletion of cluster.') - parser.add_argument('--keep-logs', '-k', - dest='keep_logs', - action='store_true', - required=False, - help='Prevent logs in storage from being deleted.') + parser.add_argument('--id', dest='job_id', required=True, help='The unique id of your AZTK Job') + parser.add_argument( + '--force', + '-f', + dest='force', + required=False, + action='store_true', + help='Do not prompt for confirmation, force deletion of cluster.') + parser.add_argument( + 
'--keep-logs', + '-k', + dest='keep_logs', + action='store_true', + required=False, + help='Prevent logs in storage from being deleted.') parser.set_defaults(force=False, keep_logs=False) diff --git a/aztk_cli/spark/endpoints/job/get.py b/aztk_cli/spark/endpoints/job/get.py index 1d5a0a90..9e608dd8 100644 --- a/aztk_cli/spark/endpoints/job/get.py +++ b/aztk_cli/spark/endpoints/job/get.py @@ -7,10 +7,7 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='job_id', - required=True, - help='The unique id of your AZTK job') + parser.add_argument('--id', dest='job_id', required=True, help='The unique id of your AZTK job') def execute(args: typing.NamedTuple): diff --git a/aztk_cli/spark/endpoints/job/get_app.py b/aztk_cli/spark/endpoints/job/get_app.py index 47b4faf1..ca276d4a 100644 --- a/aztk_cli/spark/endpoints/job/get_app.py +++ b/aztk_cli/spark/endpoints/job/get_app.py @@ -7,14 +7,8 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='job_id', - required=True, - help='The unique id of your AZTK job') - parser.add_argument('--name', - dest='app_name', - required=True, - help='The unique id of your job name') + parser.add_argument('--id', dest='job_id', required=True, help='The unique id of your AZTK job') + parser.add_argument('--name', dest='app_name', required=True, help='The unique id of your job name') def execute(args: typing.NamedTuple): diff --git a/aztk_cli/spark/endpoints/job/get_app_logs.py b/aztk_cli/spark/endpoints/job/get_app_logs.py index 06700943..c4ae76aa 100644 --- a/aztk_cli/spark/endpoints/job/get_app_logs.py +++ b/aztk_cli/spark/endpoints/job/get_app_logs.py @@ -7,16 +7,11 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='job_id', - required=True, - help='The unique id of your AZTK job') - parser.add_argument('--name', - dest='app_name', - required=True, - help='The unique id of your job name') - parser.add_argument('--output', - help='Path to the file you wish to output to. If not \ + parser.add_argument('--id', dest='job_id', required=True, help='The unique id of your AZTK job') + parser.add_argument('--name', dest='app_name', required=True, help='The unique id of your job name') + parser.add_argument( + '--output', + help='Path to the file you wish to output to. 
If not \ specified, output is printed to stdout') diff --git a/aztk_cli/spark/endpoints/job/job.py b/aztk_cli/spark/endpoints/job/job.py index 40dbb8a5..3f930191 100644 --- a/aztk_cli/spark/endpoints/job/job.py +++ b/aztk_cli/spark/endpoints/job/job.py @@ -24,29 +24,18 @@ class ClusterAction: def setup_parser(parser: argparse.ArgumentParser): - subparsers = parser.add_subparsers( - title="Actions", dest="job_action", metavar="") + subparsers = parser.add_subparsers(title="Actions", dest="job_action", metavar="") subparsers.required = True - get_app_logs_parser = subparsers.add_parser( - ClusterAction.get_app_logs, help="Get a Job's application logs") - get_app_parser = subparsers.add_parser( - ClusterAction.get_app, help="Get information about a Job's application") - delete_parser = subparsers.add_parser( - ClusterAction.delete, help="Delete a Job") - get_parser = subparsers.add_parser( - ClusterAction.get, help="Get information about a Job") - list_parser = subparsers.add_parser( - ClusterAction.list, help="List Jobs in your account") - list_apps_parser = subparsers.add_parser( - ClusterAction.list_apps, help="List all applications on an AZTK Job") - stop_app_parser = subparsers.add_parser( - ClusterAction.stop_app, help="Stop a Job's application") - stop_parser = subparsers.add_parser( - ClusterAction.stop, help="Stop a Job from running") - submit_parser = subparsers.add_parser( - ClusterAction.submit, help="Submit a new spark Job") - + get_app_logs_parser = subparsers.add_parser(ClusterAction.get_app_logs, help="Get a Job's application logs") + get_app_parser = subparsers.add_parser(ClusterAction.get_app, help="Get information about a Job's application") + delete_parser = subparsers.add_parser(ClusterAction.delete, help="Delete a Job") + get_parser = subparsers.add_parser(ClusterAction.get, help="Get information about a Job") + list_parser = subparsers.add_parser(ClusterAction.list, help="List Jobs in your account") + list_apps_parser = subparsers.add_parser(ClusterAction.list_apps, help="List all applications on an AZTK Job") + stop_app_parser = subparsers.add_parser(ClusterAction.stop_app, help="Stop a Job's application") + stop_parser = subparsers.add_parser(ClusterAction.stop, help="Stop a Job from running") + submit_parser = subparsers.add_parser(ClusterAction.submit, help="Submit a new spark Job") get_app_logs.setup_parser(get_app_logs_parser) get_app.setup_parser(get_app_parser) diff --git a/aztk_cli/spark/endpoints/job/list.py b/aztk_cli/spark/endpoints/job/list.py index 0c169705..a1ef757e 100644 --- a/aztk_cli/spark/endpoints/job/list.py +++ b/aztk_cli/spark/endpoints/job/list.py @@ -10,6 +10,7 @@ def setup_parser(_: argparse.ArgumentParser): # No arguments for list yet pass + def execute(args: typing.NamedTuple): spark_client = aztk.spark.Client(config.load_aztk_secrets()) diff --git a/aztk_cli/spark/endpoints/job/list_apps.py b/aztk_cli/spark/endpoints/job/list_apps.py index d7dfdd78..343490e5 100644 --- a/aztk_cli/spark/endpoints/job/list_apps.py +++ b/aztk_cli/spark/endpoints/job/list_apps.py @@ -6,10 +6,7 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='job_id', - required=True, - help='The unique id of your AZTK job') + parser.add_argument('--id', dest='job_id', required=True, help='The unique id of your AZTK job') def execute(args: typing.NamedTuple): diff --git a/aztk_cli/spark/endpoints/job/stop.py b/aztk_cli/spark/endpoints/job/stop.py index afdbc644..4fd5e9f9 100644 --- a/aztk_cli/spark/endpoints/job/stop.py +++ 
b/aztk_cli/spark/endpoints/job/stop.py @@ -7,10 +7,7 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='job_id', - required=True, - help='The unique id of your AZTK job') + parser.add_argument('--id', dest='job_id', required=True, help='The unique id of your AZTK job') def execute(args: typing.NamedTuple): diff --git a/aztk_cli/spark/endpoints/job/stop_app.py b/aztk_cli/spark/endpoints/job/stop_app.py index 4fc316d2..f1599892 100644 --- a/aztk_cli/spark/endpoints/job/stop_app.py +++ b/aztk_cli/spark/endpoints/job/stop_app.py @@ -7,14 +7,8 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='job_id', - required=True, - help='The unique id of your AZTK job') - parser.add_argument('--name', - dest='app_name', - required=True, - help='The unique id of your job name') + parser.add_argument('--id', dest='job_id', required=True, help='The unique id of your AZTK job') + parser.add_argument('--name', dest='app_name', required=True, help='The unique id of your job name') def execute(args: typing.NamedTuple): diff --git a/aztk_cli/spark/endpoints/job/submit.py b/aztk_cli/spark/endpoints/job/submit.py index bc519346..7e432782 100644 --- a/aztk_cli/spark/endpoints/job/submit.py +++ b/aztk_cli/spark/endpoints/job/submit.py @@ -7,14 +7,17 @@ def setup_parser(parser: argparse.ArgumentParser): - parser.add_argument('--id', - dest='job_id', - required=False, - help='The unique id of your Spark Job. Defaults to the id value in .aztk/job.yaml') - parser.add_argument('--configuration', '-c', - dest='job_conf', - required=False, - help='Path to the job.yaml configuration file. Defaults to .aztk/job.yaml') + parser.add_argument( + '--id', + dest='job_id', + required=False, + help='The unique id of your Spark Job. Defaults to the id value in .aztk/job.yaml') + parser.add_argument( + '--configuration', + '-c', + dest='job_conf', + required=False, + help='Path to the job.yaml configuration file. 
Defaults to .aztk/job.yaml') def execute(args: typing.NamedTuple): diff --git a/aztk_cli/spark/endpoints/spark.py b/aztk_cli/spark/endpoints/spark.py index 39eae266..94c52f27 100644 --- a/aztk_cli/spark/endpoints/spark.py +++ b/aztk_cli/spark/endpoints/spark.py @@ -7,26 +7,19 @@ def setup_parser(parser: argparse.ArgumentParser): - subparsers = parser.add_subparsers( - title="Actions", dest="action", metavar="") + subparsers = parser.add_subparsers(title="Actions", dest="action", metavar="") subparsers.required = True - cluster_parser = subparsers.add_parser( - "cluster", help="Commands to manage a cluster") - job_parser = subparsers.add_parser( - "job", help="Commands to manage a Job") - init_parser = subparsers.add_parser( - "init", help="Initialize your environment") + cluster_parser = subparsers.add_parser("cluster", help="Commands to manage a cluster") + job_parser = subparsers.add_parser("job", help="Commands to manage a Job") + init_parser = subparsers.add_parser("init", help="Initialize your environment") cluster.setup_parser(cluster_parser) job.setup_parser(job_parser) init.setup_parser(init_parser) + def execute(args: typing.NamedTuple): - actions = dict( - cluster=cluster.execute, - job=job.execute, - init=init.execute - ) + actions = dict(cluster=cluster.execute, job=job.execute, init=init.execute) func = actions[args.action] func(args) diff --git a/aztk_cli/toolkit.py b/aztk_cli/toolkit.py index 7d962675..52ce718a 100644 --- a/aztk_cli/toolkit.py +++ b/aztk_cli/toolkit.py @@ -40,6 +40,7 @@ def print_available_softwares(): for toolkit in TOOLKIT_MAP: log.info(" - %s", toolkit) + def validate_software(software: str): if software not in TOOLKIT_MAP: log.error("Software '%s' is not supported.", software) @@ -47,12 +48,14 @@ def validate_software(software: str): return False return True + def print_available_software_version(software: str): toolkit_def = TOOLKIT_MAP.get(software) log.info("Available version for %s: ", software) for version in toolkit_def.versions: log.info(" - %s", version) + def print_available_environments(software: str): toolkit_def = TOOLKIT_MAP.get(software) diff --git a/aztk_cli/utils.py b/aztk_cli/utils.py index f00089b7..526d2b58 100644 --- a/aztk_cli/utils.py +++ b/aztk_cli/utils.py @@ -34,9 +34,12 @@ def get_ssh_key_or_prompt(ssh_key, username, password, secrets_configuration): else: break else: - raise error.AztkError("Failed to get valid password, cannot add user to cluster. It is recommended that you provide a ssh public key in .aztk/secrets.yaml. Or provide an ssh-key or password with command line parameters (--ssh-key or --password). You may also run the 'aztk spark cluster add-user' command to add a user to this cluster.") + raise error.AztkError( + "Failed to get valid password, cannot add user to cluster. It is recommended that you provide a ssh public key in .aztk/secrets.yaml. Or provide an ssh-key or password with command line parameters (--ssh-key or --password). You may also run the 'aztk spark cluster add-user' command to add a user to this cluster." 
+ ) return ssh_key, password + def print_cluster(client, cluster: models.Cluster, internal: bool = False): node_count = __pretty_node_count(cluster) @@ -65,45 +68,38 @@ def print_cluster(client, cluster: models.Cluster, internal: bool = False): if internal: ip = node.ip_address else: - ip ='{}:{}'.format(remote_login_settings.ip_address, remote_login_settings.port) + ip = '{}:{}'.format(remote_login_settings.ip_address, remote_login_settings.port) log.info( - print_format.format( - node.id, - node.state.value, - ip, - "*" if node.is_dedicated else '', - '*' if node.id == cluster.master_node_id else '') - ) + print_format.format(node.id, node.state.value, ip, "*" if node.is_dedicated else '', '*' + if node.id == cluster.master_node_id else '')) log.info('') + def __pretty_node_count(cluster: models.Cluster) -> str: if cluster.pool.allocation_state is batch_models.AllocationState.resizing: - return '{} -> {}'.format( - cluster.total_current_nodes, - cluster.total_target_nodes) + return '{} -> {}'.format(cluster.total_current_nodes, cluster.total_target_nodes) else: return '{}'.format(cluster.total_current_nodes) -def __pretty_dedicated_node_count(cluster: models.Cluster)-> str: + +def __pretty_dedicated_node_count(cluster: models.Cluster) -> str: if (cluster.pool.allocation_state is batch_models.AllocationState.resizing or cluster.pool.state is batch_models.PoolState.deleting)\ and cluster.current_dedicated_nodes != cluster.target_dedicated_nodes: - return '{} -> {}'.format( - cluster.current_dedicated_nodes, - cluster.target_dedicated_nodes) + return '{} -> {}'.format(cluster.current_dedicated_nodes, cluster.target_dedicated_nodes) else: return '{}'.format(cluster.current_dedicated_nodes) -def __pretty_low_pri_node_count(cluster: models.Cluster)-> str: + +def __pretty_low_pri_node_count(cluster: models.Cluster) -> str: if (cluster.pool.allocation_state is batch_models.AllocationState.resizing or cluster.pool.state is batch_models.PoolState.deleting)\ and cluster.current_low_pri_nodes != cluster.target_low_pri_nodes: - return '{} -> {}'.format( - cluster.current_low_pri_nodes, - cluster.target_low_pri_nodes) + return '{} -> {}'.format(cluster.current_low_pri_nodes, cluster.target_low_pri_nodes) else: return '{}'.format(cluster.current_low_pri_nodes) + def print_clusters(clusters: List[models.Cluster]): print_format = '{:<34}| {:<10}| {:<20}| {:<7}' print_format_underline = '{:-<34}|{:-<11}|{:-<21}|{:-<7}' @@ -113,14 +109,7 @@ def print_clusters(clusters: List[models.Cluster]): for cluster in clusters: node_count = __pretty_node_count(cluster) - log.info( - print_format.format( - cluster.id, - cluster.visible_state, - cluster.vm_size, - node_count - ) - ) + log.info(print_format.format(cluster.id, cluster.visible_state, cluster.vm_size, node_count)) def print_clusters_quiet(clusters: List[models.Cluster]): @@ -131,10 +120,7 @@ def stream_logs(client, cluster_id, application_name): current_bytes = 0 while True: app_logs = client.cluster.get_application_log( - id=cluster_id, - application_name=application_name, - tail=True, - current_bytes=current_bytes) + id=cluster_id, application_name=application_name, tail=True, current_bytes=current_bytes) log.print(app_logs.log) if app_logs.application_state == 'completed': return app_logs.exit_code @@ -142,17 +128,16 @@ def stream_logs(client, cluster_id, application_name): time.sleep(3) -def ssh_in_master( - client, - cluster_id: str, - username: str = None, - webui: str = None, - jobui: str = None, - jobhistoryui: str = None, - ports=None, - host: bool 
= False, - connect: bool = True, - internal: bool = False): +def ssh_in_master(client, + cluster_id: str, + username: str = None, + webui: str = None, + jobui: str = None, + jobhistoryui: str = None, + ports=None, + host: bool = False, + connect: bool = True, + internal: bool = False): """ SSH into head node of spark-app :param cluster_id: Id of the cluster to ssh in @@ -193,17 +178,14 @@ def ssh_in_master( ssh_command.add_option("-i", ssh_priv_key) ssh_command.add_argument("-t") - ssh_command.add_option("-L", "{0}:localhost:{1}".format( - webui, spark_web_ui_port), enable=bool(webui)) - ssh_command.add_option("-L", "{0}:localhost:{1}".format( - jobui, spark_job_ui_port), enable=bool(jobui)) - ssh_command.add_option("-L", "{0}:localhost:{1}".format( - jobhistoryui, spark_job_history_ui_port), enable=bool(jobui)) + ssh_command.add_option("-L", "{0}:localhost:{1}".format(webui, spark_web_ui_port), enable=bool(webui)) + ssh_command.add_option("-L", "{0}:localhost:{1}".format(jobui, spark_job_ui_port), enable=bool(jobui)) + ssh_command.add_option( + "-L", "{0}:localhost:{1}".format(jobhistoryui, spark_job_history_ui_port), enable=bool(jobui)) if ports is not None: for port in ports: - ssh_command.add_option( - "-L", "{0}:localhost:{1}".format(port[0], port[1])) + ssh_command.add_option("-L", "{0}:localhost:{1}".format(port[0], port[1])) if configuration and configuration.plugins: for plugin in configuration.plugins: for port in plugin.ports: @@ -214,8 +196,7 @@ def ssh_in_master( if internal: ssh_command.add_argument("{0}@{1}".format(user, master_internal_node_ip)) else: - ssh_command.add_argument( - "{0}@{1} -p {2}".format(user, master_node_ip, master_node_port)) + ssh_command.add_argument("{0}@{1} -p {2}".format(user, master_node_ip, master_node_port)) if host is False: ssh_command.add_argument("\'sudo docker exec -it spark /bin/bash\'") @@ -227,6 +208,7 @@ def ssh_in_master( return '\n\t{}\n'.format(command) + def print_batch_exception(batch_exception): """ Prints the contents of the specified Batch exception. 
@@ -249,6 +231,7 @@ def print_batch_exception(batch_exception): Job submission ''' + def print_jobs(jobs: List[models.Job]): print_format = '{:<34}| {:<10}| {:<20}' print_format_underline = '{:-<34}|{:-<11}|{:-<21}' @@ -257,13 +240,7 @@ def print_jobs(jobs: List[models.Job]): log.info(print_format_underline.format('', '', '', '')) for job in jobs: - log.info( - print_format.format( - job.id, - job.state, - utc_to_local(job.creation_time) - ) - ) + log.info(print_format.format(job.id, job.state, utc_to_local(job.creation_time))) def print_job(client, job: models.Job): @@ -306,7 +283,7 @@ def print_cluster_summary(cluster: models.Cluster): print_format = '{:<4} {:<23} {:<15}' log.info("Cluster %s", cluster.id) - log.info("-"*42) + log.info("-" * 42) log.info("Nodes %s", __pretty_node_count(cluster)) log.info("| Dedicated: %s", __pretty_dedicated_node_count(cluster)) log.info("| Low priority: %s", __pretty_low_pri_node_count(cluster)) @@ -331,7 +308,7 @@ def application_summary(applications): print_format = '{:<17} {:<14}' log.info("Applications") - log.info("-"*42) + log.info("-" * 42) for state in states: if states[state] > 0: log.info(print_format.format(state + ":", states[state])) @@ -339,6 +316,7 @@ def application_summary(applications): if warn_scheduling: log.warning("\nNo Spark applications will be scheduled until the master is selected.") + def print_applications(applications): print_format = '{:<36}| {:<15}| {:<16} | {:^9} |' print_format_underline = '{:-<36}|{:-<16}|{:-<18}|{:-<11}|' @@ -348,46 +326,26 @@ def print_applications(applications): warn_scheduling = False for name in applications: if applications[name] is None: - log.info( - print_format.format( - name, - "scheduling", - "-", - "-" - ) - ) + log.info(print_format.format(name, "scheduling", "-", "-")) warn_scheduling = True else: application = applications[name] log.info( - print_format.format( - application.name, - application.state, - utc_to_local(application.state_transition_time), - application.exit_code if application.exit_code is not None else "-" - ) - ) + print_format.format(application.name, application.state, utc_to_local( + application.state_transition_time), application.exit_code + if application.exit_code is not None else "-")) if warn_scheduling: log.warning("\nNo Spark applications will be scheduled until the master is selected.") + def print_application(application: models.Application): print_format = '{:<30}| {:<15}' log.info("") log.info("Application %s", application.name) - log.info("-"*42) - log.info( - print_format.format( - "State", - application.state - ) - ) - log.info( - print_format.format( - "State transition time", - utc_to_local(application.state_transition_time) - ) - ) + log.info("-" * 42) + log.info(print_format.format("State", application.state)) + log.info(print_format.format("State transition time", utc_to_local(application.state_transition_time))) log.info("") @@ -398,7 +356,8 @@ class Spinner: @staticmethod def spinning_cursor(): while 1: - for cursor in '|/-\\': yield cursor + for cursor in '|/-\\': + yield cursor def __init__(self, delay=None): self.spinner_generator = self.spinning_cursor() @@ -442,7 +401,8 @@ def print_cluster_conf(cluster_conf: ClusterConfiguration, wait: bool): log.info("cluster vm size: %s", cluster_conf.vm_size) log.info("custom scripts: %s", len(cluster_conf.custom_scripts) if cluster_conf.custom_scripts else 0) log.info("subnet ID: %s", cluster_conf.subnet_id) - log.info("file shares: %s", len(cluster_conf.file_shares) if cluster_conf.file_shares is not None 
else 0) + log.info("file shares: %s", + len(cluster_conf.file_shares) if cluster_conf.file_shares is not None else 0) log.info("gpu enabled: %s", str(cluster_conf.gpu_enabled())) log.info("docker repo name: %s", cluster_conf.get_docker_repo()) log.info("wait for cluster: %s", wait) diff --git a/setup.py b/setup.py index 8b64e308..743252a8 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,6 @@ from aztk import version from aztk_cli import constants - data_files = [] @@ -17,6 +16,7 @@ def _includeFile(filename: str, exclude: [str]) -> bool: return True + def find_package_files(root, directory, dest=""): paths = [] for (path, _, filenames) in os.walk(os.path.join(root, directory)): From 9c183ba35cb54bc31e7bda5544c44103197c8b77 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 10:25:48 -0700 Subject: [PATCH 02/16] add yapf to vsts build --- .vsts-ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 5bcdfee9..90b7a76e 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -14,6 +14,11 @@ steps: condition: and(succeeded(), eq(variables['agent.os'], 'linux')) displayName: install aztk +- script: | + yapf -dr aztk/ aztk_cli/ + condition: and(succeeded(), eq(variables['agent.os'], 'linux')) + displayName: yapf + - script: | pytest -n 50 condition: and(succeeded(), in(variables['agent.os'], 'linux')) From ef8cc8a99cc1eeb71be8890617ea19afef88038d Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 11:26:06 -0700 Subject: [PATCH 03/16] update vsts build --- .vsts-ci.yml | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 90b7a76e..0d8942f3 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -1,25 +1,29 @@ trigger: - master -steps: -- task: UsePythonVersion@0 - inputs: - versionSpec: '>= 3.5' - addToPath: true - architecture: 'x64' +queue: Hosted Linux Preview -- script: | - pip install -r requirements.txt - pip install -e . - condition: and(succeeded(), eq(variables['agent.os'], 'linux')) - displayName: install aztk +phases: + phase: Test + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '>= 3.5' + addToPath: true + architecture: 'x64' -- script: | - yapf -dr aztk/ aztk_cli/ - condition: and(succeeded(), eq(variables['agent.os'], 'linux')) - displayName: yapf + - script: | + pip install -r requirements.txt + pip install -e . 
+ condition: succeeded() + displayName: install aztk -- script: | - pytest -n 50 - condition: and(succeeded(), in(variables['agent.os'], 'linux')) - displayName: pytest + - script: | + yapf -dr aztk/ aztk_cli/ + condition: succeeded() + displayName: yapf + + - script: | + pytest -n 50 + condition: succeeded() + displayName: pytest From bf64e4cbbcad3a1a15109e77382fb8899904d28c Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 11:29:59 -0700 Subject: [PATCH 04/16] fix --- .vsts-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 0d8942f3..c0e6b8be 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -4,7 +4,7 @@ trigger: queue: Hosted Linux Preview phases: - phase: Test +- phase: Test steps: - task: UsePythonVersion@0 inputs: From 038220556329e664bed277557cb13a34b7cc4485 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 11:33:58 -0700 Subject: [PATCH 05/16] fix --- .vsts-ci.yml | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index c0e6b8be..5621df4e 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -5,25 +5,25 @@ queue: Hosted Linux Preview phases: - phase: Test - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '>= 3.5' - addToPath: true - architecture: 'x64' + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '>= 3.5' + addToPath: true + architecture: 'x64' - - script: | - pip install -r requirements.txt - pip install -e . - condition: succeeded() - displayName: install aztk + - script: | + pip install -r requirements.txt + pip install -e . + condition: succeeded() + displayName: install aztk - - script: | - yapf -dr aztk/ aztk_cli/ - condition: succeeded() - displayName: yapf + - script: | + yapf -dr aztk/ aztk_cli/ + condition: succeeded() + displayName: yapf - - script: | - pytest -n 50 - condition: succeeded() - displayName: pytest + - script: | + pytest -n 50 + condition: succeeded() + displayName: pytest From 388dabbd186d01db03583706e0e6d0c364fb8930 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 12:17:27 -0700 Subject: [PATCH 06/16] fix --- .vsts-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 5621df4e..a9b930a1 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -1,10 +1,10 @@ trigger: - master -queue: Hosted Linux Preview phases: - phase: Test + queue: Hosted Linux Preview steps: - task: UsePythonVersion@0 inputs: From f8ded70a2d1098e52d93457d51faf15f627baab6 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 12:43:12 -0700 Subject: [PATCH 07/16] change queue to ubuntu --- .vsts-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index a9b930a1..cb9a1d89 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -4,7 +4,7 @@ trigger: phases: - phase: Test - queue: Hosted Linux Preview + queue: Hosted Ubuntu 1604 steps: - task: UsePythonVersion@0 inputs: From fd24b70b00b42b3b2bd1e4d814ea6d3f12723ea6 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 12:45:01 -0700 Subject: [PATCH 08/16] revert --- .vsts-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index cb9a1d89..a9b930a1 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -4,7 +4,7 @@ trigger: phases: - phase: Test - queue: Hosted Ubuntu 1604 + queue: Hosted Linux Preview steps: - task: UsePythonVersion@0 inputs: From a0b38c0b6daec60452a5f544166a524076268f50 Mon 
Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 12:49:51 -0700 Subject: [PATCH 09/16] temporarily enable builds on pushes to this branch --- .vsts-ci.yml | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index a9b930a1..bdbce631 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -1,29 +1,30 @@ trigger: - master + - internal/yapf-on-save phases: -- phase: Test - queue: Hosted Linux Preview - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '>= 3.5' - addToPath: true - architecture: 'x64' + - phase: Test + queue: Hosted Linux Preview + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '>= 3.5' + addToPath: true + architecture: 'x64' - - script: | - pip install -r requirements.txt - pip install -e . - condition: succeeded() - displayName: install aztk + - script: | + pip install -r requirements.txt + pip install -e . + condition: succeeded() + displayName: install aztk - - script: | - yapf -dr aztk/ aztk_cli/ - condition: succeeded() - displayName: yapf + - script: | + yapf -dr aztk/ aztk_cli/ + condition: succeeded() + displayName: yapf - - script: | - pytest -n 50 - condition: succeeded() - displayName: pytest + - script: | + pytest -n 50 + condition: succeeded() + displayName: pytest From 272fdc5defbf3ca1d099f912f660f24195dd0366 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 13:00:05 -0700 Subject: [PATCH 10/16] change to non preview --- .vsts-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index bdbce631..4e7487c4 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -5,7 +5,7 @@ trigger: phases: - phase: Test - queue: Hosted Linux Preview + queue: Hosted Ubuntu 1604 steps: - task: UsePythonVersion@0 inputs: From b91c54cd1e913fe0812282f9a385205c921c8d9b Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 13:02:03 -0700 Subject: [PATCH 11/16] revert --- .vsts-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 4e7487c4..bdbce631 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -5,7 +5,7 @@ trigger: phases: - phase: Test - queue: Hosted Ubuntu 1604 + queue: Hosted Linux Preview steps: - task: UsePythonVersion@0 inputs: From d6389b1005518e72ff788a6345232e4cefce99ff Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 13:16:59 -0700 Subject: [PATCH 12/16] update yapf version, rerun --- aztk/client/client.py | 16 ++++++++-------- aztk/utils/ssh.py | 8 ++++---- aztk_cli/utils.py | 4 ++-- requirements.txt | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/aztk/client/client.py b/aztk/client/client.py index 560392b2..6aafe7f6 100644 --- a/aztk/client/client.py +++ b/aztk/client/client.py @@ -183,14 +183,14 @@ def __create_user(self, pool_id: str, node_id: str, username: str, password: str :param ssh_key: ssh_key of the user to add """ # Create new ssh user for the given node - self.batch_client.compute_node.add_user(pool_id, node_id, - batch_models.ComputeNodeUser( - name=username, - is_admin=True, - password=password, - ssh_public_key=get_ssh_key.get_user_public_key( - ssh_key, self.secrets_configuration), - expiry_time=datetime.now(timezone.utc) + timedelta(days=365))) + self.batch_client.compute_node.add_user( + pool_id, node_id, + batch_models.ComputeNodeUser( + name=username, + is_admin=True, + password=password, + ssh_public_key=get_ssh_key.get_user_public_key(ssh_key, self.secrets_configuration), + 
expiry_time=datetime.now(timezone.utc) + timedelta(days=365))) @deprecated("0.10.0") def __delete_user(self, pool_id: str, node_id: str, username: str) -> str: diff --git a/aztk/utils/ssh.py b/aztk/utils/ssh.py index 9b28cec0..1b9d6e6d 100644 --- a/aztk/utils/ssh.py +++ b/aztk/utils/ssh.py @@ -224,10 +224,10 @@ async def clus_copy(username, get=False, timeout=None): return await asyncio.gather(*[ - asyncio.get_event_loop().run_in_executor(ThreadPoolExecutor(), copy_from_node if get else node_copy, node.id, - source_path, destination_path, username, node_rls.ip_address, - node_rls.port, ssh_key, password, container_name, timeout) - for node, node_rls in nodes + asyncio.get_event_loop() + .run_in_executor(ThreadPoolExecutor(), copy_from_node + if get else node_copy, node.id, source_path, destination_path, username, node_rls.ip_address, + node_rls.port, ssh_key, password, container_name, timeout) for node, node_rls in nodes ]) diff --git a/aztk_cli/utils.py b/aztk_cli/utils.py index 526d2b58..31f40ae4 100644 --- a/aztk_cli/utils.py +++ b/aztk_cli/utils.py @@ -331,8 +331,8 @@ def print_applications(applications): else: application = applications[name] log.info( - print_format.format(application.name, application.state, utc_to_local( - application.state_transition_time), application.exit_code + print_format.format(application.name, application.state, + utc_to_local(application.state_transition_time), application.exit_code if application.exit_code is not None else "-")) if warn_scheduling: log.warning("\nNo Spark applications will be scheduled until the master is selected.") diff --git a/requirements.txt b/requirements.txt index f59758cf..3e88ed9c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,7 @@ pycryptodomex>=3.4 paramiko==2.4.0 # Development -yapf==0.20.1 +yapf==0.22.0 pylint==1.8.4 pytest==3.1.3 pytest-xdist==1.22.0 From 3ff795c5671a65856c5831482c40ec2f2c4b175a Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 13:24:10 -0700 Subject: [PATCH 13/16] update pytest parallelism --- .vsts-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index bdbce631..8608dbf8 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -25,6 +25,6 @@ phases: displayName: yapf - script: | - pytest -n 50 + pytest -n 102 condition: succeeded() displayName: pytest From 1b6972fb2ecfe6a208f59f6f10e2a830c2321cf0 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 13:47:20 -0700 Subject: [PATCH 14/16] add retry to arm call to avoid failures --- aztk/utils/azure_api.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/aztk/utils/azure_api.py b/aztk/utils/azure_api.py index 10a4fa7a..160e47e4 100644 --- a/aztk/utils/azure_api.py +++ b/aztk/utils/azure_api.py @@ -1,14 +1,16 @@ import re -import azure.batch.batch_service_client as batch +from typing import Optional + import azure.batch.batch_auth as batch_auth +import azure.batch.batch_service_client as batch import azure.storage.blob as blob -from aztk import error -from aztk.version import __version__ from azure.common.credentials import ServicePrincipalCredentials from azure.mgmt.batch import BatchManagementClient from azure.mgmt.storage import StorageManagementClient from azure.storage.common import CloudStorageAccount -from typing import Optional + +from aztk import error +from aztk.version import __version__ RESOURCE_ID_PATTERN = re.compile('^/subscriptions/(?P[^/]+)' '/resourceGroups/(?P[^/]+)' @@ -93,9 +95,25 @@ def 
make_blob_client(secrets): subscription = m.group('subscription') resourcegroup = m.group('resourcegroup') mgmt_client = StorageManagementClient(arm_credentials, subscription) - key = mgmt_client.storage_accounts.list_keys( - resource_group_name=resourcegroup, account_name=accountname).keys[0].value + key = retry_function( + mgmt_client.storage_accounts.list_keys, + 10, + 1, + Exception, + resource_group_name=resourcegroup, + account_name=accountname).keys[0].value storage_client = CloudStorageAccount(accountname, key) blob_client = storage_client.create_block_blob_service() return blob_client + + +def retry_function(function, retry_attempts: int, retry_interval: int, exception: Exception, *args, **kwargs): + import time + for i in range(retry_attempts): + try: + return function(*args, **kwargs) + except exception as e: + if i == retry_attempts - 1: + raise e + time.sleep(retry_interval) From cf508a1123fb94bb0e1f5df091f613bff21b6f44 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 13:55:08 -0700 Subject: [PATCH 15/16] remove non-master trigger --- .vsts-ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 8608dbf8..6eff785a 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -1,6 +1,5 @@ trigger: - master - - internal/yapf-on-save phases: From eb1c1878a482dedd721999b5a98c762ea4736ea4 Mon Sep 17 00:00:00 2001 From: Jake Freck Date: Mon, 6 Aug 2018 15:10:33 -0700 Subject: [PATCH 16/16] update builds, formatting style --- .style.yapf | 3 ++- .travis.yml | 2 +- .vsts-ci.yml | 2 +- aztk/node_scripts/install/pick_master.py | 2 +- aztk/utils/helpers.py | 3 ++- aztk_cli/spark/endpoints/cluster/cluster_ssh.py | 6 ++++-- pylintrc | 2 +- 7 files changed, 12 insertions(+), 8 deletions(-) diff --git a/.style.yapf b/.style.yapf index ca54f975..5946e749 100644 --- a/.style.yapf +++ b/.style.yapf @@ -1,7 +1,8 @@ [style] -based_on_style=pep8 +based_on_style=google spaces_before_comment=4 split_before_logical_operator=True indent_width=4 column_limit=120 split_arguments_when_comma_terminated=True +blank_line_before_nested_class_or_def=False \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 1b69bf29..b7e82c57 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ install: - pip install -e . 
script: - - yapf -dr aztk/ aztk_cli/ + - yapf -dpr aztk/ aztk_cli/ - pylint -E aztk - pytest --ignore=tests/integration_tests diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 6eff785a..373e0dd9 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -19,7 +19,7 @@ phases: displayName: install aztk - script: | - yapf -dr aztk/ aztk_cli/ + yapf -dpr aztk/ aztk_cli/ condition: succeeded() displayName: yapf diff --git a/aztk/node_scripts/install/pick_master.py b/aztk/node_scripts/install/pick_master.py index 99388f0f..0fca6b19 100644 --- a/aztk/node_scripts/install/pick_master.py +++ b/aztk/node_scripts/install/pick_master.py @@ -38,7 +38,7 @@ def try_assign_self_as_master(client: batch.BatchServiceClient, pool: batchmodel config.pool_id, batchmodels.PoolPatchParameter(metadata=new_metadata), batchmodels.PoolPatchOptions(if_match=pool.e_tag, - )) + )) return True except (batcherror.BatchErrorException, ClientRequestError): print("Couldn't assign itself as master the pool because the pool was modified since last get.") diff --git a/aztk/utils/helpers.py b/aztk/utils/helpers.py index baa6c847..cd469889 100644 --- a/aztk/utils/helpers.py +++ b/aztk/utils/helpers.py @@ -183,7 +183,8 @@ def select_latest_verified_vm_image_with_node_agent_sku(publisher, offer, sku_st node_agent_skus = batch_client.account.list_node_agent_skus() # pick the latest supported sku - skus_to_use = [(sku, image_ref) for sku in node_agent_skus + skus_to_use = [(sku, image_ref) + for sku in node_agent_skus for image_ref in sorted(sku.verified_image_references, key=lambda item: item.sku) if image_ref.publisher.lower() == publisher.lower() and image_ref.offer.lower() == offer.lower() and image_ref.sku.startswith(sku_starts_with)] diff --git a/aztk_cli/spark/endpoints/cluster/cluster_ssh.py b/aztk_cli/spark/endpoints/cluster/cluster_ssh.py index 8e66fbf6..ae085809 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_ssh.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_ssh.py @@ -102,8 +102,10 @@ def native_python_ssh_into_master(spark_client, cluster, ssh_conf, password): plugin_ports = [] if configuration and configuration.plugins: ports = [ - PortForwardingSpecification(port.internal, port.public_port) for plugin in configuration.plugins - for port in plugin.ports if port.expose_publicly + PortForwardingSpecification(port.internal, port.public_port) + for plugin in configuration.plugins + for port in plugin.ports + if port.expose_publicly ] plugin_ports.extend(ports) diff --git a/pylintrc b/pylintrc index 341cebf9..c1b570b5 100644 --- a/pylintrc +++ b/pylintrc @@ -192,7 +192,7 @@ max-nested-blocks=5 [FORMAT] # Maximum number of characters on a single line. -max-line-length=140 +max-line-length=120 # Regexp for a line that is allowed to be longer than the limit. ignore-long-lines=^\s*(# )??$
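
A quick illustration of the retry helper introduced in [PATCH 14/16] ("add retry to arm call to avoid failures"). The sketch below mirrors the retry_function added to aztk/utils/azure_api.py in that patch: it calls the wrapped function, swallows the given exception type, sleeps retry_interval seconds between attempts, and re-raises only on the final attempt. The FlakyCall class is a hypothetical stand-in used purely to exercise the helper here; it is not part of the patch series.

import time


def retry_function(function, retry_attempts: int, retry_interval: int, exception: Exception, *args, **kwargs):
    # Same behavior as the helper added in aztk/utils/azure_api.py:
    # retry function(*args, **kwargs) when `exception` is raised, sleeping
    # between attempts and re-raising the error only on the last attempt.
    for i in range(retry_attempts):
        try:
            return function(*args, **kwargs)
        except exception as e:
            if i == retry_attempts - 1:
                raise e
            time.sleep(retry_interval)


class FlakyCall:
    """Hypothetical stand-in for an ARM call that fails a few times before succeeding."""

    def __init__(self, failures_before_success: int):
        self.remaining_failures = failures_before_success

    def __call__(self):
        if self.remaining_failures > 0:
            self.remaining_failures -= 1
            raise ConnectionError("transient ARM failure")
        return "storage-account-key"


if __name__ == "__main__":
    flaky = FlakyCall(failures_before_success=2)
    # Succeeds on the third attempt; a persistent failure would re-raise ConnectionError
    # after the tenth try, matching how the patch wraps storage_accounts.list_keys.
    print(retry_function(flaky, 10, 1, ConnectionError))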