Oci provider allow port #3

Open: wants to merge 56 commits into base: master
56 commits
c943d04
Enable OCI as a Cloud provider in PKB. Virtual Machine, Networking an…
Feb 27, 2023
ad390d4
VNIC will now be assigned a DNS record on launching a compute instance
Mar 8, 2023
d731a8a
DNS label for the VCN, subnet and IG
Mar 8, 2023
7e51343
Enable Oracle 8 and Oracle 9 in PKB
Mar 10, 2023
4632474
Fix gcp_alloy_db flags not being parsed
raymond13513 May 15, 2023
eb77fb4
Add Windows static VM class.
May 16, 2023
6adeeec
Do not fail when windows VM's temp dir exists
May 16, 2023
c5ecfb5
Set the correct device path for local NVME SSD's.
andyz422 May 17, 2023
e604b49
Change GCP disk PDDriveIsNvme condition.
andyz422 May 17, 2023
4524953
Support suppressing printing samples to standard output and remove pr…
raymond13513 May 17, 2023
4e3e20d
Add `timeout_exception` to the `vm_util` function retry wrapper; use …
May 17, 2023
d3c1d7a
Degradation percent currently does not consider the LM_end time corre…
raymond13513 May 17, 2023
89c16a5
Collect client latency in redis benchmark
tohaowu May 17, 2023
c085ed6
Add test for TCP STREAM using multiple streams.
May 18, 2023
cb2ee22
Support g2 placement groups for GCP
May 19, 2023
469ebd9
Upgrade GPU to CUDA 12.1
tohaowu May 19, 2023
8c4cb13
L4 GPU is Ada Lovelace architecture, not Tesla architecture.
tohaowu May 19, 2023
c981e4b
Upgrade default OS from Ubuntu1804 to Ubuntu2004.
andyz422 May 19, 2023
6a2f649
Correct L4 GPU type name.
tohaowu May 22, 2023
fb22e98
Add defaults to memtier.py dataclasses.
bvliu May 23, 2023
b376275
Add partial support for multiple client VMs in relational_db.py.
bvliu May 23, 2023
ef11757
Fix incorrect flag in AWS memcached.
bvliu May 24, 2023
205fd0b
Add microsecond parsing support for linux_boot.
dlott May 24, 2023
f410a0b
Add shared flag for managed memorystore shard counts.
bvliu May 25, 2023
b95351e
Store metadata as instance variable on MemtierResult.
bvliu May 25, 2023
2fc5741
Fix disk size job property for Dataproc Serverless and support custom…
dorellang May 25, 2023
5d7f871
Add support for NCCL Fast Socket on GKE.
pmkc May 25, 2023
0bfaed1
Change lmbench_benchmark commit version since default os_type is now …
andyz422 May 26, 2023
fe46741
Add formatted metric names for TCP_STREAM of netperf benchmark test. …
May 26, 2023
0d47727
Change ha for flexible server to be zone-redundant as flexible server…
raymond13513 May 26, 2023
e130386
Before invoking MeasureDelete, ensure there is a delete time to measure.
May 26, 2023
cee9161
Raise insufficient capacity on rds
raymond13513 May 30, 2023
56c0ec7
Add max thread and client count to cloud redis memtier binary search.
bvliu May 30, 2023
80818de
No need to install nvidia-docker if it exists
tohaowu May 30, 2023
1a12846
Print summary for memtier output.
bvliu May 30, 2023
6743e8d
Add accept_licenses flag and throw warnings if accept_licenses flag i…
raymond13513 May 30, 2023
461ba43
Skip parsing incomplete lines in memtier histogram.
bvliu May 30, 2023
014d3c5
Add redis-cli package.
bvliu May 30, 2023
3b4249c
Merged OCI provider into new master
spacedan May 31, 2023
8ec3af9
Disinherit Ubuntu2004 from Ubuntu1804.
andyz422 Jun 1, 2023
6e770ea
Update Dataproc Serverless price table.
dorellang Jun 1, 2023
1733f57
Add a cloud_redis_zones flag to managed_memory_store to support clust…
bvliu Jun 1, 2023
52f4c55
Replace the use of `timeout_exception` in the `vm_util` function retr…
Jun 1, 2023
017de41
Use apt-get install stress-ng.
andyz422 Jun 1, 2023
6081256
Failed LM if the RobustRemoteCommand for LM Notification times out
raymond13513 Jun 2, 2023
ff2f0bb
Rerun the installation of CUDA and Nvidia drivers if the first attemp…
tohaowu Jun 5, 2023
63d8b9e
Support measuring distribution of runs in memtier for latency capped …
bvliu Jun 5, 2023
031528c
Fix typo (vm.InstallPackages('stress_ng') -> vm.InstallPackages('stre…
andyz422 Jun 6, 2023
8f5c89f
Change build examples for DPDK.
andyz422 Jun 6, 2023
7e140ac
Formatting fixes for ycsb.py.
bvliu Jun 6, 2023
509821a
Add BSD-3-Clause to LICENSES.
dlott Jun 6, 2023
a81b8dc
Separate YCSB stats parsers into a new module.
bvliu Jun 6, 2023
fcc696b
Add TLS support to memorystore cluster runs.
bvliu Jun 7, 2023
9043e39
Merge branch 'master' of github.com:GoogleCloudPlatform/PerfKitBenchm…
spacedan Jun 7, 2023
aabf476
Add default subnet to BaseNetworkSpec
neill-campbell Jun 14, 2023
9ab62f9
create allow port method
neill-campbell Jul 5, 2023
5 changes: 4 additions & 1 deletion CHANGES.next.md
@@ -1,5 +1,7 @@
### Breaking changes:

- Added --accept_licenses flag. Users have to turn this flag on to acknowledge
  that PKB may install software, thereby accepting license agreements on the
  user's behalf.
- Renamed Database-related flags from managed_db* to db*. Added an alias for
  backwards compatibility; it might not be supported in a future release.
- Require Python 3.9+
@@ -46,6 +48,7 @@
- Remove flag fio_write_against_multiple_clients from FIO.
- Drop windows coremark benchmark.
- Remove cudnn linux package.
- Make Ubuntu 20 the default os_type.

### New features:

36 changes: 36 additions & 0 deletions LICENSE
@@ -199,3 +199,39 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

---

Files: data/blaze_config.j2, data/blazemark_config.j2
#==================================================================================================
#
# Configfile file for the Blaze library
#
# Copyright (C) 2013 Klaus Iglberger - All Rights Reserved
#
# This file is part of the Blaze library. You can redistribute it and/or modify it under
# the terms of the New (Revised) BSD License. Redistribution and use in source and binary
# forms, with or without modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of
# conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice, this list
# of conditions and the following disclaimer in the documentation and/or other materials
# provided with the distribution.
# 3. Neither the names of the Blaze development group nor the names of its contributors
# may be used to endorse or promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
# SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
#
#==================================================================================================
2 changes: 2 additions & 0 deletions README.md
@@ -35,6 +35,8 @@ as part of a benchmark run. Therefore you must accept the license of each of the
benchmarks individually, and take responsibility for using them before you use
the PerfKit Benchmarker.

Moving forward, you will need to run PKB with the explicit flag --accept_licenses.

In its current release these are the benchmarks that are executed:

- `aerospike`:
8 changes: 8 additions & 0 deletions perfkitbenchmarker/configs/benchmark_config_spec.py
@@ -190,6 +190,14 @@ def _GetOptionDecoderConstructions(cls):
'default': None,
'none_ok': True
}),
'dataproc_serverless_memory': (option_decoders.IntDecoder, {
'default': None,
'none_ok': True
}),
'dataproc_serverless_memory_overhead': (option_decoders.IntDecoder, {
'default': None,
'none_ok': True
}),
'emr_serverless_executor_count': (option_decoders.IntDecoder, {
'default': None,
'none_ok': True
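Note (not part of this diff): the new entries follow the (decoder class, options) convention used throughout benchmark_config_spec.py. Below is a simplified, hypothetical stand-in for an integer option decoder, only to show what `{'default': None, 'none_ok': True}` means for the added dataproc options; PKB's real implementation lives in configs/option_decoders.py.

```python
# Simplified, hypothetical stand-in for an integer option decoder, showing what
# {'default': None, 'none_ok': True} implies for the new dataproc options.
# (In PKB, 'default' applies when the option is omitted entirely; not shown.)
class SimpleIntDecoder:

  def __init__(self, default=None, none_ok=False):
    self.default = default
    self.none_ok = none_ok

  def Decode(self, option, value):
    """Returns the validated value; None is allowed only when none_ok is set."""
    if value is None:
      if self.none_ok:
        return None
      raise ValueError(f'{option} may not be None.')
    if not isinstance(value, int):
      raise ValueError(f'{option} must be an integer, got {value!r}.')
    return value


decoder = SimpleIntDecoder(default=None, none_ok=True)
print(decoder.Decode('dataproc_serverless_memory', None))  # None is accepted.
print(decoder.Decode('dataproc_serverless_memory', 4096))  # 4096
```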
17 changes: 17 additions & 0 deletions perfkitbenchmarker/configs/default_config_constants.yaml
@@ -52,6 +52,10 @@ default_single_core: &default_single_core
machine_type: cx2-2x4
zone: us-south-1
image: null
OCI:
machine_type: VM.Standard.A1.Flex
zone: us-ashburn-1
image: null

# TODO: update the two core machines for more providers
default_dual_core: &default_dual_core
@@ -83,6 +87,10 @@ default_dual_core: &default_dual_core
image: null
Kubernetes:
image: null
OCI:
machine_type: VM.Standard.A1.Flex
zone: us-ashburn-1
image: null

# TODO(user): update the disk types below as more providers are
# updated for the disk types refactor.
@@ -138,6 +146,10 @@ default_500_gb: &default_500_gb
disk_type: standard
disk_size: 500
mount_point: /scratch
OCI:
disk_type: paravirtualized
disk_size: 500
mount_point: /scratch


# TODO(user): update the disk types below as more providers are
@@ -194,3 +206,8 @@ default_50_gb: &default_50_gb
disk_type: standard
disk_size: 50
mount_point: /scratch
OCI:
disk_type: paravirtualized
disk_size: 50
mount_point: /scratch

7 changes: 7 additions & 0 deletions perfkitbenchmarker/flags.py
@@ -36,6 +36,13 @@ def GetCurrentUser():
except KeyError:
return 'user_unknown'


flags.DEFINE_boolean(
'accept_licenses',
False,
    'Acknowledge that PKB may install software, thereby accepting license'
    " agreements on the user's behalf.",
)
flags.DEFINE_list('ssh_options', [], 'Additional options to pass to ssh.')
flags.DEFINE_boolean('use_ipv6', False, 'Whether to use ipv6 for ssh/scp.')
flags.DEFINE_list('benchmarks', ['cluster_boot'],
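Note (not part of this diff): a minimal, self-contained sketch of how a boolean opt-in like --accept_licenses is typically checked with absl flags. The main() wiring and messages below are illustrative assumptions; PKB's real entry point (pkb.py) is not shown in this pull request.

```python
# Illustrative only: a standalone absl-flags program mirroring the new
# --accept_licenses opt-in. This is not PKB's actual entry point.
from absl import app
from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_boolean(
    'accept_licenses',
    False,
    'Acknowledge that PKB may install software, thereby accepting license'
    " agreements on the user's behalf.",
)


def main(argv):
  del argv  # Unused.
  if not FLAGS.accept_licenses:
    # A real run would warn or refuse to proceed until the user opts in.
    print('Pass --accept_licenses to acknowledge third-party license terms.')
    return 1
  print('Licenses acknowledged; continuing with benchmark setup.')
  return 0


if __name__ == '__main__':
  app.run(main)
```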
4 changes: 2 additions & 2 deletions perfkitbenchmarker/linux_benchmarks/apachebench_benchmark.py
@@ -151,11 +151,11 @@ class ApacheBenchRunMode(object):
description: Runs apachebench benchmark.
vm_groups:
client:
os_type: ubuntu1804
os_type: ubuntu2004
vm_spec: *default_single_core
vm_count: 1
server:
os_type: ubuntu1804
os_type: ubuntu2004
vm_spec: *default_single_core
"""
FLAGS = flags.FLAGS
105 changes: 81 additions & 24 deletions perfkitbenchmarker/linux_benchmarks/cloud_redis_memtier_benchmark.py
@@ -16,10 +16,14 @@
Spins up a cloud redis instance, runs memtier against it, then spins it down.
"""

import collections
from absl import flags
from absl import logging
from perfkitbenchmarker import background_tasks
from perfkitbenchmarker import configs
from perfkitbenchmarker import linux_virtual_machine
from perfkitbenchmarker import managed_memory_store
from perfkitbenchmarker import sample
from perfkitbenchmarker.linux_packages import memtier

FLAGS = flags.FLAGS
@@ -36,6 +40,9 @@
vm_count: 1
"""

_LinuxVm = linux_virtual_machine.BaseLinuxVirtualMachine
_ManagedRedis = managed_memory_store.BaseManagedMemoryStore


def GetConfig(user_config):
config = configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
@@ -90,6 +97,74 @@ def Prepare(benchmark_spec):
memtier.Load(vm, memory_store_ip, memory_store_port, password)


def _GetConnections(
vms: list[_LinuxVm], redis_instance: _ManagedRedis
) -> list[memtier.MemtierConnection]:
"""Gets a list of connections mapping client VMs to shards."""
if len(vms) == 1:
return [
memtier.MemtierConnection(
vms[0],
redis_instance.GetMemoryStoreIp(),
redis_instance.GetMemoryStorePort(),
)
]
# Spread shards by client VM (evenly distributed by zone) such that each
# client VM gets an equal number of shards in each zone.
connections = []
shards = redis_instance.GetShardEndpoints(vms[0])
shards_by_zone = collections.defaultdict(list)
for shard in shards:
shards_by_zone[shard.zone].append(shard)
shards_by_vm = collections.defaultdict(list)
for shards_list in shards_by_zone.values():
for shard_index, shard in enumerate(shards_list):
vm_index = shard_index % len(vms)
vm = vms[vm_index]
connections.append(memtier.MemtierConnection(vm, shard.ip, shard.port))
shards_by_vm[vm].append(shard)
logging.info('Shards by VM: %s', shards_by_vm)
return connections


def _MeasureMemtierDistribution(
redis_instance: _ManagedRedis,
vms: list[_LinuxVm],
) -> list[sample.Sample]:
"""Runs and reports stats across a series of memtier runs."""
connections = _GetConnections(vms, redis_instance)
return memtier.MeasureLatencyCappedThroughputDistribution(
connections,
redis_instance.GetMemoryStoreIp(),
redis_instance.GetMemoryStorePort(),
vms,
redis_instance.node_count,
redis_instance.GetMemoryStorePassword(),
)


def _Run(vms: list[_LinuxVm], redis_instance: _ManagedRedis):
"""Runs memtier based on provided flags."""
if memtier.MEMTIER_RUN_MODE.value == memtier.MemtierMode.MEASURE_CPU_LATENCY:
return memtier.RunGetLatencyAtCpu(redis_instance, vms)
if memtier.MEMTIER_LATENCY_CAPPED_THROUGHPUT.value:
if memtier.MEMTIER_DISTRIBUTION_ITERATIONS.value:
return _MeasureMemtierDistribution(redis_instance, vms)
return memtier.MeasureLatencyCappedThroughput(
vms[0],
redis_instance.node_count,
redis_instance.GetMemoryStoreIp(),
redis_instance.GetMemoryStorePort(),
redis_instance.GetMemoryStorePassword(),
)
return memtier.RunOverAllThreadsPipelinesAndClients(
vms,
redis_instance.GetMemoryStoreIp(),
[redis_instance.GetMemoryStorePort()],
redis_instance.GetMemoryStorePassword(),
)


def Run(benchmark_spec):
"""Run benchmark and collect samples.

@@ -101,30 +176,10 @@ def Run(benchmark_spec):
A list of sample.Sample instances.
"""
memtier_vms = benchmark_spec.vm_groups['clients']
samples = []
if memtier.MEMTIER_RUN_MODE.value == memtier.MemtierMode.MEASURE_CPU_LATENCY:
samples = memtier.RunGetLatencyAtCpu(
benchmark_spec.cloud_redis_instance, memtier_vms
)
elif memtier.MEMTIER_LATENCY_CAPPED_THROUGHPUT.value:
samples = memtier.MeasureLatencyCappedThroughput(
memtier_vms[0],
benchmark_spec.cloud_redis_instance.GetMemoryStoreIp(),
benchmark_spec.cloud_redis_instance.GetMemoryStorePort(),
benchmark_spec.cloud_redis_instance.GetMemoryStorePassword(),
)
else:
samples = memtier.RunOverAllThreadsPipelinesAndClients(
memtier_vms,
benchmark_spec.cloud_redis_instance.GetMemoryStoreIp(),
[benchmark_spec.cloud_redis_instance.GetMemoryStorePort()],
benchmark_spec.cloud_redis_instance.GetMemoryStorePassword(),
)

for sample in samples:
sample.metadata.update(
benchmark_spec.cloud_redis_instance.GetResourceMetadata()
)
redis_instance: _ManagedRedis = benchmark_spec.cloud_redis_instance
samples = _Run(memtier_vms, redis_instance)
for s in samples:
s.metadata.update(benchmark_spec.cloud_redis_instance.GetResourceMetadata())

return samples

@@ -140,4 +195,6 @@ def Cleanup(benchmark_spec):


def _Install(vm):
"""Installs necessary client packages."""
vm.Install('memtier')
vm.Install('redis_cli')
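Note (not part of this diff): a standalone sketch of the zone-balanced shard assignment performed by _GetConnections above. The Shard dataclass and client names below are hypothetical stand-ins for PKB's shard endpoints and VM objects; only the per-zone round-robin logic mirrors the change.

```python
# Standalone sketch of the zone-balanced shard assignment performed by
# _GetConnections. Shard and the client names are hypothetical stand-ins;
# only the per-zone round-robin logic mirrors the benchmark code.
import collections
import dataclasses


@dataclasses.dataclass(frozen=True)
class Shard:
  ip: str
  port: int
  zone: str


def AssignShards(vms, shards):
  """Round-robins shards to client VMs within each zone."""
  assignments = collections.defaultdict(list)
  shards_by_zone = collections.defaultdict(list)
  for shard in shards:
    shards_by_zone[shard.zone].append(shard)
  for zone_shards in shards_by_zone.values():
    for shard_index, shard in enumerate(zone_shards):
      assignments[vms[shard_index % len(vms)]].append(shard)
  return assignments


clients = ['client-0', 'client-1']
shards = [
    Shard('10.0.0.1', 6379, 'us-central1-a'),
    Shard('10.0.0.2', 6379, 'us-central1-a'),
    Shard('10.0.0.3', 6379, 'us-central1-b'),
    Shard('10.0.0.4', 6379, 'us-central1-b'),
]
for client, assigned in AssignShards(clients, shards).items():
  print(client, [s.ip for s in assigned])
# Each client gets one shard from every zone, so load stays balanced per zone.
```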
4 changes: 4 additions & 0 deletions perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py
@@ -368,6 +368,10 @@ def MeasureDelete(
List of Samples containing the delete times and an overall cluster delete
time.
"""
# Only measure VMs that have a delete time.
vms = [vm for vm in vms if vm.delete_start_time and vm.delete_end_time]
if not vms:
return []
# Collect a delete time from each VM.
delete_times = [vm.delete_end_time - vm.delete_start_time for vm in vms]
# Get the cluster delete time.
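Note (not part of this diff): a minimal sketch of the guard added to MeasureDelete, using a hypothetical FakeVm stand-in for PKB's VM objects. Only VMs that recorded both delete timestamps contribute a delete-time sample; an empty list is returned when nothing was deleted.

```python
# Minimal sketch of MeasureDelete's new guard. FakeVm is a made-up stand-in
# for PKB's virtual machine objects; only the filtering logic mirrors the diff.
import dataclasses
from typing import Optional


@dataclasses.dataclass
class FakeVm:
  name: str
  delete_start_time: Optional[float] = None
  delete_end_time: Optional[float] = None


def DeleteDurations(vms):
  # Only measure VMs that have a delete time.
  vms = [vm for vm in vms if vm.delete_start_time and vm.delete_end_time]
  if not vms:
    return []
  return [vm.delete_end_time - vm.delete_start_time for vm in vms]


print(DeleteDurations([
    FakeVm('vm-0', delete_start_time=100.0, delete_end_time=130.5),
    FakeVm('vm-1'),  # Never deleted (e.g. provisioning failed); skipped.
]))  # -> [30.5]
```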
@@ -12,6 +12,7 @@
from perfkitbenchmarker import virtual_machine

from perfkitbenchmarker.linux_packages import hammerdb
from perfkitbenchmarker.providers.gcp import gcp_alloy_db # pylint: disable=unused-import

# MYSQL Config file path
MYSQL_CONFIG_PATH = '/etc/mysql/mysql.conf.d/mysqld.cnf'
7 changes: 4 additions & 3 deletions perfkitbenchmarker/linux_benchmarks/lmbench_benchmark.py
@@ -219,9 +219,10 @@ def _AddProcessorMetricSamples(lmbench_output, processor_metric_list, metadata,
regex = '%s: (.*)' % metric
value_unit = regex_util.ExtractGroup(regex, lmbench_output)
[value, unit] = value_unit.split(' ')
results.append(
sample.Sample('%s' % metric.replace('\\', ''), float(value), unit,
metadata))
if unit == 'microseconds':
results.append(
sample.Sample('%s' % metric.replace('\\', ''), float(value), unit,
metadata))


def _ParseOutput(lmbench_output):
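Note (not part of this diff): a self-contained sketch of the unit filter added to _AddProcessorMetricSamples. The lmbench output text and metric names below are fabricated for illustration; the point is that metrics reported in units other than microseconds are now skipped rather than emitted as samples.

```python
# Self-contained sketch of the lmbench unit filter. The output snippet and
# metric names below are made up for illustration only.
import re

lmbench_output = """
Simple syscall: 0.2345 microseconds
Simple read: 0.3127 microseconds
integer mul: 1.23 nanoseconds
"""

results = []
for metric in ['Simple syscall', 'Simple read', 'integer mul']:
  match = re.search('%s: (.*)' % metric, lmbench_output)
  if not match:
    continue
  value, unit = match.group(1).split(' ')
  if unit == 'microseconds':  # Metrics in other units are now skipped.
    results.append((metric, float(value), unit))

print(results)
# -> [('Simple syscall', 0.2345, 'microseconds'), ('Simple read', 0.3127, 'microseconds')]
```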
8 changes: 4 additions & 4 deletions perfkitbenchmarker/linux_benchmarks/mlperf_benchmark.py
@@ -183,6 +183,10 @@ def PrepareBenchmark(benchmark_spec, vm=None):
_UpdateBenchmarkSpecWithFlags(benchmark_spec)
vm = vm or benchmark_spec.vms[0]

has_gpu = nvidia_driver.CheckNvidiaGpuExists(vm)
if has_gpu:
vm.Install('cuda_toolkit')

if (bool(benchmark_spec.tpus) and nvidia_driver.CheckNvidiaGpuExists(vm)):
raise errors.Config.InvalidValue(
'Invalid configuration. GPUs and TPUs can not both present in the config.'
@@ -362,10 +366,6 @@ def PrepareRunner(benchmark_spec, vm=None):
else:
benchmark_spec.model_dir = '/tmp'

has_gpu = nvidia_driver.CheckNvidiaGpuExists(vm)
if has_gpu:
vm.Install('cuda_toolkit')

vm.Install('nvidia_docker')
docker.AddUser(vm)
vm.RemoteCommand('sudo usermod -aG docker $USER')
24 changes: 23 additions & 1 deletion perfkitbenchmarker/linux_benchmarks/netperf_benchmark.py
@@ -503,7 +503,18 @@ def RunNetperf(vm, benchmark_name, server_ips, num_streams):
if len(parsed_output) == 1:
# Only 1 netperf thread
throughput_sample, latency_samples, histogram = parsed_output[0]
return samples + [throughput_sample] + latency_samples
output_samples = samples + [throughput_sample] + latency_samples
# Create formatted output for TCP stream throughput metrics
if benchmark_name.upper() == 'TCP_STREAM':
output_samples.append(
sample.Sample(
throughput_sample.metric + '_1stream',
throughput_sample.value,
throughput_sample.unit,
throughput_sample.metadata,
)
)
return output_samples
else:
# Multiple netperf threads
# Unzip parsed output
@@ -527,6 +538,17 @@ def RunNetperf(vm, benchmark_name, server_ips, num_streams):
samples.append(
sample.Sample(f'{benchmark_name}_Throughput_{stat}', float(value),
throughput_unit, metadata))
# Create formatted output, following {benchmark_name}_Throughput_Xstream(s)
# for TCP stream throughput metrics
if benchmark_name.upper() == 'TCP_STREAM':
samples.append(
sample.Sample(
f'{benchmark_name}_Throughput_{len(parsed_output)}streams',
throughput_stats['total'],
throughput_unit,
metadata,
)
)
if enable_latency_histograms:
# Combine all of the latency histogram dictionaries
latency_histogram = collections.Counter()
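Note (not part of this diff): the two hunks above give TCP_STREAM throughput an extra, stream-count-qualified metric name. A small sketch of that naming convention follows, with placeholder values rather than real netperf output.

```python
# Sketch of the metric-name convention added for TCP_STREAM throughput:
# "<benchmark>_Throughput_1stream" for a single stream and
# "<benchmark>_Throughput_<N>streams" for aggregated multi-stream runs.
def FormatStreamMetric(benchmark_name: str, num_streams: int) -> str:
  suffix = '1stream' if num_streams == 1 else f'{num_streams}streams'
  return f'{benchmark_name}_Throughput_{suffix}'


print(FormatStreamMetric('TCP_STREAM', 1))  # TCP_STREAM_Throughput_1stream
print(FormatStreamMetric('TCP_STREAM', 8))  # TCP_STREAM_Throughput_8streams
```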