diff --git a/Jenkinsfile b/Jenkinsfile
index ba1ed06a2e..a18f2bcb20 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -140,8 +140,11 @@ pipeline {
     sh script: 'sudo lsof -i -P -n | grep LISTEN || true', label: 'Check ports.'
     sh script: 'cd build/bin && PYTHONPATH=../.. timeout 20m python3 -m script.testing.replication.tests_simple --build-type=debug', label: 'Replication (Simple)'
     sh script: 'cd build && PYTHONPATH=.. timeout 20m python3 -m script.testing.junit --build-type=debug --query-mode=simple', label: 'UnitTest (Simple)'
+    sh script: 'cd build && PYTHONPATH=.. timeout 60m python3 -m script.testing.junit --build-type=debug --query-mode=simple -a "compiled_query_execution=True" -a "bytecode_handlers_path=./bytecode_handlers_ir.bc"', label: 'UnitTest (Simple, Compiled Execution)'
     sh script: 'cd build && PYTHONPATH=.. timeout 20m python3 -m script.testing.junit --build-type=debug --query-mode=extended', label: 'UnitTest (Extended)'
+    sh script: 'cd build && PYTHONPATH=.. timeout 60m python3 -m script.testing.junit --build-type=debug --query-mode=extended -a "compiled_query_execution=True" -a "bytecode_handlers_path=./bytecode_handlers_ir.bc"', label: 'UnitTest (Extended, Compiled Execution)'
     sh script: 'cd build && PYTHONPATH=.. timeout 20m python3 -m script.testing.junit --build-type=debug --query-mode=extended -a "pipeline_metrics_enable=True" -a "pipeline_metrics_sample_rate=100" -a "counters_enable=True" -a "query_trace_metrics_enable=True"', label: 'UnitTest (Extended with pipeline metrics, counters, and query trace metrics)'
+    sh script: 'cd build && PYTHONPATH=.. timeout 60m python3 -m script.testing.junit --build-type=debug --query-mode=extended -a "pipeline_metrics_enable=True" -a "pipeline_metrics_sample_rate=100" -a "counters_enable=True" -a "query_trace_metrics_enable=True" -a "compiled_query_execution=True" -a "bytecode_handlers_path=./bytecode_handlers_ir.bc"', label: 'UnitTest (Extended, Compiled Execution with pipeline metrics, counters, and query trace metrics)'
     sh script: 'sudo lsof -i -P -n | grep LISTEN || true', label: 'Check ports.'
     sh 'cd build && timeout 1h ninja check-tpl'
     sh 'cd build && timeout 1h ninja jumbotests'
@@ -183,7 +186,9 @@ pipeline {
     sh script: 'cd build && timeout 10s sudo python3 -B ../script/testing/kill_server.py 15723', label: 'Kill PID(15723)'
     sh script: 'sudo lsof -i -P -n | grep LISTEN || true', label: 'Check ports.'
     sh script: 'cd build && PYTHONPATH=.. timeout 20m python3 -m script.testing.junit --build-type=debug --query-mode=simple', label: 'UnitTest (Simple)'
+    sh script: 'cd build && PYTHONPATH=.. timeout 60m python3 -m script.testing.junit --build-type=debug --query-mode=simple -a "compiled_query_execution=True" -a "bytecode_handlers_path=./bytecode_handlers_ir.bc"', label: 'UnitTest (Simple, Compiled Execution)'
     sh script: 'cd build && PYTHONPATH=.. timeout 20m python3 -m script.testing.junit --build-type=debug --query-mode=extended', label: 'UnitTest (Extended)'
+    sh script: 'cd build && PYTHONPATH=.. timeout 60m python3 -m script.testing.junit --build-type=debug --query-mode=extended -a "compiled_query_execution=True" -a "bytecode_handlers_path=./bytecode_handlers_ir.bc"', label: 'UnitTest (Extended, Compiled Execution)'
     sh script: 'sudo lsof -i -P -n | grep LISTEN || true', label: 'Check ports.'
     sh 'cd build && timeout 1h ninja check-tpl'
     sh 'cd build && timeout 1h ninja unittest'
@@ -232,7 +237,9 @@ pipeline {
     sh script: 'sudo lsof -i -P -n | grep LISTEN || true', label: 'Check ports.'
     sh script: 'cd build/bin && PYTHONPATH=../.. timeout 20m python3 -m script.testing.replication.tests_simple --build-type=debug', label: 'Replication (Simple)'
     sh script: 'cd build && PYTHONPATH=.. timeout 20m python3 -m script.testing.junit --build-type=debug --query-mode=simple', label: 'UnitTest (Simple)'
+    sh script: 'cd build && PYTHONPATH=.. timeout 60m python3 -m script.testing.junit --build-type=debug --query-mode=simple -a "compiled_query_execution=True" -a "bytecode_handlers_path=./bytecode_handlers_ir.bc"', label: 'UnitTest (Simple, Compiled Execution)'
     sh script: 'cd build && PYTHONPATH=.. timeout 20m python3 -m script.testing.junit --build-type=debug --query-mode=extended', label: 'UnitTest (Extended)'
+    sh script: 'cd build && PYTHONPATH=.. timeout 60m python3 -m script.testing.junit --build-type=debug --query-mode=extended -a "compiled_query_execution=True" -a "bytecode_handlers_path=./bytecode_handlers_ir.bc"', label: 'UnitTest (Extended, Compiled Execution)'
     sh script: 'sudo lsof -i -P -n | grep LISTEN || true', label: 'Check ports.'
     sh 'cd build && timeout 1h ninja check-tpl'
     sh 'cd build && timeout 1h ninja jumbotests'
@@ -271,7 +278,9 @@ pipeline {
     sh script: 'sudo lsof -i -P -n | grep LISTEN || true', label: 'Check ports.'
     sh script: 'cd build/bin && PYTHONPATH=../.. timeout 20m python3 -m script.testing.replication.tests_simple --build-type=release', label: 'Replication (Simple)'
     sh script: 'cd build && PYTHONPATH=.. timeout 20m python3 -m script.testing.junit --build-type=release --query-mode=simple', label: 'UnitTest (Simple)'
+    sh script: 'cd build && PYTHONPATH=.. timeout 60m python3 -m script.testing.junit --build-type=release --query-mode=simple -a "compiled_query_execution=True" -a "bytecode_handlers_path=./bytecode_handlers_ir.bc"', label: 'UnitTest (Simple, Compiled Execution)'
     sh script: 'cd build && PYTHONPATH=.. timeout 20m python3 -m script.testing.junit --build-type=release --query-mode=extended', label: 'UnitTest (Extended)'
+    sh script: 'cd build && PYTHONPATH=.. timeout 60m python3 -m script.testing.junit --build-type=release --query-mode=extended -a "compiled_query_execution=True" -a "bytecode_handlers_path=./bytecode_handlers_ir.bc"', label: 'UnitTest (Extended, Compiled Execution)'
     sh script: 'sudo lsof -i -P -n | grep LISTEN || true', label: 'Check ports.'
     sh 'cd build && timeout 1h ninja check-tpl'
     sh 'cd build && timeout 1h ninja jumbotests'
@@ -314,7 +323,9 @@ pipeline {
     sh script: 'sudo lsof -i -P -n | grep LISTEN || true', label: 'Check ports.'
     sh script: 'cd build/bin && PYTHONPATH=../.. timeout 20m python3 -m script.testing.replication.tests_simple --build-type=release', label: 'Replication (Simple)'
     sh script: 'cd build && PYTHONPATH=.. timeout 20m python3 -m script.testing.junit --build-type=release --query-mode=simple', label: 'UnitTest (Simple)'
+    sh script: 'cd build && PYTHONPATH=.. timeout 60m python3 -m script.testing.junit --build-type=release --query-mode=simple -a "compiled_query_execution=True" -a "bytecode_handlers_path=./bytecode_handlers_ir.bc"', label: 'UnitTest (Simple, Compiled Execution)'
     sh script: 'cd build && PYTHONPATH=.. timeout 20m python3 -m script.testing.junit --build-type=release --query-mode=extended', label: 'UnitTest (Extended)'
+    sh script: 'cd build && PYTHONPATH=.. timeout 60m python3 -m script.testing.junit --build-type=release --query-mode=extended -a "compiled_query_execution=True" -a "bytecode_handlers_path=./bytecode_handlers_ir.bc"', label: 'UnitTest (Extended, Compiled Execution)'
     sh script: 'sudo lsof -i -P -n | grep LISTEN || true', label: 'Check ports.'
     sh 'cd build && timeout 1h ninja check-tpl'
     sh 'cd build && timeout 1h ninja jumbotests'
diff --git a/build-support/tpl_bytecode_handlers_ir_compiler.py b/build-support/tpl_bytecode_handlers_ir_compiler.py
index 9be8989e5e..8872542e80 100755
--- a/build-support/tpl_bytecode_handlers_ir_compiler.py
+++ b/build-support/tpl_bytecode_handlers_ir_compiler.py
@@ -1,48 +1,91 @@
 #!/usr/bin/env python3
-"""
-Usage: tpl_bytecode_handlers_ir_compiler.py LLVM_COMPILER CMAKE_BINARY_DIR BCH_CPP BCH_OUT
-    where
-    LLVM_COMPILER = The path to clang++ on the system.
-    CMAKE_BINARY_DIR = The build directory, which must contain compile_commands.json.
-    BCH_CPP = The path to the bytecode_handlers_ip.cpp file to be compiled.
-    BCH_OUT = The output path for the compiled file.
-
-Compiles bytecode_handlers_ir.cpp with clang++.
-This script is necessary because it is not possible to mix compilers in a single CMake project
-without going through some superproject / externalproject shenanigans. Moreover, with "modern"
-target-based CMake, you do not have access to a convenient list of all the CMAKE_CXX_FLAGS
-that another compiler should compile with, and you do not have an easy way of extracting a
-full set of target properties for compilation.
-
-So instead, we parse compile_commands.json to get the flags that bytecode_handlers_ir.cpp
-should be compiled with. Note that we depend on the command being
-    /usr/bin/c++ (WE EXTRACT THIS) -o blah -c blah
-which is the case for at least CMake 3.16 on Ubuntu when generating for both Make and Ninja.
-
-Fortunately, we are only compiling with common flags which are shared by both gcc and clang.
-If this changes, we may need the above superproject / externalproject solutions.
-"""
+#
+# tpl_bytecode_handlers_ir_compiler.py
+# Compiles bytecode_handlers_ir.cpp with clang++.
+#
+# Usage:
+#
+#   tpl_bytecode_handlers_ir_compiler.py LLVM_COMPILER CMAKE_BINARY_DIR BCH_CPP BCH_OUT
+#
+# where
+#   LLVM_COMPILER = The path to clang++ on the system.
+#   CMAKE_BINARY_DIR = The build directory, which must contain compile_commands.json.
+#   BCH_CPP = The path to the bytecode_handlers_ir.cpp file to be compiled.
+#   BCH_OUT = The output path for the compiled file.
+#
+#
+# This script is necessary because it is not possible to mix compilers in a single CMake project
+# without going through some superproject / externalproject shenanigans. Moreover, with "modern"
+# target-based CMake, you do not have access to a convenient list of all the CMAKE_CXX_FLAGS
+# that another compiler should compile with, and you do not have an easy way of extracting a
+# full set of target properties for compilation.
+#
+# So instead, we parse compile_commands.json to get the flags that bytecode_handlers_ir.cpp
+# should be compiled with. Note that we depend on the command being
+#   /usr/bin/c++ (WE EXTRACT THIS) -o blah -c blah
+# which is the case for at least CMake 3.16 on Ubuntu when generating for both Make and Ninja.
+#
+# Fortunately, we are only compiling with common flags which are shared by both gcc and clang.
+# If this changes, we may need the above superproject / externalproject solutions.
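(For context: `compile_commands.json` is a JSON array in which each entry records the `directory`, `command`, and `file` used to compile one translation unit. The script extracts the relevant flags by scanning the file line by line; the snippet below is a minimal, hypothetical sketch of the same lookup done with the `json` module, shown only to clarify the structure being parsed. It is not part of this patch, and the slicing mirrors what `get_clang_flags()` below assumes about the command layout.)

```python
import json

# Hypothetical sketch: locate the compile command that CMake recorded for
# bytecode_handlers_ir.cpp. Each entry in compile_commands.json has
# "directory", "command" (or "arguments"), and "file" keys.
with open("compile_commands.json") as f:
    entries = json.load(f)

command = next(entry["command"] for entry in entries
               if entry["file"].endswith("bytecode_handlers_ir.cpp"))

# Drop the compiler (first token) and the trailing "-o <out> -c <src>" tokens,
# mirroring the slicing performed in get_clang_flags() below.
flags = command.split(" ")[1:-4]
```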
 import os
-import subprocess
 import sys
+import subprocess
+from typing import List
+
+# Script exit codes
+EXIT_SUCCESS = 0
+EXIT_FAILURE = -1
 
 PROGRAM_NAME = sys.argv[0]
+
 # The Clang compiler that will emit LLVM.
 PATH_TO_LLVM_COMPILER = sys.argv[1]
+
 # cd to the build directory, which should have a compile_commands.json file.
 PATH_TO_CMAKE_BINARY_DIR = sys.argv[2]
+
 # The bytecode_handlers_ir.cpp file to be compiled.
 PATH_TO_BCH_CPP = sys.argv[3]
+
 # The output path and filename.
 PATH_TO_BCH_OUT = sys.argv[4]
 
+# Those flags that we do not want passed through to
+# clang++ for compilation of the bytecode handlers
+FLAG_BLACKLIST = [
+    "",
+    "--coverage",           # Relevant?
+    "-fPIC",                # Relevant?
+    "-ggdb",                # No need for debug symbols
+    "-fsanitize=address"    # Don't want ASAN instrumentation
+]
+
+# Those flags that we want to transform in some way
+# as they are passed through to clang++
+FLAG_TRANSFORMS = {
+    "-O0": "-O3",   # Always optimize
+}
+
+def apply_transform(flag: str) -> str:
+    """
+    Apply any transformations defined for the flag if they
+    are present; otherwise return the flag unmodified.
+    :param flag The input flag
+    :return The flag with transformation applied
+    """
+    return FLAG_TRANSFORMS[flag] if flag in FLAG_TRANSFORMS else flag
 
-def GetClangFlags():
-    prev = ''
-    with open('compile_commands.json') as f:
+def get_clang_flags() -> List[str]:
+    """
+    Compute the flags passed to clang++ to compile the bytecodes.
+    :return A list of the flags to pass to clang++ (strings)
+    """
+    prev = ""
+    with open("compile_commands.json") as f:
         for line in f:
-            # Look for the line that ends with tpl.cpp". The preceding line should be the compilation command.
+            # Look for the line that ends with bytecode_handlers_ir.cpp".
+            # The preceding line should be the compilation command.
             if line.endswith('bytecode_handlers_ir.cpp"\n'):
                 command = prev
                 # Some magic parsing logic. I hate this.
@@ -50,18 +93,23 @@ def GetClangFlags():
                 # Remove the compiler (idx 0) and executable (-o blah -c blahblah).
                 command = command.split(' ')[1:-4]
                 # Return the compile command.
-                return [x for x in command if x != '' and x != '--coverage' and x != '-fPIC']
+                return [apply_transform(c) for c in filter(lambda x: x not in FLAG_BLACKLIST, command)]
+
             # Record the line for the next iteration.
             prev = line
     raise Exception("Could not find bytecode_handlers_ir.cpp in compile_commands.json.")
 
-
-if __name__ == '__main__':
+def main() -> int:
     os.chdir(PATH_TO_CMAKE_BINARY_DIR)
-    call = [PATH_TO_LLVM_COMPILER] + GetClangFlags() + ["-emit-llvm", "-o", PATH_TO_BCH_OUT, "-c", PATH_TO_BCH_CPP]
-    call = ' '.join(call)
+    call = [PATH_TO_LLVM_COMPILER] + get_clang_flags() + ["-emit-llvm", "-o", PATH_TO_BCH_OUT, "-c", PATH_TO_BCH_CPP]
+    call = " ".join(call)
     try:
         subprocess.check_call(call, shell=True)
-        print('{} invoked: {}'.format(PROGRAM_NAME, call))
+        print("{} invoked: {}".format(PROGRAM_NAME, call))
     except:
-        sys.exit(-1)
+        return EXIT_FAILURE
+
+    return EXIT_SUCCESS
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/script/testing/README.md b/script/testing/README.md
index 9ae53854f6..6f9d21b605 100644
--- a/script/testing/README.md
+++ b/script/testing/README.md
@@ -1,67 +1,73 @@
 # Testing Scripts
 
-## Documentation
-
-Read and follow this: https://numpydoc.readthedocs.io/en/latest/format.html
+This document describes the benchmarks and tests defined in this directory.
 ## Folder structure
-All tests are compatible with python3
-- `util`: all the common utilities for running all kinds of tests
-- `junit`: entry script to fire a junit test (and many other supporting configs)
-- `micro_bench`: entry script to run the microbenchmark tests
-- `oltpbench`: entry script to fire an oltp bench test
-- `artifact_stats`: entry script to collect the artifact stats
-- `reporting`: utility scripts for posting test data to Django API and formating JSON payloads
-
-## Util
-`util` folder contains a list of common Python scripts
-- `common.py`: functions that can be used in many different settings
-- `constants.py`: all the constants used in the any file under the `util` or across the different tests
-- `db_server.py`: It provides a `NoisePageServer` class that can start, stop, or restart an instance of the NoisePage
-- `test_server.py`: the base `TestServer` class for running all types of tests
-- `test_case.py`: the base `TestCase` class for all types of test cases.
-- `mem_metrics.py`: the `MemoryMetric` class and `MemoryInfo` named tuple to manage the memory related information during the run time of the tests.
-- `periodic_task.py`: the `PeriodicTask` class provides a general utility in Python which runs a separate thread that will execute a subprocess every `x` seconds until told to stop.
-
-## OLTP Bench
-`oltpbench` folder contains Python scripts for running an oltp bench test. Refer to [OLTP Benchmark Testing](https://github.com/cmu-db/noisepage/tree/master/script/testing/oltpbench/README.md) for more details.
-
-## How to run a test
-To run a test of a certain type, just run the `run_.py` script in the respective folder. For example, if you want to run a junit test, just simply run `python3 junit/run_junit.py`.
-
-By doing that, `junit/run_junit.py` script will try to import the `TestJUnit` class from the `util/TestJunit.py`, which subsequently use most of the functionalities provided from its super class `TestServer` from `util/TestServer.py`.
-
-## QueryMode
-For both `junit` and `oltpbench`, we support 2 query modes with the optional argument `--query-mode`
+
+A brief overview of the contents of this directory follows below.
+
+- `util/`: All the common utilities for running all kinds of tests
+- `junit/`: Entry script to fire a junit test (and many other supporting configs)
+- `microbench/`: Entry script to run the microbenchmark tests
+- `oltpbench/`: Entry script to fire an OLTPBench test
+- `artifact_stats/`: Entry script to collect the artifact stats
+- `reporting/`: Utility scripts for posting test data to Django API and formatting JSON payloads
+
+## Utilities
+
+The `util/` subdirectory contains a list of common Python scripts:
+
+- `common.py`: Contains functions that can be used in many different settings
+- `constants.py`: Contains all the constants used in any file under `util/` or across the different tests
+- `db_server.py`: Provides a `NoisePageServer` class that can start, stop, or restart an instance of NoisePage
+- `test_server.py`: Defines the base `TestServer` class for running all types of tests
+- `test_case.py`: Defines the base `TestCase` class for all types of test cases
+- `mem_metrics.py`: Defines the `MemoryMetric` class and `MemoryInfo` named tuple to manage memory-related information during the runtime of the tests
+- `periodic_task.py`: Defines the `PeriodicTask` class, which runs a separate thread that executes a subprocess every `x` seconds until told to stop
+
+## OLTPBench
+
+The `oltpbench/` subdirectory contains Python scripts for running an OLTPBench test. Refer to [OLTP Benchmark Testing](https://github.com/cmu-db/noisepage/tree/master/script/testing/oltpbench/README.md) for more details.
+
+## Running a Test
+
+To run a test of a certain type, just run the `run_.py` script in the respective folder. For example, if you want to run a JUnit test, simply run `python3 junit/run_junit.py`.
+
+The `junit/run_junit.py` script imports the `TestJUnit` class from `util/TestJunit.py`, which in turn uses most of the functionality provided by its superclass `TestServer` from `util/TestServer.py`.
+
+## Query Modes
+
+For both JUnit tests and OLTPBench benchmarks, we support two query modes with the optional argument `--query-mode`:
+
 - `simple` (default if not specified)
 - `extended`
 
 If you specify the `--query-mode extended`, you then can also indicate the prepare threshold (default is `5`) with the optional argument `--prepare-threshold` with type `int`. Please be reminded that if you choose the query mode as `simple`, the prepare threshold will be ignored.
 
-## TestServer
-`TestServer` is the base class for running all types of the tests.
+## Test workflow
 
-### Test workflow
-- check if the noisepage bin exists
-- run the pre-suite task (test suite specific)
-  - e.g. install oltp bin
-- run the test sequentially
-  - [Optional] fork a subprocess to start the DB (via python subprocess.Popen)
-    - if skip this step, the test will run on the used database
-  - run the pre-test task (test specific)
-  - fork a subprocess to start the test process using the command (via python subprocess.Popen)
-  - check the return code from the OS
-  - write the stdout and the stderr to the test output log file
-  - run the post-test task (test specific)
-  - [Optional] stop the DB
-    - if skip this step, the populated database can be used for following experiments
-- run the post-suite task (test suite specific)
-- print out the logs to the stdout
-
-### Adding a new test case
-The classes in the `util` folder can be used and extend to help you create a new test type.
+- Check if the noisepage bin exists
+- Run the pre-suite task (test suite specific)
+  - e.g. install oltp bin
+- Run the test sequentially
+  - [Optional] Fork a subprocess to start the DB (via python subprocess.Popen)
+    - If this step is skipped, the test will run against an existing database instance
+  - Run the pre-test task (test specific)
+  - Fork a subprocess to start the test process using the command (via python subprocess.Popen)
+  - Check the return code from the OS
+  - Write the stdout and the stderr to the test output log file
+  - Run the post-test task (test specific)
+  - [Optional] Stop the DB
+    - If this step is skipped, the populated database can be reused for subsequent experiments
+- Run the post-suite task (test suite specific)
+- Print the logs to stdout
+
+## Adding a New Test Case
+
+The classes in the `util/` subdirectory can be used and extended to help you create a new test type.
 All test cases should inherit from the `TestCase` class. Anyone is free to modify any attribute from the base class.
+
 - Mandatory attributes
   - `test_command` (`List(str)`): the command to run the test case
 - Optional attributes
@@ -72,7 +78,8 @@ All test cases should inherit from the `TestCase` class. Anyone is free to modif
   - `run_pre_test`: the pre-test tasks required for the test
     - e.g. install oltp bin, turn on the DB metrics collection, etc.
   - `run_post_test`: the post-test tasks required for the test
     - e.g. parse the output json, etc.
 
-### Base classes
+## Base classes
+
 - `NoisePageServer`
   - Manage the lifecycle of the NoisePage instance. It create a Python subprocess for the NoisePage process, poll the logs, and terminate or kill it when the test finishes
 - `TestCase`
@@ -80,9 +87,10 @@ All test cases should inherit from the `TestCase` class. Anyone is free to modif
   - The base class for all test cases which defines the basic attributes of a test
   - The `TestCase` class also provides `run_pre_test` and `run_post_test` functions for you to override for preparation and clean up of each test case.
 - `TestServer`
   - Manage the entire lifecycle of a test. It uses the `NoisePageServer` to manage the database process. One `TestServer` can have a list of `TestCase`s and treats the entire collection as a suite.
-  - It also provides the `run_pre_suite` and `run_post_suite` functions for you to override to specify any preparation and cleanup at the suite level.
+  - Also provides the `run_pre_suite` and `run_post_suite` functions for you to override to specify any preparation and cleanup at the suite level.
+
+## Step-by-step instructions
 
-### Step-by-step instructions
 - Create the folder for your test under `noisepage/script/testing/`
 - In the folder of your test, create the following files
   - `run_.py`
@@ -102,6 +110,7 @@ All test cases should inherit from the `TestCase` class. Anyone is free to modif
   - You can refer to [oltpbench/util.py](https://github.com/cmu-db/noisepage/blob/master/script/testing/oltpbench/util.py) for reference.
 - Create a stage for your test in Jenkins pipeline
   - Go to `noisepage/Jenkinsfile`, create a stage at the place of your choice, and create the stage based on the template config as below.
+
   ```groovy
   stage('My Test') {
       parallel{
@@ -140,4 +149,14 @@ All test cases should inherit from the `TestCase` class. Anyone is free to modif
           }
       }
   }
-  ```
\ No newline at end of file
+  ```
+
+## Testing the Tests (_Meta-Tests_)
+
+The `meta` subdirectory contains some basic unit tests for various functionality provided by the testing infrastructure. You can run the unit tests from the top level directory (`testing/`) with:
+
+```bash
+$ python -m unittest discover -s meta -t .
+```
+
+This will run all of the unit tests defined in the `meta/` subdirectory.
\ No newline at end of file
diff --git a/script/testing/meta/__init__.py b/script/testing/meta/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/script/testing/meta/util/__init__.py b/script/testing/meta/util/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/script/testing/meta/util/test_db_server.py b/script/testing/meta/util/test_db_server.py
new file mode 100644
index 0000000000..0010cda874
--- /dev/null
+++ b/script/testing/meta/util/test_db_server.py
@@ -0,0 +1,108 @@
+# test_db_server.py
+# Unit tests for test database server class (db_server.py).
+
+import unittest
+from typing import List
+
+# The module under test
+from util.db_server import construct_server_argument, construct_server_args_string
+
+# A dummy path to the binary directory (absolute path)
+BIN_DIR = "/noisepage/build/bin"
+
+class TestServerArgumentConstruction(unittest.TestCase):
+    """
+    Test the construction of individual arguments passed
+    to the database server. Tests in this TestCase verify
+    that attribute and value preprocessors are behaving
+    as expected.
+    """
+
+    def test_boolean_lowering_true(self):
+        r = construct_server_argument("wal_enable", True, {})
+        self.assertEqual(r, "-wal_enable=true")
+
+    def test_boolean_lowering_false(self):
+        r = construct_server_argument("wal_enable", False, {})
+        self.assertEqual(r, "-wal_enable=false")
+
+    def test_flag_is_none(self):
+        r = construct_server_argument("attribute", None, {})
+        self.assertEqual(r, "-attribute")
+
+    def test_flag_not_none(self):
+        r = construct_server_argument("attribute", "value", {})
+        self.assertEqual(r, "-attribute=value")
+
+    def test_non_path_is_not_expanded_current(self):
+        r = construct_server_argument("some_argument", "./hello", {"bin_dir": "/some/binary/directory"})
+        self.assertEqual(r, "-some_argument=./hello")
+
+    def test_non_path_is_not_expanded_parent(self):
+        r = construct_server_argument("some_argument", "../hello", {"bin_dir": "/some/binary/directory"})
+        self.assertEqual(r, "-some_argument=../hello")
+
+    def test_path_is_expanded_current(self):
+        r = construct_server_argument("argument_path", "./hello", {"bin_dir": "/some/binary/directory"})
+        self.assertEqual(r, "-argument_path=/some/binary/directory/hello")
+
+    def test_path_is_expanded_parent(self):
+        r = construct_server_argument("argument_path", "../hello", {"bin_dir": "/some/binary/directory"})
+        self.assertEqual(r, "-argument_path=/some/binary/hello")
+
+class TestServerArgumentStringConstruction(unittest.TestCase):
+    """
+    Test the construction of a full server argument string
+    from one or many individual arguments (attribute:value)
+    that must be forwarded to the DBMS server.
+ """ + + def test_single_argument_0(self): + r = construct_server_args_string({"wal_enable": True}, BIN_DIR) + self.assertEqual(r, "-wal_enable=true") + + def test_single_argument_1(self): + r = construct_server_args_string({"wal_enable": False}, BIN_DIR) + self.assertEqual(r, "-wal_enable=false") + + def test_multi_argument_0(self): + r = construct_server_args_string({"attr0": "value0", "attr1": "value1"}, BIN_DIR) + e = "-attr0=value0 -attr1=value1".split() + self.assertTrue(contains_all_substrings(r, e)) + + def test_multi_argument_1(self): + r = construct_server_args_string({"attr0": True, "attr1": "value1"}, BIN_DIR) + e = "-attr0=true -attr1=value1".split() + self.assertTrue(contains_all_substrings(r, e)) + + def test_multi_argument_2(self): + r = construct_server_args_string({"attr0": "value0", "attr1": True}, BIN_DIR) + e = "-attr0=value0 -attr1=true".split() + self.assertTrue(contains_all_substrings(r, e)) + + def test_multi_argument_3(self): + r = construct_server_args_string({"attr0": True, "attr1": True}, BIN_DIR) + e = "-attr0=true -attr1=true".split() + self.assertTrue(contains_all_substrings(r, e)) + +def contains_all_substrings(haystack: str, needles: List[str]) -> bool: + """ + Determine if the string `haystack` contains each of the + strings in `needles` as substrings. + + We use this method to check the correctness of the full + server argument string construction because we don't want + to bake-in any guarantees regarding the order in which + arguments are processed, because correctness does not + ultimately depend on the ordering. + + :param haystack The full query string + :param needles A list of substrings for which to search + :return `True` if `haystack` contains all of the strings + provided in `needles` as substrings, `False` otherwise + (note that all([]) returns `True` (vacuously true)) + """ + return all(needle in haystack for needle in needles) + +if __name__ == "__main__": + unittest.main() diff --git a/script/testing/util/db_server.py b/script/testing/util/db_server.py index 6187102aa6..2ff1f6cb72 100644 --- a/script/testing/util/db_server.py +++ b/script/testing/util/db_server.py @@ -1,22 +1,30 @@ +# db_server.py +# Class definition for `NoisePageServer` used in JUnit tests. + import os +import time import shlex +import pathlib import subprocess -import time import psycopg2 as psql +from typing import Dict, List + from .common import print_pipe from .constants import (DEFAULT_DB_BIN, DEFAULT_DB_HOST, DEFAULT_DB_OUTPUT_FILE, DEFAULT_DB_PORT, DEFAULT_DB_USER, DEFAULT_DB_WAL_FILE, DIR_REPO, LOG) +# ----------------------------------------------------------------------------- +# NoisePageServer class NoisePageServer: """ NoisePageServer represents a NoisePage DBMS instance. """ - def __init__(self, host=DEFAULT_DB_HOST, port=DEFAULT_DB_PORT, build_type='', server_args=None, + def __init__(self, host=DEFAULT_DB_HOST, port=DEFAULT_DB_PORT, build_type='', server_args={}, db_output_file=DEFAULT_DB_OUTPUT_FILE): """ Creates an instance of the DB that can be started, stopped, or restarted. @@ -35,15 +43,12 @@ def __init__(self, host=DEFAULT_DB_HOST, port=DEFAULT_DB_PORT, build_type='', se db_output_file : str, filepath The output file that the DB should output its logs to. 
""" - if server_args is None: - server_args = {} - default_server_args = { 'wal_file_path': DEFAULT_DB_WAL_FILE } self.db_host = host self.db_port = port - self.build_path = get_build_path(build_type) + self.binary_dir = get_binary_directory(build_type) self.server_args = {**default_server_args, **server_args} self.db_output_file = db_output_file self.db_process = None @@ -68,8 +73,11 @@ def run_db(self, is_dry_run=False): which is used to determine whether the database is ready to accept incoming connections. """ - server_args_str = generate_server_args_str(self.server_args) - db_run_command = f'{self.build_path} {server_args_str}' + # Construct the server arguments string from the map of arguments + server_args_str = construct_server_args_string(self.server_args, self.binary_dir) + + # Construct the complete command to launch the DBMS server + db_run_command = f"{os.path.join(self.binary_dir, DEFAULT_DB_BIN)} {server_args_str}" if is_dry_run: LOG.info(f'Dry-run: {db_run_command}') @@ -226,10 +234,12 @@ def execute(self, sql, expect_result=True, quiet=True, user=DEFAULT_DB_USER, aut LOG.error(f"Executing SQL failed: {sql}") raise e +# ----------------------------------------------------------------------------- +# Server Utilities -def get_build_path(build_type): +def get_binary_directory(build_type): """ - Get the path to the DBMS binary. + Get the path in which the DBMS binary resides. Parameters ---------- @@ -238,7 +248,7 @@ def get_build_path(build_type): Returns ------- - The path to the DBMS binary to be used. + The absolute path to the directory in which the DBMS binary is located. Warnings -------- @@ -254,21 +264,250 @@ def get_build_path(build_type): ("CLion", "cmake-build-{}".format(build_type)), ] for _, path in path_list: - db_bin_path = os.path.join(DIR_REPO, path, "bin", DEFAULT_DB_BIN) - LOG.debug(f'Locating DBMS binary in: {db_bin_path}') - if os.path.exists(db_bin_path): - return db_bin_path + bin_dir = os.path.join(DIR_REPO, path, "bin") + bin_path = os.path.join(bin_dir, DEFAULT_DB_BIN) + LOG.debug(f'Locating DBMS binary at: {bin_path}') + if os.path.exists(bin_path): + return bin_dir raise RuntimeError(f'No DBMS binary found in: {path_list}') +def construct_server_args_string(server_args, bin_dir): + """ + Construct the arguments string to pass to the DBMS server. + + Parameters + ---------- + server_args : dict + The dictionary of argument:value + bin_dir : str + The absolute path to the directory in which the DBMS binary resides + + Returns + ------- + The complete string of DBMS server arguments. + """ + # Construct the metadata object that is provided to each preprocessor + meta = {"bin_dir": bin_dir} + + # Construct the string DBMS argument string + return " ".join([construct_server_argument(attribute, value, meta) for attribute, value in server_args.items()]) + +def construct_server_argument(attr, value, meta): + """ + Construct a DBMS server argument from the associated attribute and value. + + Construction of the arguments to the DBMS server may rely on certain + preprocessing steps to take place prior to injecting the completed + argument into the string passed to the DBMS server itself. This function + composes all of these preprocessing steps that must be applied to each + attribute - value pair supplied to the DBMS. 
+
+    Arguments
+    ---------
+    attr : str
+        The attribute string
+    value : str
+        The value string
+    meta : Dict
+        The dictionary of meta-information passed to each preprocessor
+
+    Returns
+    -------
+    The preprocessed argument in the format expected by the DBMS.
+    """
+    # Currently do not require any attribute preprocessing
+    ATTR_PREPROCESSORS = []
+
+    # The preprocessing steps required for individual argument values
+    # NOTE(Kyle): The order of preprocessors is important here because,
+    # as one example, resolve_relative_paths looks for a relative path
+    # designation (i.e. './') at the front of the value, while handle_flags
+    # will append the necessary '=' for non-flag arguments, which would
+    # obviously confound relative path expansion if applied first.
+    VALUE_PREPROCESSORS = [
+        resolve_relative_paths,
+        lower_booleans,
+        handle_flags,
+    ]
+
+    # Make the value available to the attribute preprocessors
+    attr_meta = {**meta, **{"value": value}}
+
+    # Make the attribute available to the value preprocessors
+    value_meta = {**meta, **{"attr": attr}}
+
+    preprocessed_attr = apply_all(ATTR_PREPROCESSORS, attr, attr_meta)
+    preprocessed_value = apply_all(VALUE_PREPROCESSORS, value, value_meta)
+    return f"-{preprocessed_attr}{preprocessed_value}"
+
+# -----------------------------------------------------------------------------
+# Preprocessing Utilities
+
+class AllTypes:
+    """
+    A dummy catch-all type for value or attribute preprocessors
+    that should ALWAYS be applied, regardless of the type of the
+    value or attribute being processed.
+    """
+    def __init__(self):
+        pass
+
+def applies_to(*target_types):
+    """
+    A decorator that produces a no-op function in the event that the 'target'
+    (i.e. first) argument provided to a preprocessing function is not an
+    instance of one of the types specified in the decorator arguments.
+
+    This function should not be invoked directly; it is intended to be used
+    as a decorator for preprocessor functions to deal with the fact that
+    certain preprocessing operations are only applicable to certain types.
+    """
+    def wrap_outer(f):
+        def wrap_inner(target, meta):
+            # The argument is a targeted type if the catch-all type AllTypes
+            # is provided as an argument to the decorator OR the argument is
+            # an instance of any of the types provided as an argument
+            arg_is_targeted_type = AllTypes in target_types or any(isinstance(target, ty) for ty in target_types)
+            return f(target, meta) if arg_is_targeted_type else target
+        return wrap_inner
+    return wrap_outer
+
+# -----------------------------------------------------------------------------
+# Attribute Preprocessors
+#
+# The signature for attribute preprocessors must be:
+#   def preprocessor(attr: str, meta: Dict) -> str:
+#       ...
+
+
+# -----------------------------------------------------------------------------
+# Value Preprocessors
+#
+# The signature for value preprocessors must be:
+#   def preprocessor(value: str, meta: Dict) -> str:
+#       ...
+
+@applies_to(bool)
+def lower_booleans(value: str, meta: Dict) -> str:
+    """
+    Lower boolean string values to the format expected by the DBMS server.
+
+    e.g.
+        `True` -> `true`
+        `False` -> `false`
+
+    Arguments
+    ---------
+    value : str
+        The DBMS server argument value
+    meta : Dict
+        Dictionary of meta-information available to all preprocessors
+
+    Returns
+    -------
+    The preprocessed server argument value
+    """
+    assert value is True or value is False, "Input must be a first-class boolean type."
+    return str(value).lower()
+
+@applies_to(str)
+def resolve_relative_paths(value: str, meta: Dict) -> str:
+    """
+    Resolve relative paths in the DBMS server arguments to their equivalent absolute paths.
+
+    When specifying path arguments to the DBMS, it is often simpler to think in terms
+    of how the path relates to the location of the DBMS server binary. However, because
+    the command used by NoisePageServer to launch the DBMS server instance specifies
+    the absolute path to the binary, relative paths (that necessarily encode the expected
+    location of the binary) will fail to function properly. Expanding these relative paths
+    to their absolute counterparts before passing them to the DBMS addresses this issue.
+
+    The above highlights the fundamental limitation of this current implementation, namely:
+
+        ALL RELATIVE PATHS ARE ASSUMED TO BE RELATIVE TO THE DBMS BINARY DIRECTORY
+
+    All relative paths that are relative to another directory will fail to resolve properly.
+
+    Arguments
+    ---------
+    value : str
+        The DBMS server argument value
+    meta : Dict
+        Dictionary of meta-information available to all preprocessors
+
+    Returns
+    -------
+    The preprocessed server argument value
+    """
+    # NOTE(Kyle): This is somewhat dirty because it introduces a
+    # 'hidden' dependency that is not reflected in the DBMS code:
+    # we only resolve those arguments that actually end with `_path`.
+    # In practice, I prefer this to the alternative of just assuming
+    # that anything that starts with './' or '../' is a relative path,
+    # and it ensures that, at worst, we do LESS resolving than might
+    # otherwise be expected, never more.
+    is_path = str.endswith(meta["attr"], "_path")
+    is_relative = not os.path.isabs(value)
+
+    if is_path and is_relative:
+        return pathlib.Path(os.path.join(meta["bin_dir"], value)).resolve()
+    else:
+        return value
+
+@applies_to(AllTypes)
+def handle_flags(value: str, meta: Dict) -> str:
+    """
+    Handle DBMS server arguments with no associated value.
+
+    Some arguments to the DBMS are flags that do not have an associated value;
+    in these cases, we do not want to format the complete argument as
+    `-attribute=value` and instead want to format it as `-attribute` alone.
+    This preprocessor encapsulates the logic for this transformation.
-def generate_server_args_str(server_args):
-    """ Create a server args string to pass to the DBMS """
-    server_args_arr = []
-    for attribute, value in server_args.items():
-        value = str(value).lower() if isinstance(value, bool) else value
-        value = f'={value}' if value != None else ''
-        arg = f'-{attribute}{value}'
-        server_args_arr.append(arg)
+
+    TODO(Kyle): Do we actually support any arguments like this?
+    I can't seem to come up with any actual examples...
-    return ' '.join(server_args_arr)
+
+    Arguments
+    ---------
+    value : str
+        The DBMS server argument value
+    meta : Dict
+        Dictionary of meta-information available to all preprocessors
+
+    Returns
+    -------
+    The preprocessed server argument value
+    """
+    return f"={value}" if value is not None else ""
+
+# -----------------------------------------------------------------------------
+# Utility
+
+def apply_all(functions: List, init_obj, meta: Dict):
+    """
+    Apply all of the functions in `functions` to object `init_obj` sequentially,
+    supplying metadata object `meta` to each function invocation.
+
+    TODO(Kyle): Initially I wanted to implement this with function composition
+    in terms of functools.reduce() which makes it really beautiful, but there
+    we run into issues with multi-argument callbacks, and the real solution is
+    to use partial application, but this seemed like overkill... maybe revisit.
+
+    Arguments
+    ---------
+    functions : List[function]
+        The collection of functions to invoke
+    init_obj : object
+        Arbitrary object to which functions should be applied
+    meta : object
+        Arbitrary meta-object supplied as second argument to each function invocation
+
+    Returns
+    -------
+    The result of applying each function in `functions` to `init_obj`.
+    """
+    obj = init_obj
+    for function in functions:
+        obj = function(obj, meta)
+    return obj
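To make the behavior of the new argument-construction pipeline concrete, here is a small usage sketch. It assumes the helpers above are importable as `util.db_server` (as laid out in this diff) and that the snippet is run from the `script/testing/` directory; the argument dictionary and binary directory are made up for illustration. The expected output follows from the value preprocessors and matches the expectations encoded in the meta-tests.

```python
# Illustrative usage only; the server_args dictionary and bin_dir below are hypothetical.
from util.db_server import construct_server_args_string

server_args = {
    "wal_enable": True,                                     # boolean -> lowered to "true"
    "compiled_query_execution": True,                       # boolean -> lowered to "true"
    "bytecode_handlers_path": "./bytecode_handlers_ir.bc",  # *_path value -> resolved against bin_dir
}

print(construct_server_args_string(server_args, "/noisepage/build/bin"))
# -wal_enable=true -compiled_query_execution=true -bytecode_handlers_path=/noisepage/build/bin/bytecode_handlers_ir.bc
```

This path expansion is presumably what lets the new Jenkinsfile stages pass the relative `bytecode_handlers_path=./bytecode_handlers_ir.bc` argument and still have the server receive an absolute path.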