diff --git a/.github/workflows/load-test.yml b/.github/workflows/load-test.yml index afc43c624..d5ed9d684 100644 --- a/.github/workflows/load-test.yml +++ b/.github/workflows/load-test.yml @@ -35,7 +35,7 @@ jobs: run: mvn clean compile package -DskipTests=true - name: execute py script run: | - python loadTest.py + python perf_test/perfTestRunner.py git config user.name nitish git config user.email nitish1814@github.com git add . diff --git a/common/core/src/main/java/zingg/common/core/block/Canopy.java b/common/core/src/main/java/zingg/common/core/block/Canopy.java index d4e63a30d..69733f34f 100644 --- a/common/core/src/main/java/zingg/common/core/block/Canopy.java +++ b/common/core/src/main/java/zingg/common/core/block/Canopy.java @@ -16,6 +16,7 @@ public class Canopy implements Serializable { + private static final long serialVersionUID = -229533781044789499L; public static final Log LOG = LogFactory.getLog(Canopy.class); diff --git a/loadTestReport_1731315754.8297133 b/loadTestReport_1731315754.8297133 deleted file mode 100644 index 6ae7b4b06..000000000 --- a/loadTestReport_1731315754.8297133 +++ /dev/null @@ -1,18 +0,0 @@ -******************************** perf test report, 2024-11-11 ******************************** - ---------- Test bed details --------- -Load samples: 65_samples -Phases: findTrainingData match ------------------------------------- - - capturing for 65_samples -PHASE TIME_TAKEN_IN_MINUTES -findTrainingData 0.02767307758331299 -match 0.026116657257080077 - -*********************************************************************************************** - - - - - diff --git a/loadTestReport_1731316352.9782615 b/loadTestReport_1731316352.9782615 deleted file mode 100644 index 19f5423e4..000000000 --- a/loadTestReport_1731316352.9782615 +++ /dev/null @@ -1,18 +0,0 @@ -******************************** perf test report, 2024-11-11 ******************************** - ---------- Test bed details --------- -Load samples: 65_samples -Phases: findTrainingData match ------------------------------------- - - capturing for 65_samples -PHASE TIME_TAKEN_IN_MINUTES -findTrainingData 0.02484825054804484 -match 0.024951712290445963 - -*********************************************************************************************** - - - - - diff --git a/loadTestReport_1731317648.5763354 b/loadTestReport_1731317648.5763354 deleted file mode 100644 index bad7e8371..000000000 --- a/loadTestReport_1731317648.5763354 +++ /dev/null @@ -1,18 +0,0 @@ -******************************** perf test report, 2024-11-11 ******************************** - ---------- Test bed details --------- -Load samples: 65_samples -Phases: findTrainingData match ------------------------------------- - - capturing for 65_samples -PHASE TIME_TAKEN_IN_MINUTES -findTrainingData 0.03420765399932861 -match 0.031384321053822835 - -*********************************************************************************************** - - - - - diff --git a/loadTestReport_1731318116.360373 b/loadTestReport_1731318116.360373 deleted file mode 100644 index f799bf97e..000000000 --- a/loadTestReport_1731318116.360373 +++ /dev/null @@ -1,18 +0,0 @@ -******************************** perf test report, 2024-11-11 ******************************** - ---------- Test bed details --------- -Load samples: 65_samples -Phases: findTrainingData match ------------------------------------- - - capturing for 65_samples -PHASE TIME_TAKEN_IN_MINUTES -findTrainingData 0.034497920672098795 -match 0.031397847334543864 - -*********************************************************************************************** - - - - - diff --git a/loadTestReport_1731318449.2858524 b/loadTestReport_1731318449.2858524 deleted file mode 100644 index 1cf532efe..000000000 --- a/loadTestReport_1731318449.2858524 +++ /dev/null @@ -1,18 +0,0 @@ -******************************** perf test report, 2024-11-11 ******************************** - ---------- Test bed details --------- -Load samples: 65_samples -Phases: findTrainingData match ------------------------------------- - - capturing for 65_samples -PHASE TIME_TAKEN_IN_MINUTES -findTrainingData 0.035767209529876706 -match 0.03248757521311442 - -*********************************************************************************************** - - - - - diff --git a/models/101/trainingData/unmarked/.part-00000-34139ed1-e811-4cf5-ac0d-d586e5ab2e28-c000.snappy.parquet.crc b/models/101/trainingData/unmarked/.part-00000-34139ed1-e811-4cf5-ac0d-d586e5ab2e28-c000.snappy.parquet.crc new file mode 100644 index 000000000..698c32e35 Binary files /dev/null and b/models/101/trainingData/unmarked/.part-00000-34139ed1-e811-4cf5-ac0d-d586e5ab2e28-c000.snappy.parquet.crc differ diff --git a/models/101/trainingData/unmarked/.part-00000-e1bdba42-bf91-4196-b8c1-a54bb1d3fd66-c000.snappy.parquet.crc b/models/101/trainingData/unmarked/.part-00000-e1bdba42-bf91-4196-b8c1-a54bb1d3fd66-c000.snappy.parquet.crc new file mode 100644 index 000000000..600c310e7 Binary files /dev/null and b/models/101/trainingData/unmarked/.part-00000-e1bdba42-bf91-4196-b8c1-a54bb1d3fd66-c000.snappy.parquet.crc differ diff --git a/models/101/trainingData/unmarked/part-00000-34139ed1-e811-4cf5-ac0d-d586e5ab2e28-c000.snappy.parquet b/models/101/trainingData/unmarked/part-00000-34139ed1-e811-4cf5-ac0d-d586e5ab2e28-c000.snappy.parquet new file mode 100644 index 000000000..4a2ce68cd Binary files /dev/null and b/models/101/trainingData/unmarked/part-00000-34139ed1-e811-4cf5-ac0d-d586e5ab2e28-c000.snappy.parquet differ diff --git a/models/101/trainingData/unmarked/part-00000-e1bdba42-bf91-4196-b8c1-a54bb1d3fd66-c000.snappy.parquet b/models/101/trainingData/unmarked/part-00000-e1bdba42-bf91-4196-b8c1-a54bb1d3fd66-c000.snappy.parquet new file mode 100644 index 000000000..ac84de377 Binary files /dev/null and b/models/101/trainingData/unmarked/part-00000-e1bdba42-bf91-4196-b8c1-a54bb1d3fd66-c000.snappy.parquet differ diff --git a/models/104/trainingData/unmarked/.part-00000-ef87e025-7f8f-4804-9916-37c3a2f983f1-c000.snappy.parquet.crc b/models/104/trainingData/unmarked/.part-00000-ef87e025-7f8f-4804-9916-37c3a2f983f1-c000.snappy.parquet.crc new file mode 100644 index 000000000..ffb7c0416 Binary files /dev/null and b/models/104/trainingData/unmarked/.part-00000-ef87e025-7f8f-4804-9916-37c3a2f983f1-c000.snappy.parquet.crc differ diff --git a/models/104/trainingData/unmarked/part-00000-ef87e025-7f8f-4804-9916-37c3a2f983f1-c000.snappy.parquet b/models/104/trainingData/unmarked/part-00000-ef87e025-7f8f-4804-9916-37c3a2f983f1-c000.snappy.parquet new file mode 100644 index 000000000..de0680837 Binary files /dev/null and b/models/104/trainingData/unmarked/part-00000-ef87e025-7f8f-4804-9916-37c3a2f983f1-c000.snappy.parquet differ diff --git a/perf_test_input.py b/perf_test/perfTestInput.py similarity index 51% rename from perf_test_input.py rename to perf_test/perfTestInput.py index 2f2ddb581..125e6b1d5 100644 --- a/perf_test_input.py +++ b/perf_test/perfTestInput.py @@ -5,15 +5,16 @@ INCREMENTAL = "runIncrement" #load file config to test on -config_65 = "examples/febrl120k/config.json" -config_120k = "examples/febrl120k/config120k.json" +febrl = "./examples/febrl120k/config.json" +febrl_120k = "./examples/febrl120k/config120k.json" +ncVoter_5m = "./examples/ncVoters5M/config.json" #bash script location -ZINGG = "scripts/zingg.sh" +ZINGG = "./scripts/zingg.sh" #add all the load to test -load_configs = {"65_samples" : config_65} +load_configs = {"65_samples" : febrl, "120k_samples" : febrl_120k, "5m_samples" : ncVoter_5m} #add all the phases on which testing is required phases = [FIND_TRAINING_DATA, MATCH] diff --git a/loadTest.py b/perf_test/perfTestRunner.py similarity index 71% rename from loadTest.py rename to perf_test/perfTestRunner.py index 149322147..5b08084f7 100644 --- a/loadTest.py +++ b/perf_test/perfTestRunner.py @@ -1,34 +1,44 @@ import subprocess -from perf_test_input import phases, load_configs, ZINGG +from perfTestInput import phases, load_configs, ZINGG import time -from datetime import date +from datetime import date, datetime from subprocess import PIPE +import os + +#set working directory +os.chdir(os.path.dirname("../")) ZINGG = ZINGG #phases to run: ftd, match phases_to_test = phases +now = datetime.now() +current_time = now.strftime("%H:%M:%S") + #load to test: 65, 120k, 5m load = load_configs start_time = time.time() +reportFile = "./perf_test/perf_test_report/loadTestReport" + def perf_test_small_all(): return "small_test_running_all" +propertyFile = "./config/zingg.conf" def run_phase(phase, conf): print("Running phase - " + phase) - return subprocess.call(ZINGG + " %s %s %s %s" % ("--phase", phase, "--conf", conf), shell=True) + return subprocess.call(ZINGG + " %s %s %s %s %s %s" % ("--phase", phase, "--conf", conf, "--properties-file", propertyFile), shell=True) def perf_test_small(phase): return "small_test_running" def write_on_start(): - f = open("loadTestReport_" + str(start_time), "w+") - f.write("******************************** perf test report, " + str(date.today()) + " ********************************\n\n"); - f.write("--------- Test bed details ---------\n") + f = open(reportFile, "w+") + f.write("******************************** perf test report, " + str(date.today()) + ", " + current_time + " ********************************\n\n"); + f.write("------------ Test bed details ------------\n") f.write("Load samples: ") for load, config in load_configs.items(): f.write(str(load) + " ") @@ -37,27 +47,27 @@ def write_on_start(): for phase in phases: f.write(phase + " ") f.write("\n") - f.write("------------------------------------\n\n") + f.write("------------------------------------------\n\n") f.close() def write_on_complete(): - f = open("loadTestReport_" + str(start_time), "a+") - f.write("***********************************************************************************************\n\n\n\n\n\n") + f = open(reportFile, "a+") + f.write("********************************************************************************************************\n\n\n\n\n\n") def write_success_stats(phase_time, load): - f = open("loadTestReport_" + str(start_time), "a+") + f = open(reportFile, "a+") f.write("{:>50}".format("capturing for " + load) + "\n") f.write("PHASE {:>65}".format("TIME_TAKEN_IN_MINUTES") + "\n") for phase, time in phase_time.items(): - f.write(success_message(phase, time/60) + "\n") + f.write(success_message(phase, round(time/60, 1)) + "\n") f.write("\n") f.close() def write_failure_stats(phase_error): - f = open("loadTestReport_" + str(start_time), "a+") + f = open(reportFile, "a+") for phase, error in phase_error.items(): f.write(error_message(phase, error) + "\n\n") f.close() diff --git a/perf_test/perf_test_report/loadTestReport b/perf_test/perf_test_report/loadTestReport new file mode 100644 index 000000000..5151bb7d0 --- /dev/null +++ b/perf_test/perf_test_report/loadTestReport @@ -0,0 +1,18 @@ +******************************** perf test report, 2024-11-14, 18:09:04 ******************************** + +------------ Test bed details ------------ +Load samples: 65_samples +Phases: findTrainingData match +------------------------------------------ + + capturing for 65_samples +PHASE TIME_TAKEN_IN_MINUTES +findTrainingData 3.0 +match 4.5 + +******************************************************************************************************** + + + + + diff --git a/scripts/zingg.sh b/scripts/zingg.sh index 2ac64f852..125690e77 100755 --- a/scripts/zingg.sh +++ b/scripts/zingg.sh @@ -1,5 +1,4 @@ #!/bin/bash -#ZINGG_HOME=./assembly/target ZINGG_JARS=$ZINGG_HOME/zingg-0.4.1-SNAPSHOT.jar EMAIL=zingg@zingg.ai LICENSE=zinggLicense.txt