From 0f74d714c1549bde93a2a4153caa484305534225 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Thu, 20 Oct 2022 10:02:12 +0000 Subject: [PATCH 01/23] wip --- tests/conftest.py | 3 ++ .../examples/test_notebooks_python.py | 28 ++++++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index d5212fcff5..7ffa163494 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -335,6 +335,9 @@ def notebooks(): "nni_tuning_svd": os.path.join( folder_notebooks, "04_model_select_and_optimize", "nni_surprise_svd.ipynb" ), + "benchmark_movielens": os.path.join( + folder_notebooks, "06_benchmarks", "movielens.ipynb" + ), } return paths diff --git a/tests/integration/examples/test_notebooks_python.py b/tests/integration/examples/test_notebooks_python.py index 98dcef806e..fc2eedb0fc 100644 --- a/tests/integration/examples/test_notebooks_python.py +++ b/tests/integration/examples/test_notebooks_python.py @@ -1,7 +1,6 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. 
-import sys import pytest try: @@ -309,3 +308,30 @@ def test_xlearn_fm_integration(notebooks, output_notebook, kernel_name): ] assert results["auc_score"] == pytest.approx(0.75, rel=TOL, abs=ABS_TOL) + + +@pytest.mark.notebooks +@pytest.mark.integration +@pytest.mark.parametrize( + "size, algos, expected_values", + [ + ( + ["100k"], + ["svd", "sar", "bpr"], + dict( + eval_precision=0.131601, + eval_recall=0.038056, + eval_precision2=0.145599, + eval_recall2=0.051338, + ), + ), + ], +) +def test_benchmark_movielens_cpu(notebooks, output_notebook, kernel_name, size, algos, expected_values): + notebook_path = notebooks["benchmark_movielens"] + pm.execute_notebook( + notebook_path, + output_notebook, + kernel_name=kernel_name, + parameters=dict(data_sizes=size, algorithms=algos), + ) \ No newline at end of file From d6d85bc0e3ec18b00fd796d16f1cd521e0d4f4d8 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Mon, 24 Oct 2022 09:16:28 +0000 Subject: [PATCH 02/23] programmatic execution --- examples/06_benchmarks/movielens.ipynb | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index 28dc5ceb1c..6c0f184bca 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -108,8 +108,8 @@ "import os\n", "import sys\n", "import json\n", - "import pandas as pd\n", "import numpy as np\n", + "import pandas as pd\n", "import seaborn as sns\n", "import pyspark\n", "import tensorflow as tf # NOTE: TF needs to be imported before PyTorch, otherwise we get a weird initialization error\n", @@ -126,6 +126,9 @@ "from recommenders.datasets.python_splitters import python_stratified_split\n", "from recommenders.models.fastai.fastai_utils import hide_fastai_progress_bar\n", "\n", + "\n", + "current_path = os.path.join(os.getcwd(), \"examples\", \"06_benchmarks\") # To execute the notebook programmatically from root folder\n", + 
"sys.path.append(current_path)\n", "from benchmark_utils import * \n", "\n", "print(f\"System version: {sys.version}\")\n", @@ -1175,9 +1178,9 @@ ], "metadata": { "kernelspec": { - "display_name": "reco", + "display_name": "Python 3.7.13 ('reco')", "language": "python", - "name": "conda-env-reco-py" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1193,7 +1196,7 @@ }, "vscode": { "interpreter": { - "hash": "5659d8898e613821d32ce6048323094401019f14577fd202084feb69010a20de" + "hash": "2d9774f375d93db4a064c1fe757ad48aac12718e2d70725b436da9188beb8cf3" } } }, From 5a25e2efee1f79742745c566e533251ef2c86316 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 25 Oct 2022 11:08:32 +0000 Subject: [PATCH 03/23] imports --- examples/06_benchmarks/benchmark_utils.py | 50 ++++++++++++++--------- examples/06_benchmarks/movielens.ipynb | 33 ++++++++++----- 2 files changed, 54 insertions(+), 29 deletions(-) diff --git a/examples/06_benchmarks/benchmark_utils.py b/examples/06_benchmarks/benchmark_utils.py index 561eea4d40..81ef43461a 100644 --- a/examples/06_benchmarks/benchmark_utils.py +++ b/examples/06_benchmarks/benchmark_utils.py @@ -5,13 +5,20 @@ import numpy as np import pandas as pd from tempfile import TemporaryDirectory -from pyspark.ml.recommendation import ALS -from pyspark.sql.types import StructType, StructField -from pyspark.sql.types import FloatType, IntegerType, LongType -from fastai.collab import collab_learner, CollabDataBunch import surprise import cornac +try: + from pyspark.ml.recommendation import ALS + from pyspark.sql.types import StructType, StructField + from pyspark.sql.types import FloatType, IntegerType, LongType +except ImportError: + pass # skip this import if we are not in a Spark environment +try: + from fastai.collab import collab_learner, CollabDataBunch +except ImportError: + pass # skip this import if we are not in a GPU environment +from recommenders.utils.timer import Timer from recommenders.utils.constants import ( 
COL_DICT, DEFAULT_K, @@ -22,27 +29,12 @@ DEFAULT_TIMESTAMP_COL, SEED, ) -from recommenders.utils.timer import Timer -from recommenders.utils.spark_utils import start_or_get_spark from recommenders.models.sar import SAR -from recommenders.models.ncf.ncf_singlenode import NCF -from recommenders.models.ncf.dataset import Dataset as NCFDataset from recommenders.models.surprise.surprise_utils import ( predict, compute_ranking_predictions, ) -from recommenders.models.fastai.fastai_utils import ( - cartesian_product, - score, -) from recommenders.models.cornac.cornac_utils import predict_ranking -from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN -from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF -from recommenders.models.deeprec.deeprec_utils import prepare_hparams -from recommenders.evaluation.spark_evaluation import ( - SparkRatingEvaluation, - SparkRankingEvaluation, -) from recommenders.evaluation.python_evaluation import ( map_at_k, ndcg_at_k, @@ -51,6 +43,26 @@ ) from recommenders.evaluation.python_evaluation import rmse, mae, rsquared, exp_var +try: + from recommenders.utils.spark_utils import start_or_get_spark + from recommenders.evaluation.spark_evaluation import ( + SparkRatingEvaluation, + SparkRankingEvaluation, + ) +except Exception: + pass # skip this import if we are not in a Spark environment +try: + from recommenders.models.deeprec.deeprec_utils import prepare_hparams + from recommenders.models.fastai.fastai_utils import ( + cartesian_product, + score, + ) + from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN + from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF + from recommenders.models.ncf.ncf_singlenode import NCF + from recommenders.models.ncf.dataset import Dataset as NCFDataset +except ImportError: + pass # skip this import if we are not in a GPU environment # Helpers tmp_dir = TemporaryDirectory() diff --git a/examples/06_benchmarks/movielens.ipynb 
b/examples/06_benchmarks/movielens.ipynb index 6c0f184bca..9acb726f2f 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -111,21 +111,34 @@ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", - "import pyspark\n", - "import tensorflow as tf # NOTE: TF needs to be imported before PyTorch, otherwise we get a weird initialization error\n", - "tf.get_logger().setLevel('ERROR') # only show error messages\n", - "import torch\n", - "import fastai\n", "import surprise\n", "import cornac\n", "\n", - "from recommenders.utils.spark_utils import start_or_get_spark\n", - "from recommenders.utils.general_utils import get_number_processors\n", - "from recommenders.utils.gpu_utils import get_cuda_version, get_cudnn_version\n", + "try:\n", + " import pyspark\n", + "except ImportError:\n", + " pass # skip this import if we are not in a Spark environment\n", + "\n", + "try:\n", + " import tensorflow as tf # NOTE: TF needs to be imported before PyTorch, otherwise we get an error\n", + " tf.get_logger().setLevel('ERROR') # only show error messages\n", + " import torch\n", + " import fastai\n", + "except ImportError:\n", + " pass # skip this import if we are not in a GPU environment\n", + "\n", "from recommenders.datasets import movielens\n", + "from recommenders.utils.general_utils import get_number_processors\n", "from recommenders.datasets.python_splitters import python_stratified_split\n", - "from recommenders.models.fastai.fastai_utils import hide_fastai_progress_bar\n", - "\n", + "try:\n", + " from recommenders.utils.spark_utils import start_or_get_spark\n", + "except ImportError:\n", + " pass # skip this import if we are not in a Spark environment\n", + "try:\n", + " from recommenders.utils.gpu_utils import get_cuda_version, get_cudnn_version\n", + " from recommenders.models.fastai.fastai_utils import hide_fastai_progress_bar\n", + "except ImportError:\n", + " pass # skip this import if we are not in a GPU 
environment\n", "\n", "current_path = os.path.join(os.getcwd(), \"examples\", \"06_benchmarks\") # To execute the notebook programmatically from root folder\n", "sys.path.append(current_path)\n", From 71e2d52554c3231950c36f5e50dc9ceaea857863 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 25 Oct 2022 11:12:02 +0000 Subject: [PATCH 04/23] fixing import errors in the CPU environment --- examples/06_benchmarks/benchmark_utils.py | 2 +- examples/06_benchmarks/movielens.ipynb | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/examples/06_benchmarks/benchmark_utils.py b/examples/06_benchmarks/benchmark_utils.py index 81ef43461a..daa27aac21 100644 --- a/examples/06_benchmarks/benchmark_utils.py +++ b/examples/06_benchmarks/benchmark_utils.py @@ -49,7 +49,7 @@ SparkRatingEvaluation, SparkRankingEvaluation, ) -except Exception: +except (ImportError,NameError): pass # skip this import if we are not in a Spark environment try: from recommenders.models.deeprec.deeprec_utils import prepare_hparams diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index 9acb726f2f..b8a0bf11e1 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -145,17 +145,23 @@ "from benchmark_utils import * \n", "\n", "print(f\"System version: {sys.version}\")\n", + "print(f\"Number of cores: {get_number_processors()}\")\n", "print(f\"NumPy version: {np.__version__}\")\n", "print(f\"Pandas version: {pd.__version__}\")\n", - "print(f\"PySpark version: {pyspark.__version__}\")\n", "print(f\"Surprise version: {surprise.__version__}\")\n", - "print(f\"PyTorch version: {torch.__version__}\")\n", - "print(f\"Fast AI version: {fastai.__version__}\")\n", "print(f\"Cornac version: {cornac.__version__}\")\n", - "print(f\"TensorFlow version: {tf.__version__}\")\n", - "print(f\"CUDA version: {get_cuda_version()}\")\n", - "print(f\"CuDNN version: {get_cudnn_version()}\")\n", - 
"print(f\"Number of cores: {get_number_processors()}\")\n", + "try:\n", + " print(f\"PySpark version: {pyspark.__version__}\")\n", + "except NameError:\n", + " pass # skip this import if we are not in a Spark environment\n", + "try:\n", + " print(f\"PyTorch version: {torch.__version__}\")\n", + " print(f\"Fast AI version: {fastai.__version__}\")\n", + " print(f\"TensorFlow version: {tf.__version__}\")\n", + " print(f\"CUDA version: {get_cuda_version()}\")\n", + " print(f\"CuDNN version: {get_cudnn_version()}\")\n", + "except ImportError:\n", + " pass # skip this import if we are not in a GPU environment\n", "\n", "%load_ext autoreload\n", "%autoreload 2" From d911c3e573e248a12fd8b93906cf00551e63c6b3 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 25 Oct 2022 11:12:36 +0000 Subject: [PATCH 05/23] fixing import errors in the CPU environment --- examples/06_benchmarks/movielens.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index b8a0bf11e1..5a37a07e3d 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -160,7 +160,7 @@ " print(f\"TensorFlow version: {tf.__version__}\")\n", " print(f\"CUDA version: {get_cuda_version()}\")\n", " print(f\"CuDNN version: {get_cudnn_version()}\")\n", - "except ImportError:\n", + "except NameError:\n", " pass # skip this import if we are not in a GPU environment\n", "\n", "%load_ext autoreload\n", From c47f7a7b0b4ffa4aad8c88ddf0c38846d9f58724 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 25 Oct 2022 11:16:14 +0000 Subject: [PATCH 06/23] fixing import errors in the CPU environment --- examples/06_benchmarks/movielens.ipynb | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index 5a37a07e3d..0e945eac75 100644 --- 
a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -137,6 +137,7 @@ "try:\n", " from recommenders.utils.gpu_utils import get_cuda_version, get_cudnn_version\n", " from recommenders.models.fastai.fastai_utils import hide_fastai_progress_bar\n", + " hide_fastai_progress_bar()\n", "except ImportError:\n", " pass # skip this import if we are not in a GPU environment\n", "\n", @@ -173,18 +174,11 @@ "metadata": {}, "outputs": [], "source": [ - "spark = start_or_get_spark(\"PySpark\", memory=\"32g\")\n", - "spark.conf.set(\"spark.sql.analyzer.failAmbiguousSelfJoin\", \"false\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Hide fastai progress bar\n", - "hide_fastai_progress_bar()" + "try:\n", + " spark = start_or_get_spark(\"PySpark\", memory=\"32g\")\n", + " spark.conf.set(\"spark.sql.analyzer.failAmbiguousSelfJoin\", \"false\")\n", + "except NameError:\n", + " pass # skip this import if we are not in a Spark environment" ] }, { @@ -195,8 +189,12 @@ "source": [ "# fix random seeds to make sure out runs are reproducible\n", "np.random.seed(SEED)\n", - "torch.manual_seed(SEED)\n", - "torch.cuda.manual_seed_all(SEED)" + "try:\n", + " tf.random.set_seed(SEED)\n", + " torch.manual_seed(SEED)\n", + " torch.cuda.manual_seed_all(SEED)\n", + "except NameError:\n", + " pass # skip this import if we are not in a GPU environment" ] }, { From 55fc64389f8904b606a26a87342a535d3aba6433 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Tue, 25 Oct 2022 11:26:05 +0000 Subject: [PATCH 07/23] import errors --- examples/06_benchmarks/movielens.ipynb | 34 ++++++++++++++------------ 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index 0e945eac75..e14072dc4b 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -69,7 +69,7 @@ }, { "cell_type": 
"code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -90,17 +90,19 @@ "text": [ "System version: 3.7.13 (default, Mar 29 2022, 02:18:16) \n", "[GCC 7.5.0]\n", + "Number of cores: 6\n", "NumPy version: 1.21.6\n", "Pandas version: 1.3.5\n", - "PySpark version: 3.2.2\n", "Surprise version: 1.1.1\n", - "PyTorch version: 1.12.1+cu102\n", - "Fast AI version: 1.0.61\n", "Cornac version: 1.14.2\n", - "TensorFlow version: 2.7.4\n", + "PySpark version: 3.2.2\n", "CUDA version: 10.2\n", "CuDNN version: 7605\n", - "Number of cores: 6\n" + "TensorFlow version: 2.7.4\n", + "PyTorch version: 1.12.1+cu102\n", + "Fast AI version: 1.0.61\n", + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" ] } ], @@ -127,6 +129,10 @@ "except ImportError:\n", " pass # skip this import if we are not in a GPU environment\n", "\n", + "current_path = os.path.join(os.getcwd(), \"examples\", \"06_benchmarks\") # To execute the notebook programmatically from root folder\n", + "sys.path.append(current_path)\n", + "from benchmark_utils import * \n", + "\n", "from recommenders.datasets import movielens\n", "from recommenders.utils.general_utils import get_number_processors\n", "from recommenders.datasets.python_splitters import python_stratified_split\n", @@ -141,10 +147,6 @@ "except ImportError:\n", " pass # skip this import if we are not in a GPU environment\n", "\n", - "current_path = os.path.join(os.getcwd(), \"examples\", \"06_benchmarks\") # To execute the notebook programmatically from root folder\n", - "sys.path.append(current_path)\n", - "from benchmark_utils import * \n", - "\n", "print(f\"System version: {sys.version}\")\n", "print(f\"Number of cores: {get_number_processors()}\")\n", "print(f\"NumPy version: {np.__version__}\")\n", @@ -156,11 +158,11 @@ "except NameError:\n", " 
pass # skip this import if we are not in a Spark environment\n", "try:\n", - " print(f\"PyTorch version: {torch.__version__}\")\n", - " print(f\"Fast AI version: {fastai.__version__}\")\n", - " print(f\"TensorFlow version: {tf.__version__}\")\n", " print(f\"CUDA version: {get_cuda_version()}\")\n", " print(f\"CuDNN version: {get_cudnn_version()}\")\n", + " print(f\"TensorFlow version: {tf.__version__}\")\n", + " print(f\"PyTorch version: {torch.__version__}\")\n", + " print(f\"Fast AI version: {fastai.__version__}\")\n", "except NameError:\n", " pass # skip this import if we are not in a GPU environment\n", "\n", @@ -170,7 +172,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -183,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ From 5fe1456f2f7bc69846894767115b7a74db402a30 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Wed, 26 Oct 2022 09:32:12 +0000 Subject: [PATCH 08/23] spark test --- .../examples/test_notebooks_pyspark.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/integration/examples/test_notebooks_pyspark.py b/tests/integration/examples/test_notebooks_pyspark.py index dd5cc538b6..a7eb3ae89e 100644 --- a/tests/integration/examples/test_notebooks_pyspark.py +++ b/tests/integration/examples/test_notebooks_pyspark.py @@ -62,3 +62,30 @@ def test_mmlspark_lightgbm_criteo_integration(notebooks, output_notebook, kernel ] assert results["auc"] == pytest.approx(0.68895, rel=TOL, abs=ABS_TOL) + + +@pytest.mark.notebooks +@pytest.mark.integration +@pytest.mark.parametrize( + "size, algos, expected_values", + [ + ( + ["100k"], + ["als"], + dict( + eval_precision=0.131601, + eval_recall=0.038056, + eval_precision2=0.145599, + eval_recall2=0.051338, + ), + ), + ], +) +def test_benchmark_movielens_spark(notebooks, output_notebook, kernel_name, size, algos, expected_values): + 
notebook_path = notebooks["benchmark_movielens"] + pm.execute_notebook( + notebook_path, + output_notebook, + kernel_name=kernel_name, + parameters=dict(data_sizes=size, algorithms=algos), + ) \ No newline at end of file From b0068edfbc07b5af8815a19abe4f2cfc08da1d5c Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Thu, 27 Oct 2022 16:08:47 +0000 Subject: [PATCH 09/23] spark --- tests/integration/examples/test_notebooks_pyspark.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/examples/test_notebooks_pyspark.py b/tests/integration/examples/test_notebooks_pyspark.py index a7eb3ae89e..3c2604368a 100644 --- a/tests/integration/examples/test_notebooks_pyspark.py +++ b/tests/integration/examples/test_notebooks_pyspark.py @@ -64,6 +64,7 @@ def test_mmlspark_lightgbm_criteo_integration(notebooks, output_notebook, kernel assert results["auc"] == pytest.approx(0.68895, rel=TOL, abs=ABS_TOL) +@pytest.mark.spark @pytest.mark.notebooks @pytest.mark.integration @pytest.mark.parametrize( @@ -81,7 +82,7 @@ def test_mmlspark_lightgbm_criteo_integration(notebooks, output_notebook, kernel ), ], ) -def test_benchmark_movielens_spark(notebooks, output_notebook, kernel_name, size, algos, expected_values): +def test_benchmark_movielens_pyspark(notebooks, output_notebook, kernel_name, size, algos, expected_values): notebook_path = notebooks["benchmark_movielens"] pm.execute_notebook( notebook_path, From 5a0c564bd7fc1e02a2eddecb2f37ddc4426640c4 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Thu, 27 Oct 2022 16:49:41 +0000 Subject: [PATCH 10/23] gpu test --- .../examples/test_notebooks_gpu.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/integration/examples/test_notebooks_gpu.py b/tests/integration/examples/test_notebooks_gpu.py index 78f0bf54f2..888f2499e7 100644 --- a/tests/integration/examples/test_notebooks_gpu.py +++ b/tests/integration/examples/test_notebooks_gpu.py @@ -723,3 +723,31 @@ def 
test_sasrec_quickstart_integration( for key, value in expected_values.items(): assert results[key] == pytest.approx(value, rel=TOL, abs=ABS_TOL) + + +@pytest.mark.gpu +@pytest.mark.notebooks +@pytest.mark.integration +@pytest.mark.parametrize( + "size, algos, expected_values", + [ + ( + ["100k"], + ["ncf", "fastai", "bivae", "lightgcn"], + dict( + eval_precision=0.131601, + eval_recall=0.038056, + eval_precision2=0.145599, + eval_recall2=0.051338, + ), + ), + ], +) +def test_benchmark_movielens_gpu(notebooks, output_notebook, kernel_name, size, algos, expected_values): + notebook_path = notebooks["benchmark_movielens"] + pm.execute_notebook( + notebook_path, + output_notebook, + kernel_name=kernel_name, + parameters=dict(data_sizes=size, algorithms=algos), + ) \ No newline at end of file From cff71cf87532ff66e06fef71ffe44ee6c3db44b3 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Thu, 27 Oct 2022 19:14:26 +0000 Subject: [PATCH 11/23] :memo: --- examples/06_benchmarks/benchmark_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/06_benchmarks/benchmark_utils.py b/examples/06_benchmarks/benchmark_utils.py index daa27aac21..867b4c66ee 100644 --- a/examples/06_benchmarks/benchmark_utils.py +++ b/examples/06_benchmarks/benchmark_utils.py @@ -247,8 +247,6 @@ def recommend_k_fastai(model, test, train, top_k=DEFAULT_K, remove_seen=True): def prepare_training_ncf(df_train, df_test): - #df_train.sort_values(["userID"], axis=0, ascending=[True], inplace=True) - #df_test.sort_values(["userID"], axis=0, ascending=[True], inplace=True) train = df_train.sort_values(["userID"], axis=0, ascending=[True]) test = df_test.sort_values(["userID"], axis=0, ascending=[True]) test = test[df_test["userID"].isin(train["userID"].unique())] From 32e40699fac952459b9fa46e0fc9c2799e18bcc8 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Thu, 27 Oct 2022 19:25:29 +0000 Subject: [PATCH 12/23] params --- examples/06_benchmarks/movielens.ipynb | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index e14072dc4b..f6ee667157 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -336,9 +336,11 @@ "}\n", "\n", "lightgcn_param = {\n", - " \"yaml_file\": os.path.join(\"..\",\"..\",\"recommenders\", \"models\", \"deeprec\", \"config\", \"lightgcn.yaml\"),\n", + " \"model_type\": \"lightgcn\",\n", " \"n_layers\": 3,\n", " \"batch_size\": 1024,\n", + " \"embed_size\": 64,\n", + " \"decay\": 0.0001,\n", " \"epochs\": 20,\n", " \"learning_rate\": 0.005,\n", " \"eval_epoch\": 5,\n", From e8737e8f836593ee59914f94bb2c656a786de969 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Thu, 27 Oct 2022 19:36:05 +0000 Subject: [PATCH 13/23] :bug: --- examples/06_benchmarks/movielens.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index f6ee667157..3cc889cf1a 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -345,6 +345,7 @@ " \"learning_rate\": 0.005,\n", " \"eval_epoch\": 5,\n", " \"top_k\": DEFAULT_K,\n", + " \"metrics\": [\"recall\", \"ndcg\", \"precision\", \"map\"],\n", "}\n", "\n", "params = {\n", From d7ffd19000a4e145349e095e2c201d4f2ff8286f Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Thu, 27 Oct 2022 19:42:35 +0000 Subject: [PATCH 14/23] :bug: --- examples/06_benchmarks/movielens.ipynb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index 3cc889cf1a..e4b668ceee 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -346,6 +346,8 @@ " \"eval_epoch\": 5,\n", " \"top_k\": DEFAULT_K,\n", " \"metrics\": [\"recall\", \"ndcg\", \"precision\", \"map\"],\n", + " \"save_model\":False,\n", + " \"MODEL_DIR\":\".\",\n", "}\n", "\n", "params = 
{\n", From 2b88fbb5011ef3b4e0f54e1567f534a060db0791 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Thu, 27 Oct 2022 19:55:11 +0000 Subject: [PATCH 15/23] testing --- tests/integration/examples/test_notebooks_gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/examples/test_notebooks_gpu.py b/tests/integration/examples/test_notebooks_gpu.py index 888f2499e7..b767561c4d 100644 --- a/tests/integration/examples/test_notebooks_gpu.py +++ b/tests/integration/examples/test_notebooks_gpu.py @@ -733,7 +733,7 @@ def test_sasrec_quickstart_integration( [ ( ["100k"], - ["ncf", "fastai", "bivae", "lightgcn"], + ["lightgcn"], #["ncf", "fastai", "bivae", "lightgcn"], dict( eval_precision=0.131601, eval_recall=0.038056, From 4ffbb16b02f13574c312315829e2aa10cd423821 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Fri, 11 Nov 2022 12:55:37 +0000 Subject: [PATCH 16/23] wip #1832 --- recommenders/models/deeprec/models/graphrec/lightgcn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/recommenders/models/deeprec/models/graphrec/lightgcn.py b/recommenders/models/deeprec/models/graphrec/lightgcn.py index 7f17bf95f3..60042464dd 100644 --- a/recommenders/models/deeprec/models/graphrec/lightgcn.py +++ b/recommenders/models/deeprec/models/graphrec/lightgcn.py @@ -301,25 +301,25 @@ def run_eval(self): if metric == "map": ret.append( map_at_k( - self.data.test, topk_scores, relevancy_method=None, k=self.top_k + self.data.test, topk_scores, k=self.top_k ) ) elif metric == "ndcg": ret.append( ndcg_at_k( - self.data.test, topk_scores, relevancy_method=None, k=self.top_k + self.data.test, topk_scores, k=self.top_k ) ) elif metric == "precision": ret.append( precision_at_k( - self.data.test, topk_scores, relevancy_method=None, k=self.top_k + self.data.test, topk_scores, k=self.top_k ) ) elif metric == "recall": ret.append( recall_at_k( - self.data.test, topk_scores, relevancy_method=None, k=self.top_k + self.data.test, 
topk_scores, k=self.top_k ) ) return ret From b49013715ca3e6ead54862172fcb2b07412c906c Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Fri, 11 Nov 2022 12:58:49 +0000 Subject: [PATCH 17/23] test gpu benchmark --- tests/integration/examples/test_notebooks_gpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/examples/test_notebooks_gpu.py b/tests/integration/examples/test_notebooks_gpu.py index b767561c4d..888f2499e7 100644 --- a/tests/integration/examples/test_notebooks_gpu.py +++ b/tests/integration/examples/test_notebooks_gpu.py @@ -733,7 +733,7 @@ def test_sasrec_quickstart_integration( [ ( ["100k"], - ["lightgcn"], #["ncf", "fastai", "bivae", "lightgcn"], + ["ncf", "fastai", "bivae", "lightgcn"], dict( eval_precision=0.131601, eval_recall=0.038056, From 5d64cf34503e9e8f35da187736496bc888c141fb Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Fri, 11 Nov 2022 13:09:43 +0000 Subject: [PATCH 18/23] :memo: --- tests/integration/examples/test_notebooks_gpu.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration/examples/test_notebooks_gpu.py b/tests/integration/examples/test_notebooks_gpu.py index 888f2499e7..6fbdfd2002 100644 --- a/tests/integration/examples/test_notebooks_gpu.py +++ b/tests/integration/examples/test_notebooks_gpu.py @@ -709,8 +709,6 @@ def test_sasrec_quickstart_integration( "model_name": model_name, "seed": seed, } - - print("Executing notebook ... 
") pm.execute_notebook( notebook_path, output_notebook, From 68d15dfc9cc83f2967e13262a40c5dc8a6924b16 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Fri, 11 Nov 2022 13:39:08 +0000 Subject: [PATCH 19/23] values --- examples/06_benchmarks/movielens.ipynb | 4 +++- .../examples/test_notebooks_gpu.py | 21 ++++++++++--------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index e4b668ceee..28b7381159 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -113,6 +113,7 @@ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", + "import scrapbook as sb\n", "import surprise\n", "import cornac\n", "\n", @@ -805,7 +806,8 @@ " summary = generate_summary(data_size, algo, DEFAULT_K, time_train, time_rating, ratings, time_ranking, rankings)\n", " df_results.loc[df_results.shape[0] + 1] = summary\n", " \n", - "print(\"\\nComputation finished\")\n" + "print(\"\\nComputation finished\")\n", + "sb.glue(\"results\", df_results[\"nDCG@k\"].tolist())" ] }, { diff --git a/tests/integration/examples/test_notebooks_gpu.py b/tests/integration/examples/test_notebooks_gpu.py index 6fbdfd2002..1a539a215c 100644 --- a/tests/integration/examples/test_notebooks_gpu.py +++ b/tests/integration/examples/test_notebooks_gpu.py @@ -14,7 +14,7 @@ from recommenders.utils.gpu_utils import get_number_gpus -TOL = 0.5 +TOL = 0.1 ABS_TOL = 0.05 @@ -727,25 +727,26 @@ def test_sasrec_quickstart_integration( @pytest.mark.notebooks @pytest.mark.integration @pytest.mark.parametrize( - "size, algos, expected_values", + "size, algos, expected_values_ndcg", [ ( ["100k"], ["ncf", "fastai", "bivae", "lightgcn"], - dict( - eval_precision=0.131601, - eval_recall=0.038056, - eval_precision2=0.145599, - eval_recall2=0.051338, - ), + [0.382793, 0.147583, 0.471722, 0.412664] ), ], ) -def test_benchmark_movielens_gpu(notebooks, output_notebook, kernel_name, 
size, algos, expected_values): +def test_benchmark_movielens_gpu(notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg): notebook_path = notebooks["benchmark_movielens"] pm.execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(data_sizes=size, algorithms=algos), - ) \ No newline at end of file + ) + results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[ + "data" + ] + assert len(results["results"]) == 4 + for i, value in enumerate(results["results"]): + assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL) From 30a5409869c73248973233ed46412bb35229c604 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Fri, 11 Nov 2022 13:49:12 +0000 Subject: [PATCH 20/23] values --- .../examples/test_notebooks_python.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/integration/examples/test_notebooks_python.py b/tests/integration/examples/test_notebooks_python.py index fc2eedb0fc..4cd9fd0a55 100644 --- a/tests/integration/examples/test_notebooks_python.py +++ b/tests/integration/examples/test_notebooks_python.py @@ -313,25 +313,26 @@ def test_xlearn_fm_integration(notebooks, output_notebook, kernel_name): @pytest.mark.notebooks @pytest.mark.integration @pytest.mark.parametrize( - "size, algos, expected_values", + "size, algos, expected_values_ndcg", [ ( ["100k"], ["svd", "sar", "bpr"], - dict( - eval_precision=0.131601, - eval_recall=0.038056, - eval_precision2=0.145599, - eval_recall2=0.051338, - ), + [0.094444, 0.393818, 0.444990] ), ], ) -def test_benchmark_movielens_cpu(notebooks, output_notebook, kernel_name, size, algos, expected_values): +def test_benchmark_movielens_cpu(notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg): notebook_path = notebooks["benchmark_movielens"] pm.execute_notebook( notebook_path, output_notebook, kernel_name=kernel_name, parameters=dict(data_sizes=size, algorithms=algos), - ) 
\ No newline at end of file
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+    assert len(results["results"]) == 3
+    for i, value in enumerate(expected_values_ndcg):
+        assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
\ No newline at end of file

From f8455d4e06cd7deca02f5dccfe4b24e163598046 Mon Sep 17 00:00:00 2001
From: miguelgfierro
Date: Fri, 11 Nov 2022 14:05:17 +0000
Subject: [PATCH 21/23] pyspark

---
 .../examples/test_notebooks_pyspark.py        | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/tests/integration/examples/test_notebooks_pyspark.py b/tests/integration/examples/test_notebooks_pyspark.py
index 3c2604368a..0cfbc22ed9 100644
--- a/tests/integration/examples/test_notebooks_pyspark.py
+++ b/tests/integration/examples/test_notebooks_pyspark.py
@@ -68,25 +68,26 @@ def test_mmlspark_lightgbm_criteo_integration(notebooks, output_notebook, kernel
 @pytest.mark.notebooks
 @pytest.mark.integration
 @pytest.mark.parametrize(
-    "size, algos, expected_values",
+    "size, algos, expected_values_ndcg",
     [
         (
             ["100k"],
             ["als"],
-            dict(
-                eval_precision=0.131601,
-                eval_recall=0.038056,
-                eval_precision2=0.145599,
-                eval_recall2=0.051338,
-            ),
+            [0]
         ),
     ],
 )
-def test_benchmark_movielens_pyspark(notebooks, output_notebook, kernel_name, size, algos, expected_values):
+def test_benchmark_movielens_pyspark(notebooks, output_notebook, kernel_name, size, algos, expected_values_ndcg):
     notebook_path = notebooks["benchmark_movielens"]
    pm.execute_notebook(
         notebook_path,
         output_notebook,
         kernel_name=kernel_name,
         parameters=dict(data_sizes=size, algorithms=algos),
-    )
\ No newline at end of file
+    )
+    results = sb.read_notebook(output_notebook).scraps.dataframe.set_index("name")[
+        "data"
+    ]
+    assert len(results["results"]) == 1
+    for i, value in enumerate(expected_values_ndcg):
+        assert results["results"][i] == pytest.approx(value, rel=TOL, abs=ABS_TOL)
\ No newline at end of file
From 6a996039191aa78f3aed1239c616a023c4d87a2a Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Fri, 11 Nov 2022 14:06:33 +0000 Subject: [PATCH 22/23] fix #1832 --- tests/integration/examples/test_notebooks_pyspark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/examples/test_notebooks_pyspark.py b/tests/integration/examples/test_notebooks_pyspark.py index 0cfbc22ed9..9b1a7b9558 100644 --- a/tests/integration/examples/test_notebooks_pyspark.py +++ b/tests/integration/examples/test_notebooks_pyspark.py @@ -73,7 +73,7 @@ def test_mmlspark_lightgbm_criteo_integration(notebooks, output_notebook, kernel ( ["100k"], ["als"], - [0] + [0.035812] ), ], ) From e24d0e4bf81b298040e15e7ed1ed5d03c19d74e6 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Fri, 11 Nov 2022 14:20:49 +0000 Subject: [PATCH 23/23] test groups --- tests/ci/azureml_tests/test_groups.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/ci/azureml_tests/test_groups.py b/tests/ci/azureml_tests/test_groups.py index 6b7c5b5e42..6817500467 100644 --- a/tests/ci/azureml_tests/test_groups.py +++ b/tests/ci/azureml_tests/test_groups.py @@ -44,7 +44,7 @@ "tests/smoke/examples/test_notebooks_python.py::test_cornac_bpr_smoke", # 16.62s "tests/integration/examples/test_notebooks_python.py::test_cornac_bpr_integration", # 165.72s ], - "group_cpu_002": [ # Total group time: 1742.32s (didn't add xlearn) + "group_cpu_002": [ # Total group time: 1800.32s (didn't add xlearn) # "tests/smoke/examples/test_notebooks_python.py::test_baseline_deep_dive_smoke", # 15.98s "tests/integration/examples/test_notebooks_python.py::test_baseline_deep_dive_integration", # 170.73s @@ -54,6 +54,7 @@ # "tests/integration/examples/test_notebooks_python.py::test_geoimc_integration", # 1006.19s # + "tests/integration/examples/test_notebooks_python.py::test_benchmark_movielens_cpu", #58s # FIXME: Add experimental tests in a later iteration # 
"tests/integration/examples/test_notebooks_python.py::test_xlearn_fm_integration", # 255.73s ], @@ -119,15 +120,17 @@ "tests/smoke/examples/test_notebooks_gpu.py::test_npa_smoke", # 366.22s "tests/integration/examples/test_notebooks_gpu.py::test_npa_quickstart_integration", # 810.92s ], - "group_gpu_007": [ # Total group time: 620.89s + "group_gpu_007": [ # Total group time: 846.89s "tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works) "tests/smoke/examples/test_notebooks_gpu.py::test_naml_smoke", # 620.13s + # + "tests/integration/examples/test_notebooks_gpu.py::test_benchmark_movielens_gpu", # 226s # FIXME: Reduce test time https://github.com/microsoft/recommenders/issues/1731 # "tests/integration/examples/test_notebooks_gpu.py::test_naml_quickstart_integration", # 2033.85s # FIXME: https://github.com/microsoft/recommenders/issues/1716 # "tests/integration/examples/test_notebooks_gpu.py::test_sasrec_quickstart_integration", # 448.06s + 614.69s ], - "group_spark_001": [ # Total group time: 845.16s + "group_spark_001": [ # Total group time: 987.16s "tests/smoke/recommenders/dataset/test_movielens.py::test_load_spark_df", # 4.33s "tests/integration/recommenders/datasets/test_movielens.py::test_load_spark_df", # 25.58s + 101.99s + 139.23s # @@ -137,6 +140,7 @@ # "tests/smoke/examples/test_notebooks_pyspark.py::test_als_pyspark_smoke", # 49.53s "tests/integration/examples/test_notebooks_pyspark.py::test_als_pyspark_integration", # 110.58s + "tests/integration/examples/test_notebooks_pyspark.py::test_benchmark_movielens_pyspark", #142 ], }