From 1eb2a93e4a0dbd6a5fe316ae261a235b48834875 Mon Sep 17 00:00:00 2001 From: miguelgfierro Date: Thu, 15 Jul 2021 16:25:05 +0000 Subject: [PATCH 01/17] rename common to utils --- docs/source/common.rst | 20 +- docs/source/index.rst | 2 +- examples/00_quick_start/als_movielens.ipynb | 6 +- .../00_quick_start/fastai_movielens.ipynb | 2 +- examples/00_quick_start/ncf_movielens.ipynb | 4 +- examples/00_quick_start/sar_movielens.ipynb | 6 +- .../sar_movielens_with_azureml.ipynb | 2 +- .../sequential_recsys_amazondataset.ipynb | 4 +- .../00_quick_start/wide_deep_movielens.ipynb | 4 +- examples/00_quick_start/xdeepfm_criteo.ipynb | 2 +- examples/01_prepare_data/data_split.ipynb | 2 +- .../baseline_deep_dive.ipynb | 2 +- .../cornac_bivae_deep_dive.ipynb | 4 +- .../cornac_bpr_deep_dive.ipynb | 8 +- .../lightgcn_deep_dive.ipynb | 6 +- .../multi_vae_deep_dive.ipynb | 4 +- .../standard_vae_deep_dive.ipynb | 6 +- .../surprise_svd_deep_dive.ipynb | 2 +- .../mmlspark_lightgbm_criteo.ipynb | 4 +- .../vowpal_wabbit_deep_dive.ipynb | 2 +- examples/02_model_hybrid/fm_deep_dive.ipynb | 4 +- examples/02_model_hybrid/ncf_deep_dive.ipynb | 6 +- .../als_movielens_diversity_metrics.ipynb | 6 +- examples/03_evaluate/evaluation.ipynb | 2 +- .../azureml_hyperdrive_wide_and_deep.ipynb | 6 +- .../nni_ncf.ipynb | 2 +- .../nni_surprise_svd.ipynb | 2 +- .../train_scripts/wide_deep_training.py | 2 +- .../tuning_spark_als.ipynb | 4 +- .../05_operationalize/als_movie_o16n.ipynb | 10 +- .../lightgbm_criteo_o16n.ipynb | 2 +- examples/06_benchmarks/benchmark_utils.py | 35 +--- examples/06_benchmarks/movielens.ipynb | 4 +- .../KDD2020-tutorial/step5_run_lightgcn.ipynb | 4 +- reco_utils/README.md | 2 +- reco_utils/dataset/amazon_reviews.py | 2 +- reco_utils/dataset/criteo.py | 22 +-- reco_utils/dataset/movielens.py | 18 +- reco_utils/dataset/pandas_df_utils.py | 2 +- reco_utils/dataset/python_splitters.py | 6 +- reco_utils/dataset/spark_splitters.py | 73 ++++---- reco_utils/dataset/sparse.py | 6 +- reco_utils/dataset/split_utils.py | 30 +-- reco_utils/evaluation/python_evaluation.py | 34 ++-- .../evaluation/spark_diversity_evaluation.py | 14 +- reco_utils/evaluation/spark_evaluation.py | 16 +- reco_utils/recommender/cornac/cornac_utils.py | 2 +- .../deeprec/DataModel/ImplicitCF.py | 4 +- .../deeprec/models/graphrec/lightgcn.py | 4 +- reco_utils/recommender/fastai/fastai_utils.py | 6 +- reco_utils/recommender/geoimc/geoimc_data.py | 172 +++++++++--------- .../recommender/geoimc/geoimc_predict.py | 38 ++-- reco_utils/recommender/ncf/dataset.py | 32 ++-- reco_utils/recommender/rlrmc/RLRMCdataset.py | 2 +- reco_utils/recommender/sar/sar_singlenode.py | 8 +- .../recommender/surprise/surprise_utils.py | 14 +- reco_utils/recommender/vowpal_wabbit/vw.py | 18 +- .../recommender/wide_deep/wide_deep_utils.py | 14 +- reco_utils/tuning/nni/ncf_training.py | 2 +- tests/conftest.py | 4 +- .../examples/test_notebooks_gpu.py | 2 +- tests/smoke/examples/test_notebooks_gpu.py | 3 +- tests/unit/examples/test_notebooks_gpu.py | 3 +- .../reco_utils/common/test_general_utils.py | 2 +- .../unit/reco_utils/common/test_gpu_utils.py | 2 +- .../unit/reco_utils/common/test_k8s_utils.py | 2 +- .../common/test_notebook_utils.ipynb | 4 +- .../reco_utils/common/test_notebook_utils.py | 3 +- tests/unit/reco_utils/common/test_plot.py | 2 +- .../reco_utils/common/test_python_utils.py | 2 +- tests/unit/reco_utils/common/test_tf_utils.py | 4 +- tests/unit/reco_utils/common/test_timer.py | 2 +- .../dataset/test_python_splitter.py | 2 +- 
.../reco_utils/dataset/test_spark_splitter.py | 2 +- tests/unit/reco_utils/dataset/test_sparse.py | 2 +- .../evaluation/test_python_evaluation.py | 3 +- .../recommender/test_cornac_utils.py | 2 +- .../reco_utils/recommender/test_geoimc.py | 2 +- .../recommender/test_ncf_dataset.py | 2 +- .../recommender/test_ncf_singlenode.py | 2 +- .../recommender/test_sar_singlenode.py | 2 +- .../recommender/test_surprise_utils.py | 2 +- .../recommender/test_wide_deep_utils.py | 4 +- 83 files changed, 374 insertions(+), 408 deletions(-) diff --git a/docs/source/common.rst b/docs/source/common.rst index b167c21d1e..14beecc374 100644 --- a/docs/source/common.rst +++ b/docs/source/common.rst @@ -7,64 +7,64 @@ Common utilities module General utilities =============================== -.. automodule:: reco_utils.common.general_utils +.. automodule:: reco_utils.utils.general_utils :members: GPU utilities =============================== -.. automodule:: reco_utils.common.gpu_utils +.. automodule:: reco_utils.utils.gpu_utils :members: Kubernetes utilities =============================== -.. automodule:: reco_utils.common.k8s_utils +.. automodule:: reco_utils.utils.k8s_utils :members: Notebook utilities =============================== -.. automodule:: reco_utils.common.notebook_utils +.. automodule:: reco_utils.utils.notebook_utils :members: -.. automodule:: reco_utils.common.notebook_memory_management +.. automodule:: reco_utils.utils.notebook_memory_management :members: Python utilities =============================== -.. automodule:: reco_utils.common.python_utils +.. automodule:: reco_utils.utils.python_utils :members: Spark utilities =============================== -.. automodule:: reco_utils.common.spark_utils +.. automodule:: reco_utils.utils.spark_utils :members: Tensorflow utilities =============================== -.. automodule:: reco_utils.common.tf_utils +.. automodule:: reco_utils.utils.tf_utils :members: Timer =============================== -.. automodule:: reco_utils.common.timer +.. automodule:: reco_utils.utils.timer :members: Plot utilities =============================== -.. automodule:: reco_utils.common.plot +.. automodule:: reco_utils.utils.plot :members: \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 0f3f002a01..62850666f9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -11,7 +11,7 @@ evaluating recommender systems. 
:maxdepth: 1 :caption: Contents: - Common + Utils Dataset Evaluation Recommender algorithms diff --git a/examples/00_quick_start/als_movielens.ipynb b/examples/00_quick_start/als_movielens.ipynb index 6a886ebd7b..84a26276ea 100644 --- a/examples/00_quick_start/als_movielens.ipynb +++ b/examples/00_quick_start/als_movielens.ipynb @@ -52,12 +52,12 @@ "from pyspark.sql.types import StructType, StructField\n", "from pyspark.sql.types import StringType, FloatType, IntegerType, LongType\n", "\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.dataset import movielens\n", - "from reco_utils.common.notebook_utils import is_jupyter\n", + "from reco_utils.utils.notebook_utils import is_jupyter\n", "from reco_utils.dataset.spark_splitters import spark_random_split\n", "from reco_utils.evaluation.spark_evaluation import SparkRatingEvaluation, SparkRankingEvaluation\n", - "from reco_utils.common.spark_utils import start_or_get_spark\n", + "from reco_utils.utils.spark_utils import start_or_get_spark\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Spark version: {}\".format(pyspark.__version__))\n" diff --git a/examples/00_quick_start/fastai_movielens.ipynb b/examples/00_quick_start/fastai_movielens.ipynb index ae17eaa109..d79bfb5f13 100644 --- a/examples/00_quick_start/fastai_movielens.ipynb +++ b/examples/00_quick_start/fastai_movielens.ipynb @@ -49,7 +49,7 @@ "import torch, fastai\n", "from fastai.collab import EmbeddingDotBias, collab_learner, CollabDataBunch, load_learner\n", "\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.python_splitters import python_stratified_split\n", "from reco_utils.recommender.fastai.fastai_utils import cartesian_product, score\n", diff --git a/examples/00_quick_start/ncf_movielens.ipynb b/examples/00_quick_start/ncf_movielens.ipynb index c94e314a7d..716486a594 100644 --- a/examples/00_quick_start/ncf_movielens.ipynb +++ b/examples/00_quick_start/ncf_movielens.ipynb @@ -52,11 +52,11 @@ "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.recommender.ncf.ncf_singlenode import NCF\n", "from reco_utils.recommender.ncf.dataset import Dataset as NCFDataset\n", "from reco_utils.dataset import movielens\n", - "from reco_utils.common.notebook_utils import is_jupyter\n", + "from reco_utils.utils.notebook_utils import is_jupyter\n", "from reco_utils.dataset.python_splitters import python_chrono_split\n", "from reco_utils.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, \n", " recall_at_k, get_top_k_items)\n", diff --git a/examples/00_quick_start/sar_movielens.ipynb b/examples/00_quick_start/sar_movielens.ipynb index 19d89ad28c..93f30e9b4d 100644 --- a/examples/00_quick_start/sar_movielens.ipynb +++ b/examples/00_quick_start/sar_movielens.ipynb @@ -64,8 +64,8 @@ "import scrapbook as sb\n", "from sklearn.preprocessing import minmax_scale\n", "\n", - "from reco_utils.common.python_utils import binarize\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.python_utils import binarize\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.python_splitters import 
python_stratified_split\n", "from reco_utils.evaluation.python_evaluation import (\n", @@ -497,7 +497,7 @@ "source": [ "### 2.3. Evaluate how well SAR performs\n", "\n", - "We evaluate how well SAR performs for a few common ranking metrics provided in the `python_evaluation` module in reco_utils. We will consider the Mean Average Precision (MAP), Normalized Discounted Cumalative Gain (NDCG), Precision, and Recall for the top-k items per user we computed with SAR. User, item and rating column names are specified in each evaluation method." + "We evaluate how well SAR performs using a few common ranking metrics provided in the `python_evaluation` module in reco_utils. We will consider the Mean Average Precision (MAP), Normalized Discounted Cumulative Gain (NDCG), Precision, and Recall for the top-k items per user we computed with SAR. User, item and rating column names are specified in each evaluation method." ] }, { diff --git a/examples/00_quick_start/sar_movielens_with_azureml.ipynb b/examples/00_quick_start/sar_movielens_with_azureml.ipynb index 5a2bfaf334..46ba3a05ed 100644 --- a/examples/00_quick_start/sar_movielens_with_azureml.ipynb +++ b/examples/00_quick_start/sar_movielens_with_azureml.ipynb @@ -337,7 +337,7 @@ "from azureml.core import Run\n", "from sklearn.externals import joblib\n", "\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.python_splitters import python_stratified_split\n", "from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", diff --git a/examples/00_quick_start/sequential_recsys_amazondataset.ipynb b/examples/00_quick_start/sequential_recsys_amazondataset.ipynb index 06fa645bd0..5ff2c15039 100644 --- a/examples/00_quick_start/sequential_recsys_amazondataset.ipynb +++ b/examples/00_quick_start/sequential_recsys_amazondataset.ipynb @@ -63,8 +63,8 @@ "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", - "from reco_utils.common.timer import Timer\n", - "from reco_utils.common.constants import SEED\n", + "from reco_utils.utils.timer import Timer\n", + "from reco_utils.utils.constants import SEED\n", "from reco_utils.recommender.deeprec.deeprec_utils import (\n", " prepare_hparams\n", ")\n", diff --git a/examples/00_quick_start/wide_deep_movielens.ipynb b/examples/00_quick_start/wide_deep_movielens.ipynb index ee965f13f3..1fa6ae280a 100644 --- a/examples/00_quick_start/wide_deep_movielens.ipynb +++ b/examples/00_quick_start/wide_deep_movielens.ipynb @@ -65,14 +65,14 @@ "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", - "from reco_utils.common.constants import (\n", + "from reco_utils.utils.constants import (\n", " DEFAULT_USER_COL as USER_COL,\n", " DEFAULT_ITEM_COL as ITEM_COL,\n", " DEFAULT_RATING_COL as RATING_COL,\n", " DEFAULT_PREDICTION_COL as PREDICT_COL,\n", " SEED\n", ")\n", - "from reco_utils.common import tf_utils, gpu_utils, plot\n", + "from reco_utils.utils import tf_utils, gpu_utils, plot\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.pandas_df_utils import user_item_pairs\n", "from reco_utils.dataset.python_splitters import python_random_split\n", diff --git a/examples/00_quick_start/xdeepfm_criteo.ipynb b/examples/00_quick_start/xdeepfm_criteo.ipynb index 8091941368..ae7d7f421a 100644 --- a/examples/00_quick_start/xdeepfm_criteo.ipynb +++ 
b/examples/00_quick_start/xdeepfm_criteo.ipynb @@ -53,7 +53,7 @@ "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", - "from reco_utils.common.constants import SEED\n", + "from reco_utils.utils.constants import SEED\n", "from reco_utils.recommender.deeprec.deeprec_utils import (\n", " download_deeprec_resources, prepare_hparams\n", ")\n", diff --git a/examples/01_prepare_data/data_split.ipynb b/examples/01_prepare_data/data_split.ipynb index 55922026bc..6119834d42 100644 --- a/examples/01_prepare_data/data_split.ipynb +++ b/examples/01_prepare_data/data_split.ipynb @@ -56,7 +56,7 @@ "import numpy as np\n", "from datetime import datetime, timedelta\n", "\n", - "from reco_utils.common.spark_utils import start_or_get_spark\n", + "from reco_utils.utils.spark_utils import start_or_get_spark\n", "from reco_utils.dataset.download_utils import maybe_download\n", "from reco_utils.dataset.python_splitters import (\n", " python_random_split, \n", diff --git a/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb b/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb index 72f3d9816a..d00ca8182c 100644 --- a/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/baseline_deep_dive.ipynb @@ -70,7 +70,7 @@ "import itertools\n", "import pandas as pd\n", "\n", - "from reco_utils.common.notebook_utils import is_jupyter\n", + "from reco_utils.utils.notebook_utils import is_jupyter\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.python_splitters import python_random_split\n", "from reco_utils.dataset.pandas_df_utils import filter_by\n", diff --git a/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb b/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb index 0e35bf35c8..df1b840a1b 100644 --- a/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/cornac_bivae_deep_dive.ipynb @@ -55,8 +55,8 @@ "from reco_utils.dataset.python_splitters import python_random_split\n", "from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", "from reco_utils.recommender.cornac.cornac_utils import predict_ranking\n", - "from reco_utils.common.timer import Timer\n", - "from reco_utils.common.constants import SEED\n", + "from reco_utils.utils.timer import Timer\n", + "from reco_utils.utils.constants import SEED\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"PyTorch version: {}\".format(torch.__version__))\n", diff --git a/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb b/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb index 2b79e67a52..29ab3f9fd1 100644 --- a/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/cornac_bpr_deep_dive.ipynb @@ -53,8 +53,8 @@ "from reco_utils.dataset.python_splitters import python_random_split\n", "from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", "from reco_utils.recommender.cornac.cornac_utils import predict_ranking\n", - "from reco_utils.common.timer import Timer\n", - "from reco_utils.common.constants import SEED\n", + "from reco_utils.utils.timer import Timer\n", + "from reco_utils.utils.constants import SEED\n", "\n", "print(\"System version: {}\".format(sys.version))\n", "print(\"Cornac version: 
{}\".format(cornac.__version__))" @@ -89,7 +89,7 @@ "\n", "### 1.1 Personalized Ranking from Implicit Feedback\n", "\n", - "The task of personalized ranking aims at providing each user a ranked list of items (recommendations). This is very common in scenarios where recommender systems are based on implicit user behavior (e.g. purchases, clicks). The available observations are only positive feedback where the non-observed ones are a mixture of real negative feedback and missing values.\n", + "The task of personalized ranking aims at providing each user a ranked list of items (recommendations). This is very utils in scenarios where recommender systems are based on implicit user behavior (e.g. purchases, clicks). The available observations are only positive feedback where the non-observed ones are a mixture of real negative feedback and missing values.\n", "\n", "One usual approach for item recommendation is directly predicting a preference score $\\hat{x}_{u,i}$ given to item $i$ by user $u$. BPR uses a different approach by using item pairs $(i, j)$ and optimizing for the correct ranking given preference of user $u$, thus, there are notions of *positive* and *negative* items. The training data $D_S : U \\times I \\times I$ is defined as:\n", "\n", @@ -118,7 +118,7 @@ "\n", "The preference scoring function $\\hat{x}_{uij}(\\Theta)$ could be an arbitrary real-valued function of the model parameter $\\Theta$. Thus, it makes BPR a general framework for modeling the relationship between triplets $(u, i, j)$ where different model classes like matrix factorization could be used for estimating $\\hat{x}_{uij}(\\Theta)$.\n", "\n", - "For the prior, one of the common pratices is to choose $p(\\Theta)$ following a normal distribution, which results in a nice form of L2 regularization in the final log-form of the objective function.\n", + "For the prior, one of the utils pratices is to choose $p(\\Theta)$ following a normal distribution, which results in a nice form of L2 regularization in the final log-form of the objective function.\n", "\n", "$$ p(\\Theta) \\sim N(0, \\Sigma_{\\Theta}) $$\n", "\n", diff --git a/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb b/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb index ba3fe2754b..9671ca4fdf 100644 --- a/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/lightgcn_deep_dive.ipynb @@ -52,13 +52,13 @@ "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.recommender.deeprec.models.graphrec.lightgcn import LightGCN\n", "from reco_utils.recommender.deeprec.DataModel.ImplicitCF import ImplicitCF\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.python_splitters import python_stratified_split\n", "from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", - "from reco_utils.common.constants import SEED as DEFAULT_SEED\n", + "from reco_utils.utils.constants import SEED as DEFAULT_SEED\n", "from reco_utils.recommender.deeprec.deeprec_utils import prepare_hparams\n", "\n", "print(\"System version: {}\".format(sys.version))\n", @@ -728,7 +728,7 @@ "\n", "Here there are the performances of LightGCN compared to [SAR](../00_quick_start/sar_movielens.ipynb) and [NCF](../00_quick_start/ncf_movielens.ipynb) on MovieLens dataset of 100k and 1m. 
The method of data loading and splitting is the same as that described above and the GPU used was a GeForce GTX 1080Ti.\n", "\n", "Settings common to the three models: `epochs=15, seed=42`.\n", "\n", "Settings for LightGCN: `embed_size=64, n_layers=3, batch_size=1024, decay=0.0001, learning_rate=0.015 `.\n", "\n", diff --git a/examples/02_model_collaborative_filtering/multi_vae_deep_dive.ipynb b/examples/02_model_collaborative_filtering/multi_vae_deep_dive.ipynb index 84b2c886fa..b2b94f6b5a 100644 --- a/examples/02_model_collaborative_filtering/multi_vae_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/multi_vae_deep_dive.ipynb @@ -96,14 +96,14 @@ "import tensorflow as tf\n", "import keras\n", "\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.split_utils import min_rating_filter_pandas\n", "from reco_utils.dataset.python_splitters import numpy_stratified_split\n", "from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", "\n", "from reco_utils.dataset.sparse import AffinityMatrix\n", - "from reco_utils.common.python_utils import binarize\n", + "from reco_utils.utils.python_utils import binarize\n", "from reco_utils.recommender.vae.multinomial_vae import Mult_VAE\n", "\n", "from tempfile import TemporaryDirectory\n", diff --git a/examples/02_model_collaborative_filtering/standard_vae_deep_dive.ipynb b/examples/02_model_collaborative_filtering/standard_vae_deep_dive.ipynb index 328b77a4e3..e3423c070e 100644 --- a/examples/02_model_collaborative_filtering/standard_vae_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/standard_vae_deep_dive.ipynb @@ -96,15 +96,15 @@ "import tensorflow as tf\n", "import keras\n", "\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.split_utils import min_rating_filter_pandas\n", "from reco_utils.dataset.python_splitters import numpy_stratified_split\n", "from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", - "from reco_utils.common.constants import SEED as DEFAULT_SEED\n", + "from reco_utils.utils.constants import SEED as DEFAULT_SEED\n", "\n", "from reco_utils.dataset.sparse import AffinityMatrix\n", - "from reco_utils.common.python_utils import binarize\n", + "from reco_utils.utils.python_utils import binarize\n", "from reco_utils.recommender.vae.standard_vae import StandardVAE\n", "\n", "print(\"System version: {}\".format(sys.version))\n", diff --git a/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb b/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb index f048bd5f9a..5e804a9ecc 100644 --- a/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb +++ b/examples/02_model_collaborative_filtering/surprise_svd_deep_dive.ipynb @@ -105,7 +105,7 @@ "import scrapbook as sb\n", "import pandas as pd\n", "\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.python_splitters import python_random_split\n", "from reco_utils.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, \n", diff --git 
a/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb b/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb index 05d56122a2..b10283149c 100644 --- a/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb +++ b/examples/02_model_content_based_filtering/mmlspark_lightgbm_criteo.ipynb @@ -71,8 +71,8 @@ "import papermill as pm\n", "import scrapbook as sb\n", "\n", - "from reco_utils.common.spark_utils import start_or_get_spark\n", - "from reco_utils.common.notebook_utils import is_databricks\n", + "from reco_utils.utils.spark_utils import start_or_get_spark\n", + "from reco_utils.utils.notebook_utils import is_databricks\n", "from reco_utils.dataset.criteo import load_spark_df\n", "from reco_utils.dataset.spark_splitters import spark_random_split\n", "\n", diff --git a/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb b/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb index 5e48dc1a86..52ea265657 100644 --- a/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb +++ b/examples/02_model_content_based_filtering/vowpal_wabbit_deep_dive.ipynb @@ -86,7 +86,7 @@ "import papermill as pm\n", "import scrapbook as sb\n", "\n", - "from reco_utils.common.notebook_utils import is_jupyter\n", + "from reco_utils.utils.notebook_utils import is_jupyter\n", "from reco_utils.dataset.movielens import load_pandas_df\n", "from reco_utils.dataset.python_splitters import python_random_split\n", "from reco_utils.evaluation.python_evaluation import (rmse, mae, exp_var, rsquared, get_top_k_items,\n", diff --git a/examples/02_model_hybrid/fm_deep_dive.ipynb b/examples/02_model_hybrid/fm_deep_dive.ipynb index eb1754bed2..a22046f85f 100644 --- a/examples/02_model_hybrid/fm_deep_dive.ipynb +++ b/examples/02_model_hybrid/fm_deep_dive.ipynb @@ -251,8 +251,8 @@ "%matplotlib notebook\n", "from matplotlib import pyplot as plt\n", "\n", - "from reco_utils.common.constants import SEED\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.constants import SEED\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.dataset.download_utils import maybe_download, unzip_file\n", "from reco_utils.tuning.parameter_sweep import generate_param_grid\n", "from reco_utils.dataset.pandas_df_utils import LibffmConverter\n", diff --git a/examples/02_model_hybrid/ncf_deep_dive.ipynb b/examples/02_model_hybrid/ncf_deep_dive.ipynb index f423cf2abf..4f0e3f3894 100644 --- a/examples/02_model_hybrid/ncf_deep_dive.ipynb +++ b/examples/02_model_hybrid/ncf_deep_dive.ipynb @@ -53,14 +53,14 @@ "import tensorflow as tf\n", "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.recommender.ncf.ncf_singlenode import NCF\n", "from reco_utils.recommender.ncf.dataset import Dataset as NCFDataset\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.python_splitters import python_chrono_split\n", "from reco_utils.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, \n", " recall_at_k, get_top_k_items)\n", - "from reco_utils.common.constants import SEED as DEFAULT_SEED\n", + "from reco_utils.utils.constants import SEED as DEFAULT_SEED\n", "\n", "\n", "print(\"System version: {}\".format(sys.version))\n", @@ -196,7 +196,7 @@ "\n", "To evaluate the performance of item recommendation, we adopted the 
leave-one-out evaluation.\n", "\n", - "For each user, we held out his/her latest interaction as the test set and utilized the remaining data for training. We use `python_chrono_split` to achieve this. And since it is too time-consuming to rank all items for every user during evaluation, we followed the common strategy that randomly samples 100 items that are not interacted by the user, ranking the test item among the 100 items. Our test samples will be constructed by `NCFDataset`." + "For each user, we hold out their latest interaction as the test set and use the remaining data for training. We use `python_chrono_split` to achieve this. Since it is too time-consuming to rank all items for every user during evaluation, we follow the common strategy of randomly sampling 100 items the user has not interacted with, ranking the test item among the 100 items. Our test samples will be constructed by `NCFDataset`." ] }, { diff --git a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb index ab6269b660..1733c06b1f 100644 --- a/examples/03_evaluate/als_movielens_diversity_metrics.ipynb +++ b/examples/03_evaluate/als_movielens_diversity_metrics.ipynb @@ -68,12 +68,12 @@ "from pyspark.sql.types import StructType, StructField\n", "from pyspark.sql.types import StringType, FloatType, IntegerType, LongType\n", "\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.dataset import movielens\n", - "from reco_utils.common.notebook_utils import is_jupyter\n", + "from reco_utils.utils.notebook_utils import is_jupyter\n", "from reco_utils.dataset.spark_splitters import spark_random_split\n", "from reco_utils.evaluation.spark_evaluation import SparkRatingEvaluation, SparkRankingEvaluation\n", - "from reco_utils.common.spark_utils import start_or_get_spark\n", + "from reco_utils.utils.spark_utils import start_or_get_spark\n", "\n", "from reco_utils.evaluation.spark_diversity_evaluation import DiversityEvaluation\n", "from pyspark.sql.window import Window\n", diff --git a/examples/03_evaluate/evaluation.ipynb b/examples/03_evaluate/evaluation.ipynb index 4e9a6ea0f4..3187053fee 100644 --- a/examples/03_evaluate/evaluation.ipynb +++ b/examples/03_evaluate/evaluation.ipynb @@ -63,7 +63,7 @@ "import pyspark\n", "from sklearn.preprocessing import minmax_scale\n", "\n", - "from reco_utils.common.spark_utils import start_or_get_spark\n", + "from reco_utils.utils.spark_utils import start_or_get_spark\n", "from reco_utils.evaluation.spark_evaluation import SparkRankingEvaluation, SparkRatingEvaluation\n", "from reco_utils.evaluation.python_evaluation import auc, logloss\n", "\n", diff --git a/examples/04_model_select_and_optimize/azureml_hyperdrive_wide_and_deep.ipynb b/examples/04_model_select_and_optimize/azureml_hyperdrive_wide_and_deep.ipynb index e884104cf1..5f3d4b2538 100644 --- a/examples/04_model_select_and_optimize/azureml_hyperdrive_wide_and_deep.ipynb +++ b/examples/04_model_select_and_optimize/azureml_hyperdrive_wide_and_deep.ipynb @@ -85,9 +85,9 @@ "import azureml.widgets as widgets\n", "import azureml.train.hyperdrive as hd\n", "\n", - "from reco_utils.common.timer import Timer\n", - "from reco_utils.common.constants import SEED\n", - "from reco_utils.common.tf_utils import pandas_input_fn_for_saved_model\n", + "from reco_utils.utils.timer import Timer\n", + "from reco_utils.utils.constants import SEED\n", + "from reco_utils.utils.tf_utils import 
pandas_input_fn_for_saved_model\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.pandas_df_utils import user_item_pairs\n", "from reco_utils.dataset.python_splitters import python_random_split\n", diff --git a/examples/04_model_select_and_optimize/nni_ncf.ipynb b/examples/04_model_select_and_optimize/nni_ncf.ipynb index cbbd081888..396691f33b 100644 --- a/examples/04_model_select_and_optimize/nni_ncf.ipynb +++ b/examples/04_model_select_and_optimize/nni_ncf.ipynb @@ -67,7 +67,7 @@ "tf.get_logger().setLevel('ERROR') # only show error messages\n", "\n", "import reco_utils\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.python_splitters import python_chrono_split\n", "from reco_utils.evaluation.python_evaluation import rmse, precision_at_k, ndcg_at_k\n", diff --git a/examples/04_model_select_and_optimize/nni_surprise_svd.ipynb b/examples/04_model_select_and_optimize/nni_surprise_svd.ipynb index 87fc0ac02e..80fa152563 100644 --- a/examples/04_model_select_and_optimize/nni_surprise_svd.ipynb +++ b/examples/04_model_select_and_optimize/nni_surprise_svd.ipynb @@ -61,7 +61,7 @@ "from tempfile import TemporaryDirectory\n", "\n", "import reco_utils\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.python_splitters import python_random_split\n", "from reco_utils.evaluation.python_evaluation import rmse, precision_at_k, ndcg_at_k\n", diff --git a/examples/04_model_select_and_optimize/train_scripts/wide_deep_training.py b/examples/04_model_select_and_optimize/train_scripts/wide_deep_training.py index c5e0462c7a..6e7af2074c 100644 --- a/examples/04_model_select_and_optimize/train_scripts/wide_deep_training.py +++ b/examples/04_model_select_and_optimize/train_scripts/wide_deep_training.py @@ -19,7 +19,7 @@ except ImportError: run = None -from reco_utils.common.constants import ( +from reco_utils.utils.constants import ( DEFAULT_USER_COL, DEFAULT_ITEM_COL, DEFAULT_RATING_COL, diff --git a/examples/04_model_select_and_optimize/tuning_spark_als.ipynb b/examples/04_model_select_and_optimize/tuning_spark_als.ipynb index 6ac74eb4d0..bec2b4c872 100644 --- a/examples/04_model_select_and_optimize/tuning_spark_als.ipynb +++ b/examples/04_model_select_and_optimize/tuning_spark_als.ipynb @@ -81,8 +81,8 @@ "from hyperopt.pyll.base import scope\n", "from hyperopt.pyll.stochastic import sample\n", "\n", - "from reco_utils.common.timer import Timer\n", - "from reco_utils.common.spark_utils import start_or_get_spark\n", + "from reco_utils.utils.timer import Timer\n", + "from reco_utils.utils.spark_utils import start_or_get_spark\n", "from reco_utils.evaluation.spark_evaluation import SparkRankingEvaluation, SparkRatingEvaluation\n", "from reco_utils.dataset.movielens import load_spark_df\n", "from reco_utils.dataset.spark_splitters import spark_random_split\n", diff --git a/examples/05_operationalize/als_movie_o16n.ipynb b/examples/05_operationalize/als_movie_o16n.ipynb index 6e519d5b7f..a8c5097e87 100644 --- a/examples/05_operationalize/als_movie_o16n.ipynb +++ b/examples/05_operationalize/als_movie_o16n.ipynb @@ -78,7 +78,7 @@ "import urllib\n", "\n", "from azure.common.client_factory import get_client_from_cli_profile\n", "import azure.mgmt.cosmosdb\n", "import 
azureml.core\n", "from azureml.core import Workspace\n", @@ -96,14 +96,14 @@ "from pyspark.sql.types import StructType, StructField\n", "from pyspark.sql.types import FloatType, IntegerType, LongType\n", "\n", - "from reco_utils.common.timer import Timer\n", - "from reco_utils.common.spark_utils import start_or_get_spark\n", + "from reco_utils.utils.timer import Timer\n", + "from reco_utils.utils.spark_utils import start_or_get_spark\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.cosmos_cli import find_collection, read_collection, read_database, find_database\n", "from reco_utils.dataset.download_utils import maybe_download\n", "from reco_utils.dataset.spark_splitters import spark_random_split\n", "from reco_utils.evaluation.spark_evaluation import SparkRatingEvaluation, SparkRankingEvaluation\n", - "from reco_utils.common.notebook_utils import is_databricks\n", + "from reco_utils.utils.notebook_utils import is_databricks\n", "\n", "print(\"Azure SDK version:\", azureml.core.VERSION)" ] @@ -165,7 +165,7 @@ "1. [Azure ML Service](https://azure.microsoft.com/en-us/services/machine-learning-service/)\n", " 1. [Azure ML Workspace](https://docs.microsoft.com/en-us/azure/machine-learning/concept-workspace)\n", " 1. [Azure Application Insights](https://azure.microsoft.com/en-us/services/monitor/)\n", - " 1. [Azure Storage](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-overview)\n", + " 1. [Azure Storage](https://docs.microsoft.com/en-us/azure/storage/utils/storage-account-overview)\n", " 1. [Azure Key Vault](https://azure.microsoft.com/en-us/services/key-vault/) \n", "\n", "1. [Azure Cosmos DB](https://azure.microsoft.com/en-us/services/cosmos-db/)\n", diff --git a/examples/05_operationalize/lightgbm_criteo_o16n.ipynb b/examples/05_operationalize/lightgbm_criteo_o16n.ipynb index d988506c8b..3f6948e5fc 100644 --- a/examples/05_operationalize/lightgbm_criteo_o16n.ipynb +++ b/examples/05_operationalize/lightgbm_criteo_o16n.ipynb @@ -95,7 +95,7 @@ "import shutil\n", "\n", "from reco_utils.dataset.criteo import get_spark_schema, load_spark_df\n", - "from reco_utils.common.k8s_utils import qps_to_replicas, replicas_to_qps, nodes_to_replicas\n", + "from reco_utils.utils.k8s_utils import qps_to_replicas, replicas_to_qps, nodes_to_replicas\n", "\n", "from azureml.core import Workspace\n", "from azureml.core import VERSION as azureml_version\n", diff --git a/examples/06_benchmarks/benchmark_utils.py b/examples/06_benchmarks/benchmark_utils.py index 20f09bef63..26309b044c 100644 --- a/examples/06_benchmarks/benchmark_utils.py +++ b/examples/06_benchmarks/benchmark_utils.py @@ -7,7 +7,7 @@ import surprise import cornac -from reco_utils.common.constants import ( +from reco_utils.utils.constants import ( COL_DICT, DEFAULT_K, DEFAULT_USER_COL, @@ -17,8 +17,8 @@ DEFAULT_TIMESTAMP_COL, SEED, ) -from reco_utils.common.timer import Timer -from reco_utils.common.spark_utils import start_or_get_spark +from reco_utils.utils.timer import Timer +from reco_utils.utils.spark_utils import start_or_get_spark from reco_utils.recommender.sar.sar_singlenode import SARSingleNode from reco_utils.recommender.ncf.ncf_singlenode import NCF from reco_utils.recommender.ncf.dataset import Dataset as NCFDataset @@ -44,12 +44,7 @@ precision_at_k, recall_at_k, ) -from reco_utils.evaluation.python_evaluation import ( - rmse, - mae, - rsquared, - exp_var -) +from reco_utils.evaluation.python_evaluation import rmse, mae, rsquared, exp_var def prepare_training_als(train, test): @@ -82,8 
+77,7 @@ def prepare_metrics_als(train, test): ) ) spark = start_or_get_spark() - return spark.createDataFrame(train, schema), spark.createDataFrame(test, - schema) + return spark.createDataFrame(train, schema), spark.createDataFrame(test, schema) def predict_als(model, test): @@ -172,10 +166,7 @@ def prepare_training_fastai(train, test): def train_fastai(params, data): model = collab_learner( - data, - n_factors=params["n_factors"], - y_range=params["y_range"], - wd=params["wd"] + data, n_factors=params["n_factors"], y_range=params["y_range"], wd=params["wd"] ) with Timer() as t: model.fit_one_cycle(cyc_len=params["epochs"], max_lr=params["max_lr"]) @@ -267,10 +258,7 @@ def recommend_k_ncf(model, test, train, top_k=DEFAULT_K, remove_seen=True): } ) merged = pd.merge( - train, - topk_scores, - on=[DEFAULT_USER_COL, DEFAULT_ITEM_COL], - how="outer" + train, topk_scores, on=[DEFAULT_USER_COL, DEFAULT_ITEM_COL], how="outer" ) topk_scores = merged[merged[DEFAULT_RATING_COL].isnull()].drop( DEFAULT_RATING_COL, axis=1 @@ -280,8 +268,7 @@ def recommend_k_ncf(model, test, train, top_k=DEFAULT_K, remove_seen=True): def prepare_training_cornac(train, test): return cornac.data.Dataset.from_uir( - train.drop(DEFAULT_TIMESTAMP_COL, axis=1).itertuples(index=False), - seed=SEED + train.drop(DEFAULT_TIMESTAMP_COL, axis=1).itertuples(index=False), seed=SEED ) @@ -344,11 +331,7 @@ def train_lightgcn(params, data): return model, t -def recommend_k_lightgcn(model, - test, - train, - top_k=DEFAULT_K, - remove_seen=True): +def recommend_k_lightgcn(model, test, train, top_k=DEFAULT_K, remove_seen=True): with Timer() as t: topk_scores = model.recommend_k_items( test, top_k=top_k, remove_seen=remove_seen diff --git a/examples/06_benchmarks/movielens.ipynb b/examples/06_benchmarks/movielens.ipynb index 780cea7ea8..f82ada1599 100644 --- a/examples/06_benchmarks/movielens.ipynb +++ b/examples/06_benchmarks/movielens.ipynb @@ -106,8 +106,8 @@ "tf.get_logger().setLevel('ERROR') # only show error messages\n", "import surprise\n", "\n", - "from reco_utils.common.general_utils import get_number_processors\n", - "from reco_utils.common.gpu_utils import get_cuda_version, get_cudnn_version\n", + "from reco_utils.utils.general_utils import get_number_processors\n", + "from reco_utils.utils.gpu_utils import get_cuda_version, get_cudnn_version\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.python_splitters import python_stratified_split\n", "\n", diff --git a/examples/07_tutorials/KDD2020-tutorial/step5_run_lightgcn.ipynb b/examples/07_tutorials/KDD2020-tutorial/step5_run_lightgcn.ipynb index fb7ab59160..68b5f0e4c2 100644 --- a/examples/07_tutorials/KDD2020-tutorial/step5_run_lightgcn.ipynb +++ b/examples/07_tutorials/KDD2020-tutorial/step5_run_lightgcn.ipynb @@ -36,13 +36,13 @@ "import pandas as pd\n", "import numpy as np\n", "import tensorflow as tf\n", - "from reco_utils.common.timer import Timer\n", + "from reco_utils.utils.timer import Timer\n", "from reco_utils.recommender.deeprec.models.graphrec.lightgcn import LightGCN\n", "from reco_utils.recommender.deeprec.DataModel.ImplicitCF import ImplicitCF\n", "from reco_utils.dataset import movielens\n", "from reco_utils.dataset.python_splitters import python_stratified_split\n", "from reco_utils.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k\n", - "from reco_utils.common.constants import SEED as DEFAULT_SEED\n", + "from reco_utils.utils.constants import SEED as DEFAULT_SEED\n", "from 
reco_utils.recommender.deeprec.deeprec_utils import prepare_hparams\n", "from reco_utils.recommender.deeprec.deeprec_utils import cal_metric\n", "from utils.general import *\n", diff --git a/reco_utils/README.md b/reco_utils/README.md index 2d2a5d54fe..30535788f3 100644 --- a/reco_utils/README.md +++ b/reco_utils/README.md @@ -84,7 +84,7 @@ It is also possible to install directly from GitHub. Or from a specific branch a # Contents -## [Common](common) +## [Utils](utils) This submodule contains high-level utilities for defining constants used in most algorithms as well as helper functions for managing aspects of different frameworks: GPU, Spark, Jupyter notebook. diff --git a/reco_utils/dataset/amazon_reviews.py b/reco_utils/dataset/amazon_reviews.py index 5b360d1ffc..75f70215e2 100644 --- a/reco_utils/dataset/amazon_reviews.py +++ b/reco_utils/dataset/amazon_reviews.py @@ -11,7 +11,7 @@ import logging import _pickle as cPickle -from reco_utils.common.constants import SEED +from reco_utils.utils.constants import SEED from reco_utils.dataset.download_utils import maybe_download, download_path diff --git a/reco_utils/dataset/criteo.py b/reco_utils/dataset/criteo.py index 675c8a42dc..36e435a64a 100644 --- a/reco_utils/dataset/criteo.py +++ b/reco_utils/dataset/criteo.py @@ -12,7 +12,7 @@ pass # so the environment without spark doesn't break from reco_utils.dataset.download_utils import maybe_download, download_path -from reco_utils.common.notebook_utils import is_databricks +from reco_utils.utils.notebook_utils import is_databricks CRITEO_URL = { @@ -40,11 +40,11 @@ def load_pandas_df(size="sample", local_cache_path=None, header=DEFAULT_HEADER): The schema is: .. code-block:: python - +