Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug in Deeprec tests and adding more tests #1957

Merged
merged 11 commits into from
Jul 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions tests/ci/azureml_tests/test_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,13 +204,6 @@
],
"group_gpu_001": [ # Total group time: 492.62s
"tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works)
"tests/unit/recommenders/models/test_deeprec_model.py::test_xdeepfm_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_item2item_component_definition",
# "tests/unit/recommenders/models/test_deeprec_model.py::test_slirec_component_definition", # FIXME: Issue #1953
# "tests/unit/recommenders/models/test_deeprec_model.py::test_nextitnet_component_definition", # FIXME: Issue #1953
# "tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition", # FIXME: Issue #1953
"tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition",
"tests/unit/recommenders/models/test_rbm.py::test_sampling_funct",
"tests/unit/recommenders/models/test_rbm.py::test_train_param_init",
"tests/unit/recommenders/models/test_rbm.py::test_save_load",
Expand All @@ -237,9 +230,19 @@
"tests/unit/recommenders/models/test_ncf_dataset.py::test_datafile_missing_column",
# "tests/unit/recommenders/models/test_sasrec_model.py::test_prepare_data", # FIXME: it takes too long to run
# "tests/unit/recommenders/models/test_sasrec_model.py::test_sampler", # FIXME: it takes too long to run
#"tests/unit/recommenders/models/test_sasrec_model.py::test_sasrec", # FIXME: it takes too long to run
# "tests/unit/recommenders/models/test_sasrec_model.py::test_sasrec", # FIXME: it takes too long to run
# "tests/unit/recommenders/models/test_sasrec_model.py::test_ssept", # FIXME: it takes too long to run
],
"group_gpu_002": [ # Total group time: not yet measured (new group)
"tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works)
"tests/unit/recommenders/models/test_deeprec_model.py::test_xdeepfm_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_dkn_item2item_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_slirec_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_nextitnet_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_sum_component_definition",
"tests/unit/recommenders/models/test_deeprec_model.py::test_lightgcn_component_definition",
],
"group_notebooks_gpu_001": [ # Total group time: 563.35s
"tests/unit/examples/test_notebooks_gpu.py::test_gpu_vm", # 0.76s (Always the first test to check the GPU works)
"tests/unit/examples/test_notebooks_gpu.py::test_dkn_quickstart",
Expand Down
225 changes: 138 additions & 87 deletions tests/unit/recommenders/models/test_deeprec_model.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import pytest
import os
import pytest

from recommenders.datasets import movielens
from recommenders.datasets.amazon_reviews import (
download_and_extract,
Expand Down Expand Up @@ -36,36 +37,16 @@
pass # skip this import if we are in cpu environment


@pytest.mark.gpu
def test_xdeepfm_component_definition(deeprec_resource_path):
    """Check that the core xDeepFM graph components exist after construction."""
    resource_dir = os.path.join(deeprec_resource_path, "xdeepfm")
    config_file = os.path.join(resource_dir, "xDeepFM.yaml")

    # Download the config + data archive only when it is not already cached.
    if not os.path.exists(config_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            resource_dir,
            "xdeepfmresources.zip",
        )

    model = XDeepFMModel(prepare_hparams(config_file), FFMTextIterator)

    # The constructor must have built the logit op, the update op and the iterator.
    for component in (model.logit, model.update, model.iterator):
        assert component is not None


@pytest.mark.gpu
@pytest.fixture(scope="module")
def dkn_files(deeprec_resource_path):
data_path = os.path.join(deeprec_resource_path, "dkn")
yaml_file = os.path.join(data_path, "dkn.yaml")
news_feature_file = os.path.join(data_path, r"doc_feature.txt")
user_history_file = os.path.join(data_path, r"user_history.txt")
wordEmb_file = os.path.join(data_path, r"word_embeddings_100.npy")
entityEmb_file = os.path.join(data_path, r"TransE_entity2vec_100.npy")
contextEmb_file = os.path.join(data_path, r"TransE_context2vec_100.npy")
news_feature_file = os.path.join(data_path, "doc_feature.txt")
user_history_file = os.path.join(data_path, "user_history.txt")
wordEmb_file = os.path.join(data_path, "word_embeddings_100.npy")
entityEmb_file = os.path.join(data_path, "TransE_entity2vec_100.npy")
contextEmb_file = os.path.join(data_path, "TransE_context2vec_100.npy")

download_deeprec_resources(
"https://recodatasets.z20.web.core.windows.net/deeprec/",
Expand All @@ -83,6 +64,83 @@ def dkn_files(deeprec_resource_path):
)


@pytest.fixture(scope="module")
def sequential_files(deeprec_resource_path):
    """Download and preprocess the Amazon Movies_and_TV dataset for the
    sequential recommenders (SLi-Rec, NextItNet, SUM).

    NOTE(fix): the ``@pytest.mark.gpu`` decorator was removed — marks applied
    to fixtures have no effect and trigger a deprecation warning in pytest;
    the GPU mark belongs on the tests that consume this fixture.

    Args:
        deeprec_resource_path: base directory fixture for deeprec test data.

    Returns:
        tuple: ``(data_path, user_vocab, item_vocab, cate_vocab)`` paths
        produced by ``data_preprocessing``.
    """
    data_path = os.path.join(deeprec_resource_path, "slirec")
    train_file = os.path.join(data_path, "train_data")
    valid_file = os.path.join(data_path, "valid_data")
    test_file = os.path.join(data_path, "test_data")
    user_vocab = os.path.join(data_path, "user_vocab.pkl")
    item_vocab = os.path.join(data_path, "item_vocab.pkl")
    cate_vocab = os.path.join(data_path, "category_vocab.pkl")

    reviews_name = "reviews_Movies_and_TV_5.json"
    meta_name = "meta_Movies_and_TV.json"
    reviews_file = os.path.join(data_path, reviews_name)
    meta_file = os.path.join(data_path, meta_name)

    # number of negative instances with a positive instance for validation
    valid_num_ngs = 4
    # number of negative instances with a positive instance for testing
    test_num_ngs = 9
    # sample a small item set for training and testing here for example
    sample_rate = 0.01

    input_files = [
        reviews_file,
        meta_file,
        train_file,
        valid_file,
        test_file,
        user_vocab,
        item_vocab,
        cate_vocab,
    ]
    download_and_extract(reviews_name, reviews_file)
    download_and_extract(meta_name, meta_file)
    data_preprocessing(
        *input_files,
        sample_rate=sample_rate,
        valid_num_ngs=valid_num_ngs,
        test_num_ngs=test_num_ngs
    )

    return (
        data_path,
        user_vocab,
        item_vocab,
        cate_vocab,
    )


@pytest.mark.gpu
def test_xdeepfm_component_definition(deeprec_resource_path):
    """Verify the xDeepFM graph components and the hyperparameters
    loaded from ``xDeepFM.yaml``."""
    resource_dir = os.path.join(deeprec_resource_path, "xdeepfm")
    config_file = os.path.join(resource_dir, "xDeepFM.yaml")

    # Fetch the config and resources only if they are not cached locally.
    if not os.path.exists(config_file):
        download_deeprec_resources(
            "https://recodatasets.z20.web.core.windows.net/deeprec/",
            resource_dir,
            "xdeepfmresources.zip",
        )

    model = XDeepFMModel(prepare_hparams(config_file), FFMTextIterator)

    # Graph components built by the constructor.
    assert model.logit is not None
    assert model.update is not None
    assert model.iterator is not None

    # Hyperparameter values expected from the downloaded YAML config.
    assert model.hparams is not None
    expected_hparams = {
        "model_type": "xDeepFM",
        "epochs": 50,
        "batch_size": 128,
        "learning_rate": 0.0005,
        "loss": "log_loss",
        "optimizer": "adam",
    }
    for attr, value in expected_hparams.items():
        assert getattr(model.hparams, attr) == value


@pytest.mark.gpu
def test_dkn_component_definition(dkn_files):
# Load params from fixture
Expand All @@ -107,12 +165,18 @@ def test_dkn_component_definition(dkn_files):
epochs=1,
learning_rate=0.0001,
)
assert hparams is not None

model = DKN(hparams, DKNTextIterator)
assert model.logit is not None
assert model.update is not None
assert model.iterator is not None
assert model.hparams is not None
assert model.hparams.model_type == "dkn"
assert model.hparams.epochs == 1
assert model.hparams.batch_size == 100
assert model.hparams.learning_rate == 0.0001
assert model.hparams.loss == "log_loss"
assert model.hparams.optimizer == "adam"


@pytest.mark.gpu
Expand Down Expand Up @@ -143,65 +207,21 @@ def test_dkn_item2item_component_definition(dkn_files):
use_entity=True,
use_context=True,
)
assert hparams is not None

hparams.neg_num = 9
model_item2item = DKNItem2Item(hparams, DKNItem2itemTextIterator)
assert model_item2item.pred_logits is not None
assert model_item2item.update is not None
assert model_item2item.iterator is not None


@pytest.fixture(scope="module")
def sequential_files(deeprec_resource_path):
    """Download and preprocess the Amazon Movies_and_TV dataset used by the
    sequential recommender tests.

    NOTE(fix): the ``@pytest.mark.gpu`` decorator was removed — marks applied
    to fixtures have no effect and trigger a deprecation warning in pytest;
    mark the consuming tests instead.

    Args:
        deeprec_resource_path: base directory fixture for deeprec test data.

    Returns:
        tuple: ``(data_path, user_vocab, item_vocab, cate_vocab)`` paths
        produced by ``data_preprocessing``.
    """
    data_path = os.path.join(deeprec_resource_path, "slirec")
    train_file = os.path.join(data_path, "train_data")
    valid_file = os.path.join(data_path, "valid_data")
    test_file = os.path.join(data_path, "test_data")
    user_vocab = os.path.join(data_path, "user_vocab.pkl")
    item_vocab = os.path.join(data_path, "item_vocab.pkl")
    cate_vocab = os.path.join(data_path, "category_vocab.pkl")

    reviews_name = "reviews_Movies_and_TV_5.json"
    meta_name = "meta_Movies_and_TV.json"
    reviews_file = os.path.join(data_path, reviews_name)
    meta_file = os.path.join(data_path, meta_name)

    # number of negative instances with a positive instance for validation
    valid_num_ngs = 4
    # number of negative instances with a positive instance for testing
    test_num_ngs = 9
    # sample a small item set for training and testing here for example
    sample_rate = 0.01

    input_files = [
        reviews_file,
        meta_file,
        train_file,
        valid_file,
        test_file,
        user_vocab,
        item_vocab,
        cate_vocab,
    ]
    download_and_extract(reviews_name, reviews_file)
    download_and_extract(meta_name, meta_file)
    data_preprocessing(
        *input_files,
        sample_rate=sample_rate,
        valid_num_ngs=valid_num_ngs,
        test_num_ngs=test_num_ngs
    )

    return (
        data_path,
        user_vocab,
        item_vocab,
        cate_vocab,
    )
assert model_item2item.hparams is not None
assert model_item2item.hparams.model_type == "dkn"
assert model_item2item.hparams.epochs == 1
assert model_item2item.hparams.batch_size == 100
assert model_item2item.hparams.learning_rate == 0.0005
assert model_item2item.hparams.loss == "log_loss"
assert model_item2item.hparams.optimizer == "adam"
assert model_item2item.hparams.max_grad_norm == 0.5
assert model_item2item.hparams.his_size == 20


@pytest.mark.gpu
Expand All @@ -223,12 +243,22 @@ def test_slirec_component_definition(sequential_files, deeprec_config_path):
cate_vocab=cate_vocab,
need_sample=True,
)
assert hparams is not None

model = SLI_RECModel(hparams, SequentialIterator)
assert model.logit is not None
assert model.update is not None
assert model.iterator is not None
assert model.hparams is not None
assert model.hparams.model_type == "sli_rec"
assert model.hparams.epochs == 1
assert model.hparams.batch_size == 400
assert model.hparams.learning_rate == 0.001
assert model.hparams.loss == "softmax"
assert model.hparams.optimizer == "adam"
assert model.hparams.train_num_ngs == 4
assert model.hparams.embed_l2 == 0.0
assert model.hparams.layer_l2 == 0.0
assert model.hparams.need_sample is True


@pytest.mark.gpu
Expand All @@ -251,12 +281,22 @@ def test_nextitnet_component_definition(sequential_files, deeprec_config_path):
cate_vocab=cate_vocab,
need_sample=True,
)
assert hparams_nextitnet is not None

model_nextitnet = NextItNetModel(hparams_nextitnet, NextItNetIterator)
assert model_nextitnet.logit is not None
assert model_nextitnet.update is not None
assert model_nextitnet.iterator is not None
assert model_nextitnet.hparams is not None
assert model_nextitnet.hparams.model_type == "NextItNet"
assert model_nextitnet.hparams.epochs == 1
assert model_nextitnet.hparams.batch_size == 400
assert model_nextitnet.hparams.learning_rate == 0.001
assert model_nextitnet.hparams.loss == "softmax"
assert model_nextitnet.hparams.optimizer == "adam"
assert model_nextitnet.hparams.train_num_ngs == 4
assert model_nextitnet.hparams.embed_l2 == 0.0
assert model_nextitnet.hparams.layer_l2 == 0.0
assert model_nextitnet.hparams.need_sample is True


@pytest.mark.gpu
Expand All @@ -279,12 +319,22 @@ def test_sum_component_definition(sequential_files, deeprec_config_path):
cate_vocab=cate_vocab,
need_sample=True,
)
assert hparams_sum is not None

model_sum = SUMModel(hparams_sum, SequentialIterator)
assert model_sum.logit is not None
assert model_sum.update is not None
assert model_sum.iterator is not None
assert model_sum.hparams is not None
assert model_sum.hparams.model_type == "SUM"
assert model_sum.hparams.epochs == 1
assert model_sum.hparams.batch_size == 400
assert model_sum.hparams.learning_rate == 0.001
assert model_sum.hparams.loss == "softmax"
assert model_sum.hparams.optimizer == "adam"
assert model_sum.hparams.train_num_ngs == 4
assert model_sum.hparams.embed_l2 == 0.0
assert model_sum.hparams.layer_l2 == 0.0
assert model_sum.hparams.need_sample is True


@pytest.mark.gpu
Expand All @@ -296,16 +346,17 @@ def test_lightgcn_component_definition(deeprec_config_path):

data = ImplicitCF(train=train, test=test)

embed_size = 64
hparams = prepare_hparams(yaml_file, embed_size=embed_size)
hparams = prepare_hparams(yaml_file, embed_size=64)
model = LightGCN(hparams, data)

assert model.norm_adj is not None
assert model.ua_embeddings.shape == [data.n_users, embed_size]
assert model.ia_embeddings.shape == [data.n_items, embed_size]
assert model.ua_embeddings.shape == [943, 64]
assert model.ia_embeddings.shape == [1682, 64]
assert model.u_g_embeddings is not None
assert model.pos_i_g_embeddings is not None
assert model.neg_i_g_embeddings is not None
assert model.batch_ratings is not None
assert model.loss is not None
assert model.opt is not None
assert model.batch_size == 1024
assert model.epochs == 1000