- Sponsor
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add 'diagonal' parameter for LdaModel.diff (#1448)
* add flags for diagonal and annotation * make matrix default * remove duplication * raise error on diff no. of topics * add docstrings * Fix flake8 * rename annotation matrix variable * add tests * fix indent * flake8 fixes
1 parent
5f63081
commit 3cb8495
Showing
2 changed files
with
58 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
# Copyright (C) 2016 Radim Rehurek <[email protected]> | ||
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html | ||
|
||
import logging | ||
import unittest | ||
import numpy as np | ||
|
||
|
@@ -31,14 +32,22 @@ def setUp(self): | |
self.model = LdaModel(corpus=self.corpus, id2word=self.dictionary, num_topics=self.num_topics, passes=10) | ||
|
||
def testBasic(self): | ||
# test for matrix case | ||
mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms) | ||
|
||
self.assertEqual(mdiff.shape, (self.num_topics, self.num_topics)) | ||
self.assertEquals(len(annotation), self.num_topics) | ||
self.assertEquals(len(annotation[0]), self.num_topics) | ||
|
||
# test for diagonal case | ||
mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms, diagonal=True) | ||
|
||
self.assertEqual(mdiff.shape, (self.num_topics,)) | ||
self.assertEquals(len(annotation), self.num_topics) | ||
|
||
def testIdentity(self): | ||
for dist_name in ["hellinger", "kullback_leibler", "jaccard"]: | ||
# test for matrix case | ||
mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms, distance=dist_name) | ||
|
||
for row in annotation: | ||
|
@@ -51,6 +60,23 @@ def testIdentity(self): | |
if dist_name == "jaccard": | ||
self.assertTrue(np.allclose(mdiff, np.zeros(mdiff.shape, dtype=mdiff.dtype))) | ||
|
||
# test for diagonal case | ||
mdiff, annotation = self.model.diff(self.model, n_ann_terms=self.n_ann_terms, distance=dist_name, diagonal=True) | ||
|
||
for (int_tokens, diff_tokens) in annotation: | ||
self.assertEquals(diff_tokens, []) | ||
self.assertEquals(len(int_tokens), self.n_ann_terms) | ||
|
||
self.assertTrue(np.allclose(mdiff, np.zeros(mdiff.shape, dtype=mdiff.dtype))) | ||
|
||
if dist_name == "jaccard": | ||
self.assertTrue(np.allclose(mdiff, np.zeros(mdiff.shape, dtype=mdiff.dtype))) | ||
|
||
def testInput(self): | ||
self.assertRaises(ValueError, self.model.diff, self.model, n_ann_terms=self.n_ann_terms, distance='something') | ||
self.assertRaises(ValueError, self.model.diff, [], n_ann_terms=self.n_ann_terms, distance='something') | ||
|
||
|
||
if __name__ == '__main__':
    # When run as a script, log at DEBUG level with timestamps and then
    # hand control to the unittest runner.
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)
    unittest.main()