
Commit

Parameter name change: num_topics and num_words in HdpModel and DtmModel, to be consistent with LdaModel (#755)
droudy authored and tmylk committed Jun 24, 2016
1 parent c746b7c commit 225fa67
Showing 6 changed files with 34 additions and 30 deletions.
CHANGELOG.md: 5 additions, 0 deletions
@@ -1,5 +1,10 @@
 Changes
 =======
+0.13.2
+
+* topics, topn parameters changed to num_topics and num_words in show_topics() and print_topics() (@droudy, #747)
+  - In hdpmodel and dtmmodel
+  - NOT BACKWARDS COMPATIBLE!
 
 0.13.1, 2016-06-22

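For anyone upgrading across 0.13.2, a minimal before/after sketch of the rename (the toy corpus and dictionary below are made up for illustration; once this change lands, the old keyword names are rejected):

    from gensim.corpora import Dictionary
    from gensim.models import HdpModel

    # toy corpus, purely for illustration
    texts = [
        ["human", "interface", "computer"],
        ["survey", "user", "computer", "system", "response", "time"],
        ["graph", "trees"],
        ["graph", "minors", "trees"],
        ["graph", "minors", "survey"],
    ]
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    hdp = HdpModel(corpus, id2word=dictionary)

    # gensim <= 0.13.1:
    #     hdp.show_topics(topics=20, topn=10)
    # gensim >= 0.13.2 (this commit):
    topics = hdp.show_topics(num_topics=20, num_words=10)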
gensim/models/hdpmodel.py: 17 additions, 17 deletions
@@ -134,7 +134,7 @@ class HdpModel(interfaces.TransformationABC):
     on a training corpus:
     >>> hdp = HdpModel(corpus, id2word)
-    >>> hdp.print_topics(topics=20, topn=10)
+    >>> hdp.print_topics(num_topics=20, num_words=10)
     Inference on new documents is based on the approximately LDA-equivalent topics.
@@ -456,15 +456,15 @@ def update_expectations(self):
         self.m_timestamp[:] = self.m_updatect
         self.m_status_up_to_date = True
 
-    def print_topics(self, topics=20, topn=20):
-        """Alias for `show_topics()` that prints the `topn` most
+    def print_topics(self, num_topics=20, num_words=20):
+        """Alias for `show_topics()` that prints the `num_words` most
         probable words for `topics` number of topics to log.
         Set `topics=-1` to print all topics."""
-        return self.show_topics(topics=topics, topn=topn, log=True)
+        return self.show_topics(num_topics=num_topics, num_words=num_words, log=True)

-    def show_topics(self, topics=20, topn=20, log=False, formatted=True):
+    def show_topics(self, num_topics=20, num_words=20, log=False, formatted=True):
         """
-        Print the `topN` most probable words for `topics` number of topics.
+        Print the `num_words` most probable words for `topics` number of topics.
         Set `topics=-1` to print all topics.
         Set `formatted=True` to return the topics as a list of strings, or
@@ -475,7 +475,7 @@ def show_topics(self, topics=20, topn=20, log=False, formatted=True):
         self.update_expectations()
         betas = self.m_lambda + self.m_eta
         hdp_formatter = HdpTopicFormatter(self.id2word, betas)
-        return hdp_formatter.show_topics(topics, topn, log, formatted)
+        return hdp_formatter.show_topics(num_topics, num_words, log, formatted)

     def save_topics(self, doc_count=None):
         """legacy method; use `self.save()` instead"""
@@ -578,24 +578,24 @@ def __init__(self, dictionary=None, topic_data=None, topic_file=None, style=None
 
         self.style = style

-    def print_topics(self, topics=10, topn=10):
-        return self.show_topics(topics, topn, True)
+    def print_topics(self, num_topics=10, num_words=10):
+        return self.show_topics(num_topics, num_words, True)

-    def show_topics(self, topics=10, topn=10, log=False, formatted=True):
+    def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
         shown = []
-        if topics < 0:
-            topics = len(self.data)
+        if num_topics < 0:
+            num_topics = len(self.data)
 
-        topics = min(topics, len(self.data))
+        num_topics = min(num_topics, len(self.data))
 
-        for k in xrange(topics):
+        for k in xrange(num_topics):
             lambdak = list(self.data[k, :])
             lambdak = lambdak / sum(lambdak)
 
             temp = zip(lambdak, xrange(len(lambdak)))
             temp = sorted(temp, key=lambda x: x[0], reverse=True)
 
-            topic_terms = self.show_topic_terms(temp, topn)
+            topic_terms = self.show_topic_terms(temp, num_words)

             if formatted:
                 topic = self.format_topic(k, topic_terms)
@@ -609,8 +609,8 @@ def show_topics(self, topics=10, topn=10, log=False, formatted=True):

         return shown

-    def show_topic_terms(self, topic_data, topn):
-        return [(self.dictionary[wid], weight) for (weight, wid) in topic_data[:topn]]
+    def show_topic_terms(self, topic_data, num_words):
+        return [(self.dictionary[wid], weight) for (weight, wid) in topic_data[:num_words]]

     def format_topic(self, topic_id, topic_terms):
         if self.STYLE_GENSIM == self.style:
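The HdpTopicFormatter loop above reduces to: normalize each topic's lambda row, sort word ids by descending weight, and keep the heaviest num_words terms. A standalone sketch of that per-topic step, with a made-up weight vector and dictionary (not the library code itself):

    # hypothetical unnormalized weights for one topic, and a toy id2word map
    lambdak = [4.0, 1.0, 3.0, 2.0]
    id2word = {0: 'graph', 1: 'human', 2: 'trees', 3: 'minors'}
    num_words = 2

    total = sum(lambdak)
    weights = [w / total for w in lambdak]                             # normalize the row
    ranked = sorted(zip(weights, range(len(weights))), reverse=True)   # heaviest first
    topic_terms = [(id2word[wid], w) for w, wid in ranked[:num_words]]
    # topic_terms == [('graph', 0.4), ('trees', 0.3)]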
gensim/models/wrappers/dtmmodel.py: 9 additions, 9 deletions
@@ -235,23 +235,23 @@ def train(self, corpus, time_slices, mode, model):
         # influence[2,5] influence of document 2 on topic 5
         self.influences_time.append(influence)

-    def print_topics(self, topics=10, times=5, topn=10):
-        return self.show_topics(topics, times, topn, log=True)
+    def print_topics(self, num_topics=10, times=5, num_words=10):
+        return self.show_topics(num_topics, times, num_words, log=True)

-    def show_topics(self, topics=10, times=5, topn=10, log=False, formatted=True):
+    def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted=True):
         """
-        Print the `topn` most probable words for `topics` number of topics at 'times' time slices.
+        Print the `num_words` most probable words for `num_topics` number of topics at 'times' time slices.
         Set `topics=-1` to print all topics.
         Set `formatted=True` to return the topics as a list of strings, or `False` as lists of (weight, word) pairs.
         """
-        if topics < 0 or topics >= self.num_topics:
-            topics = self.num_topics
-            chosen_topics = range(topics)
+        if num_topics < 0 or num_topics >= self.num_topics:
+            num_topics = self.num_topics
+            chosen_topics = range(num_topics)
         else:
-            topics = min(topics, self.num_topics)
-            chosen_topics = range(topics)
+            num_topics = min(num_topics, self.num_topics)
+            chosen_topics = range(num_topics)
         # add a little random jitter, to randomize results around the same
         # alpha
         # sort_alpha = self.alpha + 0.0001 * \
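Under the new names, a DtmModel call looks roughly like the sketch below. The corpus, time slices, and binary path are placeholders; DtmModel shells out to an external DTM binary, so dtm_path has to point at a real executable for this to run:

    from gensim.corpora import Dictionary
    from gensim.models.wrappers import DtmModel

    # toy two-slice corpus, purely for illustration
    texts = [["bank", "river", "shore"], ["bank", "money", "loan"],
             ["bank", "finance", "money"], ["river", "water", "shore"]]
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    time_slices = [2, 2]                      # two documents per time slice

    dtm_path = "/path/to/dtm-binary"          # hypothetical path to the DTM executable
    model = DtmModel(dtm_path, corpus, time_slices, num_topics=2,
                     id2word=dictionary, model='dtm', initialize_lda=True)

    # pre-0.13.2: model.show_topics(topics=2, times=2, topn=10)
    topics = model.show_topics(num_topics=2, times=2, num_words=10)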
gensim/test/test_dtm.py: 2 additions, 2 deletions
@@ -40,7 +40,7 @@ def testDtm(self):
             self.dtm_path, self.corpus, self.time_slices, num_topics=2,
             id2word=self.id2word, model='dtm', initialize_lda=True,
             rng_seed=1)
-        topics = model.show_topics(topics=2, times=2, topn=10)
+        topics = model.show_topics(num_topics=2, times=2, num_words=10)
         self.assertEqual(len(topics), 4)
 
         one_topic = model.show_topic(topicid=1, time=1, topn=10)
@@ -53,7 +53,7 @@ def testDim(self):
             self.dtm_path, self.corpus, self.time_slices, num_topics=2,
             id2word=self.id2word, model='fixed', initialize_lda=True,
             rng_seed=1)
-        topics = model.show_topics(topics=2, times=2, topn=10)
+        topics = model.show_topics(num_topics=2, times=2, num_words=10)
         self.assertEqual(len(topics), 4)
 
         one_topic = model.show_topic(topicid=1, time=1, topn=10)
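Note that both tests still call show_topic (singular) with topn=10: this commit renames the keywords of show_topics and print_topics only, while the per-topic show_topic keeps its old parameter name. A quick contrast, assuming the model from the DtmModel sketch above:

    # renamed in 0.13.2: show_topics / print_topics take num_topics and num_words
    all_topics = model.show_topics(num_topics=2, times=2, num_words=10)

    # untouched by this commit: show_topic (singular) still takes topn
    one_topic = model.show_topic(topicid=1, time=1, topn=10)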
gensim/test/test_hdpmodel.py: 1 addition, 1 deletion
@@ -55,7 +55,7 @@ def setUp(self):
         self.model = self.class_(corpus, id2word=dictionary)
 
     def testShowTopics(self):
-        topics = self.model.show_topics(formatted=False)
+        topics = self.model.show_topics(formatted=False, num_topics=20, num_words=20)
 
         for topic_no, topic in topics:
             self.assertTrue(isinstance(topic_no, int))
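With formatted=False, show_topics returns (topic_no, topic) pairs where each topic is a list of (word, weight) tuples, which is what the assertions above iterate over. A small consumption sketch, assuming hdp is an already trained HdpModel (for instance the one built in the sketch after the CHANGELOG section):

    for topic_no, topic in hdp.show_topics(formatted=False, num_topics=5, num_words=10):
        # `topic` is a list of (word, weight) pairs for this topic
        top_word, top_weight = topic[0]
        print("topic %d: strongest word %r (%.3f)" % (topic_no, top_word, top_weight))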
gensim/test/test_ldamodel.py: 0 additions, 1 deletion
@@ -48,7 +48,6 @@ def testfile():
     return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')
 
 
-
 class TestLdaModel(unittest.TestCase):
     def setUp(self):
         self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
