
Commit

Parameter name change: num_topics and num_words in HdpModel and DtmModel, to be consistent with LdaModel (#755)
droudy authored and tmylk committed Jun 24, 2016
1 parent c746b7c commit 225fa67
Showing 6 changed files with 34 additions and 30 deletions.
CHANGELOG.md: 5 additions, 0 deletions
@@ -1,5 +1,10 @@
 Changes
 =======
+0.13.2
+
+* topics, topn parameters changed to num_topics and num_words in show_topics() and print_topics() (@droudy, #747)
+  - In hdpmodel and dtmmodel
+  - NOT BACKWARDS COMPATIBLE!
 
 0.13.1, 2016-06-22

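For anyone upgrading across 0.13.2, a minimal before/after sketch of the rename (the toy corpus and dictionary below are made up for illustration; once this change lands, the old keyword names are rejected):

    from gensim.corpora import Dictionary
    from gensim.models import HdpModel

    # toy corpus, purely for illustration
    texts = [
        ["human", "interface", "computer"],
        ["survey", "user", "computer", "system", "response", "time"],
        ["graph", "trees"],
        ["graph", "minors", "trees"],
        ["graph", "minors", "survey"],
    ]
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]

    hdp = HdpModel(corpus, id2word=dictionary)

    # gensim <= 0.13.1:
    #     hdp.show_topics(topics=20, topn=10)
    # gensim >= 0.13.2 (this commit):
    topics = hdp.show_topics(num_topics=20, num_words=10)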
gensim/models/hdpmodel.py: 17 additions, 17 deletions
@@ -134,7 +134,7 @@ class HdpModel(interfaces.TransformationABC):
     on a training corpus:
     >>> hdp = HdpModel(corpus, id2word)
-    >>> hdp.print_topics(topics=20, topn=10)
+    >>> hdp.print_topics(num_topics=20, num_words=10)
     Inference on new documents is based on the approximately LDA-equivalent topics.
@@ -456,15 +456,15 @@ def update_expectations(self):
         self.m_timestamp[:] = self.m_updatect
         self.m_status_up_to_date = True
 
-    def print_topics(self, topics=20, topn=20):
-        """Alias for `show_topics()` that prints the `topn` most
+    def print_topics(self, num_topics=20, num_words=20):
+        """Alias for `show_topics()` that prints the `num_words` most
         probable words for `topics` number of topics to log.
         Set `topics=-1` to print all topics."""
-        return self.show_topics(topics=topics, topn=topn, log=True)
+        return self.show_topics(num_topics=num_topics, num_words=num_words, log=True)

-    def show_topics(self, topics=20, topn=20, log=False, formatted=True):
+    def show_topics(self, num_topics=20, num_words=20, log=False, formatted=True):
         """
-        Print the `topN` most probable words for `topics` number of topics.
+        Print the `num_words` most probable words for `topics` number of topics.
         Set `topics=-1` to print all topics.
         Set `formatted=True` to return the topics as a list of strings, or
@@ -475,7 +475,7 @@ def show_topics(self, topics=20, topn=20, log=False, formatted=True):
         self.update_expectations()
         betas = self.m_lambda + self.m_eta
         hdp_formatter = HdpTopicFormatter(self.id2word, betas)
-        return hdp_formatter.show_topics(topics, topn, log, formatted)
+        return hdp_formatter.show_topics(num_topics, num_words, log, formatted)

     def save_topics(self, doc_count=None):
         """legacy method; use `self.save()` instead"""
@@ -578,24 +578,24 @@ def __init__(self, dictionary=None, topic_data=None, topic_file=None, style=None
 
         self.style = style

-    def print_topics(self, topics=10, topn=10):
-        return self.show_topics(topics, topn, True)
+    def print_topics(self, num_topics=10, num_words=10):
+        return self.show_topics(num_topics, num_words, True)

-    def show_topics(self, topics=10, topn=10, log=False, formatted=True):
+    def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
         shown = []
-        if topics < 0:
-            topics = len(self.data)
+        if num_topics < 0:
+            num_topics = len(self.data)
 
-        topics = min(topics, len(self.data))
+        num_topics = min(num_topics, len(self.data))
 
-        for k in xrange(topics):
+        for k in xrange(num_topics):
             lambdak = list(self.data[k, :])
             lambdak = lambdak / sum(lambdak)
 
             temp = zip(lambdak, xrange(len(lambdak)))
             temp = sorted(temp, key=lambda x: x[0], reverse=True)
 
-            topic_terms = self.show_topic_terms(temp, topn)
+            topic_terms = self.show_topic_terms(temp, num_words)

             if formatted:
                 topic = self.format_topic(k, topic_terms)
@@ -609,8 +609,8 @@ def show_topics(self, topics=10, topn=10, log=False, formatted=True):

         return shown

-    def show_topic_terms(self, topic_data, topn):
-        return [(self.dictionary[wid], weight) for (weight, wid) in topic_data[:topn]]
+    def show_topic_terms(self, topic_data, num_words):
+        return [(self.dictionary[wid], weight) for (weight, wid) in topic_data[:num_words]]

     def format_topic(self, topic_id, topic_terms):
         if self.STYLE_GENSIM == self.style:
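The HdpTopicFormatter loop above reduces to: normalize each topic's lambda row, sort word ids by descending weight, and keep the heaviest num_words terms. A standalone sketch of that per-topic step, with a made-up weight vector and dictionary (not the library code itself):

    # hypothetical unnormalized weights for one topic, and a toy id2word map
    lambdak = [4.0, 1.0, 3.0, 2.0]
    id2word = {0: 'graph', 1: 'human', 2: 'trees', 3: 'minors'}
    num_words = 2

    total = sum(lambdak)
    weights = [w / total for w in lambdak]                             # normalize the row
    ranked = sorted(zip(weights, range(len(weights))), reverse=True)   # heaviest first
    topic_terms = [(id2word[wid], w) for w, wid in ranked[:num_words]]
    # topic_terms == [('graph', 0.4), ('trees', 0.3)]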
gensim/models/wrappers/dtmmodel.py: 9 additions, 9 deletions
@@ -235,23 +235,23 @@ def train(self, corpus, time_slices, mode, model):
         # influence[2,5] influence of document 2 on topic 5
         self.influences_time.append(influence)

-    def print_topics(self, topics=10, times=5, topn=10):
-        return self.show_topics(topics, times, topn, log=True)
+    def print_topics(self, num_topics=10, times=5, num_words=10):
+        return self.show_topics(num_topics, times, num_words, log=True)

-    def show_topics(self, topics=10, times=5, topn=10, log=False, formatted=True):
+    def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted=True):
         """
-        Print the `topn` most probable words for `topics` number of topics at 'times' time slices.
+        Print the `num_words` most probable words for `num_topics` number of topics at 'times' time slices.
         Set `topics=-1` to print all topics.
         Set `formatted=True` to return the topics as a list of strings, or `False` as lists of (weight, word) pairs.
         """
-        if topics < 0 or topics >= self.num_topics:
-            topics = self.num_topics
-            chosen_topics = range(topics)
+        if num_topics < 0 or num_topics >= self.num_topics:
+            num_topics = self.num_topics
+            chosen_topics = range(num_topics)
         else:
-            topics = min(topics, self.num_topics)
-            chosen_topics = range(topics)
+            num_topics = min(num_topics, self.num_topics)
+            chosen_topics = range(num_topics)
         # add a little random jitter, to randomize results around the same
         # alpha
         # sort_alpha = self.alpha + 0.0001 * \
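Under the new names, a DtmModel call looks roughly like the sketch below. The corpus, time slices, and binary path are placeholders; DtmModel shells out to an external DTM binary, so dtm_path has to point at a real executable for this to run:

    from gensim.corpora import Dictionary
    from gensim.models.wrappers import DtmModel

    # toy two-slice corpus, purely for illustration
    texts = [["bank", "river", "shore"], ["bank", "money", "loan"],
             ["bank", "finance", "money"], ["river", "water", "shore"]]
    dictionary = Dictionary(texts)
    corpus = [dictionary.doc2bow(text) for text in texts]
    time_slices = [2, 2]                      # two documents per time slice

    dtm_path = "/path/to/dtm-binary"          # hypothetical path to the DTM executable
    model = DtmModel(dtm_path, corpus, time_slices, num_topics=2,
                     id2word=dictionary, model='dtm', initialize_lda=True)

    # pre-0.13.2: model.show_topics(topics=2, times=2, topn=10)
    topics = model.show_topics(num_topics=2, times=2, num_words=10)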
gensim/test/test_dtm.py: 2 additions, 2 deletions
@@ -40,7 +40,7 @@ def testDtm(self):
             self.dtm_path, self.corpus, self.time_slices, num_topics=2,
             id2word=self.id2word, model='dtm', initialize_lda=True,
             rng_seed=1)
-        topics = model.show_topics(topics=2, times=2, topn=10)
+        topics = model.show_topics(num_topics=2, times=2, num_words=10)
         self.assertEqual(len(topics), 4)
 
         one_topic = model.show_topic(topicid=1, time=1, topn=10)
@@ -53,7 +53,7 @@ def testDim(self):
             self.dtm_path, self.corpus, self.time_slices, num_topics=2,
             id2word=self.id2word, model='fixed', initialize_lda=True,
             rng_seed=1)
-        topics = model.show_topics(topics=2, times=2, topn=10)
+        topics = model.show_topics(num_topics=2, times=2, num_words=10)
         self.assertEqual(len(topics), 4)
 
         one_topic = model.show_topic(topicid=1, time=1, topn=10)
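Note that both tests still call show_topic (singular) with topn=10: this commit renames the keywords of show_topics and print_topics only, while the per-topic show_topic keeps its old parameter name. A quick contrast, assuming the model from the DtmModel sketch above:

    # renamed in 0.13.2: show_topics / print_topics take num_topics and num_words
    all_topics = model.show_topics(num_topics=2, times=2, num_words=10)

    # untouched by this commit: show_topic (singular) still takes topn
    one_topic = model.show_topic(topicid=1, time=1, topn=10)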
gensim/test/test_hdpmodel.py: 1 addition, 1 deletion
@@ -55,7 +55,7 @@ def setUp(self):
         self.model = self.class_(corpus, id2word=dictionary)
 
     def testShowTopics(self):
-        topics = self.model.show_topics(formatted=False)
+        topics = self.model.show_topics(formatted=False, num_topics=20, num_words=20)
 
         for topic_no, topic in topics:
             self.assertTrue(isinstance(topic_no, int))
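With formatted=False, show_topics returns (topic_no, topic) pairs where each topic is a list of (word, weight) tuples, which is what the assertions above iterate over. A small consumption sketch, assuming hdp is an already trained HdpModel (for instance the one built in the sketch after the CHANGELOG section):

    for topic_no, topic in hdp.show_topics(formatted=False, num_topics=5, num_words=10):
        # `topic` is a list of (word, weight) pairs for this topic
        top_word, top_weight = topic[0]
        print("topic %d: strongest word %r (%.3f)" % (topic_no, top_word, top_weight))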
gensim/test/test_ldamodel.py: 0 additions, 1 deletion
@@ -48,7 +48,6 @@ def testfile():
     return os.path.join(tempfile.gettempdir(), 'gensim_models.tst')
 
 
-
 class TestLdaModel(unittest.TestCase):
     def setUp(self):
         self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
