Initial commit
jasondoucette committed Feb 22, 2017
0 parents commit a106319
Showing 10 changed files with 280 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
*.pyc
14 changes: 14 additions & 0 deletions README.md
@@ -0,0 +1,14 @@
Simple Python [word ladder](https://en.wikipedia.org/wiki/Word_ladder)
generator that finds the shortest ladder between two words.

`generate.py` precomputes the dictionaries used to build the ladder (it only
needs to be run once, or never, since the output JSON files are already in the repo)

`ladder.py` takes a `start` and `target` argument and outputs a ladder, if
one exists.

Currently only supports words of 2-6 characters, though it is easy enough to
extend.
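
The precomputed JSON files (`twos.json` through `sixes.json`) map each word to
its valid neighbours one letter change away; the word itself is included in its
own list. A quick sketch of inspecting one of them directly:

```python
import json

# Load the precomputed neighbour map for 2-letter words.
with open("twos.json") as f:
    linked_words = json.load(f)

# Every valid 2-letter word reachable from "go" with one letter change.
print(linked_words["go"])
```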

Thanks to [ahupp](https://github.com/ahupp) for the
[Burkhard-Keller tree implementation](https://github.com/ahupp/bktree)
148 changes: 148 additions & 0 deletions bktree.py
@@ -0,0 +1,148 @@
#!/usr/bin/env python
# https://github.com/ahupp/bktree/blob/master/bktree.py
"""
This module implements Burkhard-Keller Trees (bk-tree). bk-trees
allow fast lookup of words that lie within a specified distance of a
query word. For example, this might be used by a spell checker to
find near matches to a misspelled word.
The implementation is based on the description in this article:
http://blog.notdot.net/2007/4/Damn-Cool-Algorithms-Part-1-BK-Trees
Licensed under the PSF license: http://www.python.org/psf/license/
- Adam Hupp <[email protected]>
"""
from itertools import imap, ifilter
from random import randint
from time import time
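# NOTE: this module targets Python 2 (itertools.imap/ifilter, .next(), print statements).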

class BKTree:
    def __init__(self, distfn, words):
        """
        Create a new BK-tree from the given distance function and
        words.

        Arguments:

        distfn: a binary function that returns the distance between
        two words. Return value is a non-negative integer. The
        distance function must be a metric (in particular it must
        satisfy the triangle inequality).

        words: an iterable that produces values that can be passed to
        distfn.
        """
        self.distfn = distfn

        it = iter(words)
        root = it.next()
        self.tree = (root, {})

        for i in it:
            self._add_word(self.tree, i)

    def _add_word(self, parent, word):
        pword, children = parent
        d = self.distfn(word, pword)
        if d in children:
            self._add_word(children[d], word)
        else:
            children[d] = (word, {})

    def query(self, word, n):
        """
        Return all words in the tree that are within a distance of `n`
        from `word`.

        Arguments:

        word: a word to query on
        n: a non-negative integer that specifies the allowed distance
           from the query word.

        Return value is a list of tuples (distance, word), sorted in
        ascending order of distance.
        """
        def rec(parent):
            pword, children = parent
            d = self.distfn(word, pword)
            results = []
            if d <= n:
                results.append((d, pword))

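            # By the triangle inequality, only child subtrees whose edge
            # label lies in the range [d - n, d + n] can contain words
            # within distance n of the query word; all other children
            # are pruned.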
            for i in range(d - n, d + n + 1):
                child = children.get(i)
                if child is not None:
                    results.extend(rec(child))
            return results

        # sort by distance
        return sorted(rec(self.tree))



def brute_query(word, words, distfn, n):
    """A brute force distance query.

    Arguments:

    word: the word to query for
    words: an iterable that produces words to test
    distfn: a binary function that returns the distance between
            `word` and an item in `words`
    n: an integer that specifies the maximum distance of a matching word
    """
    return [i for i in words
            if distfn(i, word) <= n]


def maxdepth(tree, count=0):
    _, children = tree
    if len(children):
        return max(maxdepth(i, count + 1) for i in children.values())
    else:
        return count


# http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Python
def levenshtein(s, t):
    m, n = len(s), len(t)
    d = [range(n + 1)]
    d += [[i] for i in range(1, m + 1)]
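    # d[i][j] holds the edit distance between the first i characters of s
    # and the first j characters of t; the first row and column above
    # correspond to inserting or deleting every character.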
    for i in range(0, m):
        for j in range(0, n):
            cost = 1
            if s[i] == t[j]: cost = 0

            d[i+1].append(min(d[i][j+1] + 1,    # deletion
                              d[i+1][j] + 1,    # insertion
                              d[i][j] + cost))  # substitution
    return d[m][n]


def dict_words(dictfile="/usr/share/dict/american-english"):
    "Return an iterator that produces words in the given dictionary."
    return ifilter(len,
                   imap(str.strip,
                        open(dictfile)))


def timeof(fn, *args):
    import time
    t = time.time()
    res = fn(*args)
    print "time: ", (time.time() - t)
    return res
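
The `query` method returns `(distance, word)` tuples sorted by distance. A minimal sketch of using the tree directly, assuming Python 2 (the module uses `itertools.imap`/`ifilter`) and an illustrative word list:

```python
from bktree import BKTree, levenshtein

# Build a tree over a handful of 4-letter words (illustrative list).
tree = BKTree(levenshtein, ["cold", "cord", "card", "ward", "warm"])

# All words within Levenshtein distance 1 of "cord", nearest first.
print(tree.query("cord", 1))   # [(0, 'cord'), (1, 'card'), (1, 'cold')]
```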

1 change: 1 addition & 0 deletions fives.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions fours.json

Large diffs are not rendered by default.

32 changes: 32 additions & 0 deletions generate.py

Large diffs are not rendered by default.
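
The contents of `generate.py` are not rendered above. As an assumption-laden sketch only (the real script may differ), the kind of precomputation the README describes could look roughly like the following, reusing `BKTree` and `levenshtein` from `bktree.py`; the word-list filename is a placeholder:

```python
import json

from bktree import BKTree, levenshtein

def build_neighbour_map(words):
    """Map each word to every word in `words` at edit distance 0 or 1,
    matching the structure visible in twos.json below."""
    tree = BKTree(levenshtein, words)
    return dict((w, [match for _, match in tree.query(w, 1)]) for w in words)

if __name__ == "__main__":
    # "wordlist.txt" stands in for whatever dictionary generate.py really uses.
    words = [w.strip() for w in open("wordlist.txt") if len(w.strip()) == 2]
    with open("twos.json", "w") as f:
        json.dump(build_neighbour_map(words), f)
```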

80 changes: 80 additions & 0 deletions ladder.py
@@ -0,0 +1,80 @@
#!/usr/bin/env python
"""
Word ladder - given a start and a target, generates the shortest chain
of words to get from start to target, changing one letter at a time,
with each step being a valid word from the Scrabble dictionary.
"""

import sys
import json
import argparse
from collections import deque

def find_ladder(start, target, linked_words):
    """
    Find the shortest word ladder from start to target.

    Arguments:

    start: the start of the ladder
    target: the end of the ladder
    linked_words: a dictionary <string, list(string)> of words and the
                  valid words at Levenshtein distance 1 from them

    Return value is a list of words forming the ladder, or None if no ladder
    exists.
    """
    queue = deque([[start]])
    used_words = []
    while len(queue) > 0:
        ladder = queue.popleft()
        next_words = linked_words[ladder[-1]]
        for next_word in next_words:
            attempt = list(ladder)
            attempt.append(next_word)
            if next_word == target:
                return attempt
            if next_word not in used_words:
                used_words.append(next_word)
                queue.append(attempt)
    return None
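
# Example (illustrative, using the twos.json shown at the end of this commit):
#   find_ladder("in", "at", linked_words)  ->  ["in", "an", "at"]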

def word_type(x):
    """
    Helper type for arg parsing (ensures words are in length range 2-6).
    """
    if len(x) < 2 or len(x) > 6:
        raise argparse.ArgumentTypeError(
            "Words must be 2 to 6 chars in length")
    return x.lower()

parser = argparse.ArgumentParser()
parser.add_argument("start", type=word_type)
parser.add_argument("target", type=word_type)
args = parser.parse_args()

if len(args.start) != len(args.target):
    print "Words must be equal length"
    exit()

print "Loading dictionary"
filenames = ["twos", "threes", "fours", "fives", "sixes"]
filename = filenames[len(args.start) - 2] + ".json"
with open(filename, "r") as f:
    word_list = json.load(f)

if args.start not in word_list:
    print args.start + " is not a valid word"
    exit()
if args.target not in word_list:
    print args.target + " is not a valid word"
    exit()

print "Finding ladder"
result = find_ladder(args.start, args.target, word_list)
if result is None:
    print "No ladder exists from {} to {}".format(args.start, args.target)
else:
    for w in result: print w
1 change: 1 addition & 0 deletions sixes.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions threes.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions twos.json
@@ -0,0 +1 @@
{"go": ["go", "bo", "do", "ho", "jo", "lo", "mo", "no", "so", "to", "wo", "yo"], "la": ["la", "aa", "ba", "fa", "ha", "ka", "li", "lo", "ma", "na", "pa", "ta", "ya", "za"], "lo": ["lo", "bo", "do", "go", "ho", "jo", "la", "li", "mo", "no", "so", "to", "wo", "yo"], "li": ["li", "ai", "bi", "hi", "ki", "la", "lo", "mi", "oi", "pi", "qi", "si", "ti", "xi"], "to": ["to", "bo", "do", "go", "ho", "jo", "lo", "mo", "no", "so", "ta", "ti", "wo", "yo"], "ti": ["ti", "ai", "bi", "hi", "ki", "li", "mi", "oi", "pi", "qi", "si", "ta", "to", "xi"], "ta": ["ta", "aa", "ba", "fa", "ha", "ka", "la", "ma", "na", "pa", "ti", "to", "ya", "za"], "do": ["do", "bo", "de", "go", "ho", "jo", "lo", "mo", "no", "so", "to", "wo", "yo"], "yo": ["yo", "bo", "do", "go", "ho", "jo", "lo", "mo", "no", "so", "to", "wo", "ya", "ye"], "ya": ["ya", "aa", "ba", "fa", "ha", "ka", "la", "ma", "na", "pa", "ta", "ye", "yo", "za"], "de": ["de", "ae", "be", "do", "fe", "he", "me", "ne", "oe", "pe", "re", "we", "ye"], "ye": ["ye", "ae", "be", "de", "fe", "he", "me", "ne", "oe", "pe", "re", "we", "ya", "yo"], "qi": ["qi", "ai", "bi", "hi", "ki", "li", "mi", "oi", "pi", "si", "ti", "xi"], "em": ["em", "am", "ed", "ef", "eh", "el", "en", "er", "es", "et", "ex", "hm", "mm", "om", "um"], "el": ["el", "al", "ed", "ef", "eh", "em", "en", "er", "es", "et", "ex"], "en": ["en", "an", "ed", "ef", "eh", "el", "em", "er", "es", "et", "ex", "in", "on", "un"], "eh": ["eh", "ah", "ed", "ef", "el", "em", "en", "er", "es", "et", "ex", "oh", "sh", "uh"], "ed": ["ed", "ad", "ef", "eh", "el", "em", "en", "er", "es", "et", "ex", "id", "od"], "ef": ["ef", "ed", "eh", "el", "em", "en", "er", "es", "et", "ex", "if", "of"], "za": ["za", "aa", "ba", "fa", "ha", "ka", "la", "ma", "na", "pa", "ta", "ya"], "ex": ["ex", "ax", "ed", "ef", "eh", "el", "em", "en", "er", "es", "et", "ox"], "et": ["et", "at", "ed", "ef", "eh", "el", "em", "en", "er", "es", "ex", "it", "ut"], "es": ["es", "as", "ed", "ef", "eh", "el", "em", "en", "er", "et", "ex", "is", "os", "us"], "er": ["er", "ar", "ed", "ef", "eh", "el", "em", "en", "es", "et", "ex", "or"], "um": ["um", "am", "em", "hm", "mm", "om", "uh", "un", "up", "us", "ut"], "re": ["re", "ae", "be", "de", "fe", "he", "me", "ne", "oe", "pe", "we", "ye"], "be": ["be", "ae", "ba", "bi", "bo", "by", "de", "fe", "he", "me", "ne", "oe", "pe", "re", "we", "ye"], "we": ["we", "ae", "be", "de", "fe", "he", "me", "ne", "oe", "pe", "re", "wo", "ye"], "ba": ["ba", "aa", "be", "bi", "bo", "by", "fa", "ha", "ka", "la", "ma", "na", "pa", "ta", "ya", "za"], "wo": ["wo", "bo", "do", "go", "ho", "jo", "lo", "mo", "no", "so", "to", "we", "yo"], "bo": ["bo", "ba", "be", "bi", "by", "do", "go", "ho", "jo", "lo", "mo", "no", "so", "to", "wo", "yo"], "bi": ["bi", "ai", "ba", "be", "bo", "by", "hi", "ki", "li", "mi", "oi", "pi", "qi", "si", "ti", "xi"], "jo": ["jo", "bo", "do", "go", "ho", "lo", "mo", "no", "so", "to", "wo", "yo"], "by": ["by", "ay", "ba", "be", "bi", "bo", "my", "oy"], "on": ["on", "an", "en", "in", "od", "oe", "of", "oh", "oi", "om", "op", "or", "os", "ow", "ox", "oy", "un"], "om": ["om", "am", "em", "hm", "mm", "od", "oe", "of", "oh", "oi", "on", "op", "or", "os", "ow", "ox", "oy", "um"], "oi": ["oi", "ai", "bi", "hi", "ki", "li", "mi", "od", "oe", "of", "oh", "om", "on", "op", "or", "os", "ow", "ox", "oy", "pi", "qi", "si", "ti", "xi"], "oh": ["oh", "ah", "eh", "od", "oe", "of", "oi", "om", "on", "op", "or", "os", "ow", "ox", "oy", "sh", "uh"], "of": ["of", "ef", "if", "od", "oe", "oh", "oi", "om", "on", "op", "or", "os", "ow", 
"ox", "oy"], "oe": ["oe", "ae", "be", "de", "fe", "he", "me", "ne", "od", "of", "oh", "oi", "om", "on", "op", "or", "os", "ow", "ox", "oy", "pe", "re", "we", "ye"], "od": ["od", "ad", "ed", "id", "oe", "of", "oh", "oi", "om", "on", "op", "or", "os", "ow", "ox", "oy"], "oy": ["oy", "ay", "by", "my", "od", "oe", "of", "oh", "oi", "om", "on", "op", "or", "os", "ow", "ox"], "ox": ["ox", "ax", "ex", "od", "oe", "of", "oh", "oi", "om", "on", "op", "or", "os", "ow", "oy"], "ow": ["ow", "aw", "od", "oe", "of", "oh", "oi", "om", "on", "op", "or", "os", "ox", "oy"], "os": ["os", "as", "es", "is", "od", "oe", "of", "oh", "oi", "om", "on", "op", "or", "ow", "ox", "oy", "us"], "or": ["or", "ar", "er", "od", "oe", "of", "oh", "oi", "om", "on", "op", "os", "ow", "ox", "oy"], "op": ["op", "od", "oe", "of", "oh", "oi", "om", "on", "or", "os", "ow", "ox", "oy", "up"], "xi": ["xi", "ai", "bi", "hi", "ki", "li", "mi", "oi", "pi", "qi", "si", "ti", "xu"], "xu": ["xu", "mu", "nu", "xi"], "pa": ["pa", "aa", "ba", "fa", "ha", "ka", "la", "ma", "na", "pe", "pi", "ta", "ya", "za"], "pe": ["pe", "ae", "be", "de", "fe", "he", "me", "ne", "oe", "pa", "pi", "re", "we", "ye"], "pi": ["pi", "ai", "bi", "hi", "ki", "li", "mi", "oi", "pa", "pe", "qi", "si", "ti", "xi"], "it": ["it", "at", "et", "id", "if", "in", "is", "ut"], "hi": ["hi", "ai", "bi", "ha", "he", "hm", "ho", "ki", "li", "mi", "oi", "pi", "qi", "si", "ti", "xi"], "ho": ["ho", "bo", "do", "go", "ha", "he", "hi", "hm", "jo", "lo", "mo", "no", "so", "to", "wo", "yo"], "hm": ["hm", "am", "em", "ha", "he", "hi", "ho", "mm", "om", "um"], "ha": ["ha", "aa", "ba", "fa", "he", "hi", "hm", "ho", "ka", "la", "ma", "na", "pa", "ta", "ya", "za"], "he": ["he", "ae", "be", "de", "fe", "ha", "hi", "hm", "ho", "me", "ne", "oe", "pe", "re", "we", "ye"], "me": ["me", "ae", "be", "de", "fe", "he", "ma", "mi", "mm", "mo", "mu", "my", "ne", "oe", "pe", "re", "we", "ye"], "ma": ["ma", "aa", "ba", "fa", "ha", "ka", "la", "me", "mi", "mm", "mo", "mu", "my", "na", "pa", "ta", "ya", "za"], "mm": ["mm", "am", "em", "hm", "ma", "me", "mi", "mo", "mu", "my", "om", "um"], "ut": ["ut", "at", "et", "it", "uh", "um", "un", "up", "us"], "mo": ["mo", "bo", "do", "go", "ho", "jo", "lo", "ma", "me", "mi", "mm", "mu", "my", "no", "so", "to", "wo", "yo"], "mi": ["mi", "ai", "bi", "hi", "ki", "li", "ma", "me", "mm", "mo", "mu", "my", "oi", "pi", "qi", "si", "ti", "xi"], "up": ["up", "op", "uh", "um", "un", "us", "ut"], "us": ["us", "as", "es", "is", "os", "uh", "um", "un", "up", "ut"], "mu": ["mu", "ma", "me", "mi", "mm", "mo", "my", "nu", "xu"], "un": ["un", "an", "en", "in", "on", "uh", "um", "up", "us", "ut"], "uh": ["uh", "ah", "eh", "oh", "sh", "um", "un", "up", "us", "ut"], "my": ["my", "ay", "by", "ma", "me", "mi", "mm", "mo", "mu", "oy"], "aa": ["aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay", "ba", "fa", "ha", "ka", "la", "ma", "na", "pa", "ta", "ya", "za"], "ab": ["ab", "aa", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay"], "ae": ["ae", "aa", "ab", "ad", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay", "be", "de", "fe", "he", "me", "ne", "oe", "pe", "re", "we", "ye"], "ad": ["ad", "aa", "ab", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay", "ed", "id", "od"], "ag": ["ag", "aa", "ab", "ad", "ae", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay"], "ai": ["ai", "aa", "ab", "ad", "ae", "ag", "ah", "al", "am", "an", "ar", "as", "at", "aw", "ax", 
"ay", "bi", "hi", "ki", "li", "mi", "oi", "pi", "qi", "si", "ti", "xi"], "ah": ["ah", "aa", "ab", "ad", "ae", "ag", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay", "eh", "oh", "sh", "uh"], "is": ["is", "as", "es", "id", "if", "in", "it", "os", "us"], "am": ["am", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "an", "ar", "as", "at", "aw", "ax", "ay", "em", "hm", "mm", "om", "um"], "al": ["al", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "am", "an", "ar", "as", "at", "aw", "ax", "ay", "el"], "an": ["an", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "ar", "as", "at", "aw", "ax", "ay", "en", "in", "on", "un"], "as": ["as", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "at", "aw", "ax", "ay", "es", "is", "os", "us"], "ar": ["ar", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "as", "at", "aw", "ax", "ay", "er", "or"], "at": ["at", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "aw", "ax", "ay", "et", "it", "ut"], "aw": ["aw", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "ax", "ay", "ow"], "in": ["in", "an", "en", "id", "if", "is", "it", "on", "un"], "ay": ["ay", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "by", "my", "oy"], "ax": ["ax", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ay", "ex", "ox"], "id": ["id", "ad", "ed", "if", "in", "is", "it", "od"], "if": ["if", "ef", "id", "in", "is", "it", "of"], "no": ["no", "bo", "do", "go", "ho", "jo", "lo", "mo", "na", "ne", "nu", "so", "to", "wo", "yo"], "na": ["na", "aa", "ba", "fa", "ha", "ka", "la", "ma", "ne", "no", "nu", "pa", "ta", "ya", "za"], "ne": ["ne", "ae", "be", "de", "fe", "he", "me", "na", "no", "nu", "oe", "pe", "re", "we", "ye"], "nu": ["nu", "mu", "na", "ne", "no", "xu"], "fa": ["fa", "aa", "ba", "fe", "ha", "ka", "la", "ma", "na", "pa", "ta", "ya", "za"], "fe": ["fe", "ae", "be", "de", "fa", "he", "me", "ne", "oe", "pe", "re", "we", "ye"], "ka": ["ka", "aa", "ba", "fa", "ha", "ki", "la", "ma", "na", "pa", "ta", "ya", "za"], "ki": ["ki", "ai", "bi", "hi", "ka", "li", "mi", "oi", "pi", "qi", "si", "ti", "xi"], "si": ["si", "ai", "bi", "hi", "ki", "li", "mi", "oi", "pi", "qi", "sh", "so", "ti", "xi"], "sh": ["sh", "ah", "eh", "oh", "si", "so", "uh"], "so": ["so", "bo", "do", "go", "ho", "jo", "lo", "mo", "no", "sh", "si", "to", "wo", "yo"]}
