-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit a106319
Showing
10 changed files
with
280 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
*.pyc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
Simple Python [word ladder](https://en.wikipedia.org/wiki/Word_ladder) | ||
generator that finds the shortest ladder between two words. | ||
|
||
`generate.py` precomputes the dictionaries used to build the ladder (only | ||
needs to run once, or never, since the output jsons are in the repo) | ||
|
||
`ladder.py` takes a `start` and `target` argument and outputs a ladder, if | ||
one exists. | ||
|
||
Currently only supports words from 2-6 characters, though easy enough to | ||
extend. | ||
|
||
Thanks to [ahupp](https://github.com/ahupp) for the | ||
[Burkhard-Keller tree implementation](https://github.com/ahupp/bktree) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,148 @@ | ||
#!/usr/bin/env python | ||
# https://github.com/ahupp/bktree/blob/master/bktree.py | ||
""" | ||
This module implements Burkhard-Keller Trees (bk-tree). bk-trees | ||
allow fast lookup of words that lie within a specified distance of a | ||
query word. For example, this might be used by a spell checker to | ||
find near matches to a misspelled word. | ||
The implementation is based on the description in this article: | ||
http://blog.notdot.net/2007/4/Damn-Cool-Algorithms-Part-1-BK-Trees | ||
Licensed under the PSF license: http://www.python.org/psf/license/ | ||
- Adam Hupp <[email protected]> | ||
""" | ||
from itertools import imap, ifilter | ||
from random import randint | ||
from time import time | ||
|
||
class BKTree:
    """Burkhard-Keller tree: supports fast lookup of items that lie
    within a specified distance of a query item, under a metric
    distance function."""

    def __init__(self, distfn, words):
        """
        Create a new BK-tree from the given distance function and
        words.

        Arguments:

        distfn: a binary function that returns the distance between
        two words. Return value is a non-negative integer. the
        distance function must be a metric space.

        words: an iterable. produces values that can be passed to
        distfn. Must yield at least one value (the first becomes the
        tree root).
        """
        self.distfn = distfn

        it = iter(words)
        # next(it) works on both Python 2.6+ and Python 3; the
        # original it.next() raises AttributeError on Python 3.
        root = next(it)
        self.tree = (root, {})

        for i in it:
            self._add_word(self.tree, i)

    def _add_word(self, parent, word):
        # A node is a (word, children) pair, children mapping
        # distance -> subtree. Walk down the edge labelled with the
        # new word's distance from this node; create a leaf when no
        # such edge exists yet.
        pword, children = parent
        d = self.distfn(word, pword)
        if d in children:
            self._add_word(children[d], word)
        else:
            children[d] = (word, {})

    def query(self, word, n):
        """
        Return all words in the tree that are within a distance of `n'
        from `word`.

        Arguments:

        word: a word to query on

        n: a non-negative integer that specifies the allowed distance
        from the query word.

        Return value is a list of tuples (distance, word), sorted in
        ascending order of distance.
        """
        def rec(parent):
            pword, children = parent
            d = self.distfn(word, pword)
            results = []
            if d <= n:
                results.append((d, pword))

            # Triangle inequality: only subtrees hanging off edges
            # labelled d-n .. d+n can contain matches.
            for i in range(d - n, d + n + 1):
                child = children.get(i)
                if child is not None:
                    results.extend(rec(child))
            return results

        # sort by distance
        return sorted(rec(self.tree))
|
||
|
||
|
||
def brute_query(word, words, distfn, n):
    """A brute force distance query: linear scan of `words`, keeping
    every item whose distance from `word` is at most `n`.

    Arguments:

    word: the word to query for
    words: a iterable that produces words to test
    distfn: a binary function that returns the distance between a
    `word' and an item in `words'.
    n: an integer that specifies the distance of a matching word
    """
    matches = []
    for candidate in words:
        if distfn(candidate, word) <= n:
            matches.append(candidate)
    return matches
|
||
|
||
def maxdepth(tree, count=0):
    """Return the depth of the deepest leaf in a BK-tree node.

    Arguments:

    tree: a (word, children) node as built by BKTree
    count: depth of `tree` itself (used by the recursion)

    Bug fix: the original body referenced undefined names `t` and `c`
    instead of the parameters `tree` and `count`, so every call raised
    NameError.
    """
    _, children = tree
    if len(children):
        return max(maxdepth(i, count + 1) for i in children.values())
    else:
        return count
|
||
|
||
# http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Python | ||
# http://en.wikibooks.org/wiki/Algorithm_implementation/Strings/Levenshtein_distance#Python
def levenshtein(s, t):
    """Return the Levenshtein edit distance between strings s and t.

    Classic dynamic program, kept to two rows: `prev` holds the
    distances for the previous character of s, `curr` is built for the
    current one.
    """
    width = len(t)
    # Row 0: distance from the empty prefix of s is just j insertions.
    prev = list(range(width + 1))
    for i, s_char in enumerate(s, 1):
        curr = [i]  # distance from s[:i] to the empty prefix of t
        for j, t_char in enumerate(t, 1):
            cost = 0 if s_char == t_char else 1
            curr.append(min(prev[j] + 1,        # deletion
                            curr[j - 1] + 1,    # insertion
                            prev[j - 1] + cost  # substitution
                            ))
        prev = curr
    return prev[width]
|
||
|
||
def dict_words(dictfile="/usr/share/dict/american-english"):
    """Return an iterator that produces the non-empty words in the
    given dictionary file, one per line, stripped of whitespace.

    Rewritten as a generator: the original used the Python-2-only
    itertools.imap/ifilter and never closed the file handle; `with`
    guarantees the file is closed once iteration finishes.
    """
    with open(dictfile) as f:
        for line in f:
            word = line.strip()
            if word:
                yield word
|
||
|
||
def timeof(fn, *args):
    """Call fn(*args), print the elapsed wall-clock time in seconds,
    and return fn's result.

    Fixes: the Python-2-only print statement is replaced with a
    single-expression print that behaves identically on Python 2 and
    3; the local variable no longer reuses the name `t`.
    """
    import time  # local import: avoids clashing with module-level `from time import time`
    start = time.time()
    res = fn(*args)
    print("time: %s" % (time.time() - start))
    return res
|
||
0 |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#!/usr/bin/env python | ||
""" | ||
Word ladder - given a start and a target, generates the shortest chain | ||
of words to get from start to target, changing one letter at a time, | ||
with each step being a valid word from the Scrabble dictionary. | ||
""" | ||
|
||
import sys | ||
import json | ||
import argparse | ||
from collections import deque | ||
|
||
def find_ladder(start, target, linked_words):
    """
    Find the shortest word ladder from start to target

    Arguments:

    start: the start of the ladder
    target: the end of the ladder
    linked_words: a dictionary <string, list(string)> of words and the
    valid words Levenshtein distance 1 from them

    Return value is a list of words forming the ladder, or None if no ladder
    exists.

    Breadth-first search over the word graph, so the first ladder that
    reaches `target` is a shortest one.
    """
    queue = deque([[start]])
    # Set membership is O(1); the original used a list, making each
    # visited-check O(n) and the whole search accidentally quadratic.
    # Seeding with `start` also stops the BFS re-enqueueing the start
    # word (each word's adjacency list includes the word itself).
    used_words = {start}
    while queue:
        ladder = queue.popleft()
        for next_word in linked_words[ladder[-1]]:
            if next_word == target:
                return ladder + [next_word]
            if next_word not in used_words:
                used_words.add(next_word)
                queue.append(ladder + [next_word])
    return None
|
||
def word_type(x):
    """
    Helper type for arg parsing (ensures words are in length range 2-6)

    Returns the word lowercased; raises argparse.ArgumentTypeError when
    the length is outside 2-6.

    Bug fix: the original tested `len(x) < 1`, which let 1-character
    words through even though the error message and docstring both
    promise a 2-6 range.
    """
    if len(x) < 2 or len(x) > 6:
        raise argparse.ArgumentTypeError(
            "Words must be 2 to 6 chars in length")
    return x.lower()
|
||
# Script entry: parse the two words, load the right precomputed
# adjacency dictionary, and print the ladder.
# All prints are single-expression calls, which behave identically on
# Python 2 and Python 3 (the original used Python-2-only print
# statements); sys.exit() replaces the site-dependent exit().
parser = argparse.ArgumentParser()
parser.add_argument("start", type=word_type)
parser.add_argument("target", type=word_type)
args = parser.parse_args()

# A ladder changes one letter at a time, so both words must be the
# same length.
if len(args.start) != len(args.target):
    print("Words must be equal length")
    sys.exit()

print("Loading dictionary")
# Pick the precomputed file for this word length: 2 -> twos.json, ...
filenames = ["twos", "threes", "fours", "fives", "sixes"]
filename = filenames[len(args.start) - 2] + ".json"
with open(filename, "r") as f:
    word_list = json.load(f)

if args.start not in word_list:
    print(args.start + " is not a valid word")
    sys.exit()
if args.target not in word_list:
    print(args.target + " is not a valid word")
    sys.exit()

print("Finding ladder")
result = find_ladder(args.start, args.target, word_list)
if result is None:
    print("No ladder exists from {} to {}".format(args.start, args.target))
else:
    for w in result:
        print(w)
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"go": ["go", "bo", "do", "ho", "jo", "lo", "mo", "no", "so", "to", "wo", "yo"], "la": ["la", "aa", "ba", "fa", "ha", "ka", "li", "lo", "ma", "na", "pa", "ta", "ya", "za"], "lo": ["lo", "bo", "do", "go", "ho", "jo", "la", "li", "mo", "no", "so", "to", "wo", "yo"], "li": ["li", "ai", "bi", "hi", "ki", "la", "lo", "mi", "oi", "pi", "qi", "si", "ti", "xi"], "to": ["to", "bo", "do", "go", "ho", "jo", "lo", "mo", "no", "so", "ta", "ti", "wo", "yo"], "ti": ["ti", "ai", "bi", "hi", "ki", "li", "mi", "oi", "pi", "qi", "si", "ta", "to", "xi"], "ta": ["ta", "aa", "ba", "fa", "ha", "ka", "la", "ma", "na", "pa", "ti", "to", "ya", "za"], "do": ["do", "bo", "de", "go", "ho", "jo", "lo", "mo", "no", "so", "to", "wo", "yo"], "yo": ["yo", "bo", "do", "go", "ho", "jo", "lo", "mo", "no", "so", "to", "wo", "ya", "ye"], "ya": ["ya", "aa", "ba", "fa", "ha", "ka", "la", "ma", "na", "pa", "ta", "ye", "yo", "za"], "de": ["de", "ae", "be", "do", "fe", "he", "me", "ne", "oe", "pe", "re", "we", "ye"], "ye": ["ye", "ae", "be", "de", "fe", "he", "me", "ne", "oe", "pe", "re", "we", "ya", "yo"], "qi": ["qi", "ai", "bi", "hi", "ki", "li", "mi", "oi", "pi", "si", "ti", "xi"], "em": ["em", "am", "ed", "ef", "eh", "el", "en", "er", "es", "et", "ex", "hm", "mm", "om", "um"], "el": ["el", "al", "ed", "ef", "eh", "em", "en", "er", "es", "et", "ex"], "en": ["en", "an", "ed", "ef", "eh", "el", "em", "er", "es", "et", "ex", "in", "on", "un"], "eh": ["eh", "ah", "ed", "ef", "el", "em", "en", "er", "es", "et", "ex", "oh", "sh", "uh"], "ed": ["ed", "ad", "ef", "eh", "el", "em", "en", "er", "es", "et", "ex", "id", "od"], "ef": ["ef", "ed", "eh", "el", "em", "en", "er", "es", "et", "ex", "if", "of"], "za": ["za", "aa", "ba", "fa", "ha", "ka", "la", "ma", "na", "pa", "ta", "ya"], "ex": ["ex", "ax", "ed", "ef", "eh", "el", "em", "en", "er", "es", "et", "ox"], "et": ["et", "at", "ed", "ef", "eh", "el", "em", "en", "er", "es", "ex", "it", "ut"], "es": ["es", "as", "ed", "ef", "eh", "el", "em", "en", "er", "et", 
"ex", "is", "os", "us"], "er": ["er", "ar", "ed", "ef", "eh", "el", "em", "en", "es", "et", "ex", "or"], "um": ["um", "am", "em", "hm", "mm", "om", "uh", "un", "up", "us", "ut"], "re": ["re", "ae", "be", "de", "fe", "he", "me", "ne", "oe", "pe", "we", "ye"], "be": ["be", "ae", "ba", "bi", "bo", "by", "de", "fe", "he", "me", "ne", "oe", "pe", "re", "we", "ye"], "we": ["we", "ae", "be", "de", "fe", "he", "me", "ne", "oe", "pe", "re", "wo", "ye"], "ba": ["ba", "aa", "be", "bi", "bo", "by", "fa", "ha", "ka", "la", "ma", "na", "pa", "ta", "ya", "za"], "wo": ["wo", "bo", "do", "go", "ho", "jo", "lo", "mo", "no", "so", "to", "we", "yo"], "bo": ["bo", "ba", "be", "bi", "by", "do", "go", "ho", "jo", "lo", "mo", "no", "so", "to", "wo", "yo"], "bi": ["bi", "ai", "ba", "be", "bo", "by", "hi", "ki", "li", "mi", "oi", "pi", "qi", "si", "ti", "xi"], "jo": ["jo", "bo", "do", "go", "ho", "lo", "mo", "no", "so", "to", "wo", "yo"], "by": ["by", "ay", "ba", "be", "bi", "bo", "my", "oy"], "on": ["on", "an", "en", "in", "od", "oe", "of", "oh", "oi", "om", "op", "or", "os", "ow", "ox", "oy", "un"], "om": ["om", "am", "em", "hm", "mm", "od", "oe", "of", "oh", "oi", "on", "op", "or", "os", "ow", "ox", "oy", "um"], "oi": ["oi", "ai", "bi", "hi", "ki", "li", "mi", "od", "oe", "of", "oh", "om", "on", "op", "or", "os", "ow", "ox", "oy", "pi", "qi", "si", "ti", "xi"], "oh": ["oh", "ah", "eh", "od", "oe", "of", "oi", "om", "on", "op", "or", "os", "ow", "ox", "oy", "sh", "uh"], "of": ["of", "ef", "if", "od", "oe", "oh", "oi", "om", "on", "op", "or", "os", "ow", "ox", "oy"], "oe": ["oe", "ae", "be", "de", "fe", "he", "me", "ne", "od", "of", "oh", "oi", "om", "on", "op", "or", "os", "ow", "ox", "oy", "pe", "re", "we", "ye"], "od": ["od", "ad", "ed", "id", "oe", "of", "oh", "oi", "om", "on", "op", "or", "os", "ow", "ox", "oy"], "oy": ["oy", "ay", "by", "my", "od", "oe", "of", "oh", "oi", "om", "on", "op", "or", "os", "ow", "ox"], "ox": ["ox", "ax", "ex", "od", "oe", "of", "oh", "oi", "om", "on", 
"op", "or", "os", "ow", "oy"], "ow": ["ow", "aw", "od", "oe", "of", "oh", "oi", "om", "on", "op", "or", "os", "ox", "oy"], "os": ["os", "as", "es", "is", "od", "oe", "of", "oh", "oi", "om", "on", "op", "or", "ow", "ox", "oy", "us"], "or": ["or", "ar", "er", "od", "oe", "of", "oh", "oi", "om", "on", "op", "os", "ow", "ox", "oy"], "op": ["op", "od", "oe", "of", "oh", "oi", "om", "on", "or", "os", "ow", "ox", "oy", "up"], "xi": ["xi", "ai", "bi", "hi", "ki", "li", "mi", "oi", "pi", "qi", "si", "ti", "xu"], "xu": ["xu", "mu", "nu", "xi"], "pa": ["pa", "aa", "ba", "fa", "ha", "ka", "la", "ma", "na", "pe", "pi", "ta", "ya", "za"], "pe": ["pe", "ae", "be", "de", "fe", "he", "me", "ne", "oe", "pa", "pi", "re", "we", "ye"], "pi": ["pi", "ai", "bi", "hi", "ki", "li", "mi", "oi", "pa", "pe", "qi", "si", "ti", "xi"], "it": ["it", "at", "et", "id", "if", "in", "is", "ut"], "hi": ["hi", "ai", "bi", "ha", "he", "hm", "ho", "ki", "li", "mi", "oi", "pi", "qi", "si", "ti", "xi"], "ho": ["ho", "bo", "do", "go", "ha", "he", "hi", "hm", "jo", "lo", "mo", "no", "so", "to", "wo", "yo"], "hm": ["hm", "am", "em", "ha", "he", "hi", "ho", "mm", "om", "um"], "ha": ["ha", "aa", "ba", "fa", "he", "hi", "hm", "ho", "ka", "la", "ma", "na", "pa", "ta", "ya", "za"], "he": ["he", "ae", "be", "de", "fe", "ha", "hi", "hm", "ho", "me", "ne", "oe", "pe", "re", "we", "ye"], "me": ["me", "ae", "be", "de", "fe", "he", "ma", "mi", "mm", "mo", "mu", "my", "ne", "oe", "pe", "re", "we", "ye"], "ma": ["ma", "aa", "ba", "fa", "ha", "ka", "la", "me", "mi", "mm", "mo", "mu", "my", "na", "pa", "ta", "ya", "za"], "mm": ["mm", "am", "em", "hm", "ma", "me", "mi", "mo", "mu", "my", "om", "um"], "ut": ["ut", "at", "et", "it", "uh", "um", "un", "up", "us"], "mo": ["mo", "bo", "do", "go", "ho", "jo", "lo", "ma", "me", "mi", "mm", "mu", "my", "no", "so", "to", "wo", "yo"], "mi": ["mi", "ai", "bi", "hi", "ki", "li", "ma", "me", "mm", "mo", "mu", "my", "oi", "pi", "qi", "si", "ti", "xi"], "up": ["up", "op", "uh", "um", "un", 
"us", "ut"], "us": ["us", "as", "es", "is", "os", "uh", "um", "un", "up", "ut"], "mu": ["mu", "ma", "me", "mi", "mm", "mo", "my", "nu", "xu"], "un": ["un", "an", "en", "in", "on", "uh", "um", "up", "us", "ut"], "uh": ["uh", "ah", "eh", "oh", "sh", "um", "un", "up", "us", "ut"], "my": ["my", "ay", "by", "ma", "me", "mi", "mm", "mo", "mu", "oy"], "aa": ["aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay", "ba", "fa", "ha", "ka", "la", "ma", "na", "pa", "ta", "ya", "za"], "ab": ["ab", "aa", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay"], "ae": ["ae", "aa", "ab", "ad", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay", "be", "de", "fe", "he", "me", "ne", "oe", "pe", "re", "we", "ye"], "ad": ["ad", "aa", "ab", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay", "ed", "id", "od"], "ag": ["ag", "aa", "ab", "ad", "ae", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay"], "ai": ["ai", "aa", "ab", "ad", "ae", "ag", "ah", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay", "bi", "hi", "ki", "li", "mi", "oi", "pi", "qi", "si", "ti", "xi"], "ah": ["ah", "aa", "ab", "ad", "ae", "ag", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "ay", "eh", "oh", "sh", "uh"], "is": ["is", "as", "es", "id", "if", "in", "it", "os", "us"], "am": ["am", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "an", "ar", "as", "at", "aw", "ax", "ay", "em", "hm", "mm", "om", "um"], "al": ["al", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "am", "an", "ar", "as", "at", "aw", "ax", "ay", "el"], "an": ["an", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "ar", "as", "at", "aw", "ax", "ay", "en", "in", "on", "un"], "as": ["as", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "at", "aw", "ax", "ay", "es", "is", "os", "us"], "ar": ["ar", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "as", "at", "aw", "ax", "ay", "er", "or"], "at": 
["at", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "aw", "ax", "ay", "et", "it", "ut"], "aw": ["aw", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "ax", "ay", "ow"], "in": ["in", "an", "en", "id", "if", "is", "it", "on", "un"], "ay": ["ay", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ax", "by", "my", "oy"], "ax": ["ax", "aa", "ab", "ad", "ae", "ag", "ah", "ai", "al", "am", "an", "ar", "as", "at", "aw", "ay", "ex", "ox"], "id": ["id", "ad", "ed", "if", "in", "is", "it", "od"], "if": ["if", "ef", "id", "in", "is", "it", "of"], "no": ["no", "bo", "do", "go", "ho", "jo", "lo", "mo", "na", "ne", "nu", "so", "to", "wo", "yo"], "na": ["na", "aa", "ba", "fa", "ha", "ka", "la", "ma", "ne", "no", "nu", "pa", "ta", "ya", "za"], "ne": ["ne", "ae", "be", "de", "fe", "he", "me", "na", "no", "nu", "oe", "pe", "re", "we", "ye"], "nu": ["nu", "mu", "na", "ne", "no", "xu"], "fa": ["fa", "aa", "ba", "fe", "ha", "ka", "la", "ma", "na", "pa", "ta", "ya", "za"], "fe": ["fe", "ae", "be", "de", "fa", "he", "me", "ne", "oe", "pe", "re", "we", "ye"], "ka": ["ka", "aa", "ba", "fa", "ha", "ki", "la", "ma", "na", "pa", "ta", "ya", "za"], "ki": ["ki", "ai", "bi", "hi", "ka", "li", "mi", "oi", "pi", "qi", "si", "ti", "xi"], "si": ["si", "ai", "bi", "hi", "ki", "li", "mi", "oi", "pi", "qi", "sh", "so", "ti", "xi"], "sh": ["sh", "ah", "eh", "oh", "si", "so", "uh"], "so": ["so", "bo", "do", "go", "ho", "jo", "lo", "mo", "no", "sh", "si", "to", "wo", "yo"]} |