-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdict.cc.py
executable file
·87 lines (71 loc) · 3.02 KB
/
dict.cc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python3
import signal
import urllib.request, urllib.error, urllib.parse
from re import findall
from sys import exit
from argparse import ArgumentParser
class Dict:
    """Minimal client for the dict.cc web dictionary.

    Intended call order: getResponse() downloads the result page,
    parseResponse() extracts the two translation columns from it, and
    printResults() writes them to stdout.

    Instance attributes:
        Response: raw bytes of the most recently downloaded page.
        word_list: list of two lists of strings, one per language column;
            entries at the same index belong together.
    """

    def __init__(self):
        # Create both attributes up front so parseResponse()/printResults()
        # never fail with AttributeError when called before a successful
        # download or parse.
        self.Response = b""
        self.word_list = [[], []]

    def getResponse(self, dictionary, query):
        """Query dict.cc for translations and store the page in self.Response.

        dictionary: subdomain selecting a specific dictionary, for example
            'en' or 'ende'.
        query: the search query; it is URL-encoded here before sending.

        Exceptions raised by urllib.request.urlopen are not caught here and
        propagate to the caller.
        """
        url = "http://" + dictionary + ".dict.cc/?s=" + urllib.parse.quote(query)
        # Trick to keep dict.cc from denying the request: send Firefox's
        # User-agent instead of urllib's default one.
        request = urllib.request.Request(
            url,
            data=None,
            headers={"User-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0"},
        )
        # The context manager closes the HTTP response even if read() fails.
        with urllib.request.urlopen(request) as f:
            self.Response = f.read()

    def parseResponse(self, results):
        """Extract the word lists from the HTML document in self.Response.

        Looks for the lines containing 'var c1Arr' and 'var c2Arr' and pulls
        every double-quoted string literal out of each of them.

        results: maximum number of entries to keep per column.

        Returns False when either line is missing (it could be a server
        error); otherwise returns None and leaves the result in
        self.word_list. self.word_list is reset to two empty lists first, so
        a failed parse never leaves results from an earlier query behind.
        """
        self.word_list = [[], []]

        # If a marker occurs on several lines, the last such line wins.
        words1 = words2 = ""
        for line in self.Response.decode("utf-8").split("\n"):
            if "var c1Arr" in line:
                words1 = line
            elif "var c2Arr" in line:
                words2 = line

        # Stop when we cannot find anything -> it could be a server error.
        if not words1 or not words2:
            return False

        # A double-quoted literal in which a backslash escapes the next
        # character, so an escaped quote does not end the literal.
        pattern = r'"(?:[^"\\]|\\.)*"'
        # Drop the first match of each line since it is empty.
        columns = [findall(pattern, words1)[1:], findall(pattern, words2)[1:]]

        # Keep at most 'results' entries and remove exactly the two
        # delimiting quotes. str.strip('"') would also eat an escaped quote
        # at the end of an entry and leave a dangling backslash behind.
        self.word_list = [
            [literal[1:-1] for literal in column[:results]]
            for column in columns
        ]

    def printResults(self):
        """Print the search results, one tab-separated pair per line."""
        # zip() stops at the shorter column, so columns of unequal length
        # cannot raise IndexError.
        for left, right in zip(self.word_list[0], self.word_list[1]):
            print("{0}\t{1}".format(left, right))
def handleSIGINT(signal, frame):
    """Signal handler: emit a newline, then end the process with status 1.

    Both parameters are supplied by the signal machinery and are ignored.
    """
    # Terminate the current output line before exiting.
    print()
    raise SystemExit(1)
if __name__ == "__main__":
    # Install the SIGINT handler so Ctrl-C exits without a stack trace.
    signal.signal(signal.SIGINT, handleSIGINT)

    # Parse the command line: one or more search words plus two options.
    arg_parser = ArgumentParser(usage="%(prog)s [options] SEARCH")
    arg_parser.add_argument("search", nargs="+", help="search term")
    arg_parser.add_argument("-r", "--results", type=int, default=15, metavar="NUMBER", help="only show NUMBER of results, default=15")
    arg_parser.add_argument("-d", "--dictionary", default="ende", metavar="DICT", help="choose dictionary (for example 'enfr' for English/French dictionary)")
    arguments = arg_parser.parse_args()

    # Several search words are sent as one space-separated query.
    query = " ".join(arguments.search)

    myDict = Dict()

    # Retrieve the translation from dict.cc. Network and HTTP failures are
    # reported as a one-line message rather than a stack trace; passing a
    # string to exit() prints it to stderr and exits with status 1.
    try:
        myDict.getResponse(dictionary=arguments.dictionary, query=query)
    except urllib.error.URLError as err:
        exit("dict.cc request failed: {0}".format(err))

    # Parse the response, exit on failure. parseResponse() signals failure
    # with the literal False, so test identity rather than truthiness.
    if myDict.parseResponse(arguments.results) is False:
        exit(1)

    # Print out a list of the results.
    myDict.printResults()