This repository has been archived by the owner on Sep 3, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathalignment.py
executable file
·47 lines (44 loc) · 2.2 KB
/
alignment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from scipy.linalg import orthogonal_procrustes
import numpy as np
from WordVectors import WordVectors
# Word alignment module
def align(wv1, wv2, anchor_indices=None, anchor_words=None, anchor_top=None,
          anchor_bot=None, anchor_random=None,
          exclude=None,
          method="procrustes"):
    """
    Align wv1 to wv2 with orthogonal Procrustes (OP) over a set of landmarks.

    The landmark (anchor) rows are collected from both embeddings, an
    orthogonal matrix Q is fitted on them, and Q is then applied to every
    vector of wv1. If no landmark selector is given, all words are used
    (global alignment).

    Only one selector is honoured per call; when several are passed, the
    first one that is not None wins, in this order: anchor_top, anchor_bot,
    anchor_random, anchor_indices, anchor_words.

    Arguments:
        wv1            - WordVectors object to align to wv2
        wv2            - Target WordVectors. Will align wv1 to it.
        anchor_indices - (optional) uses word indices as landmarks
        anchor_words   - (optional) uses words as landmarks
        anchor_top     - (optional) uses the first `anchor_top` indices as
                         landmarks
        anchor_bot     - (optional) uses the last `anchor_bot` indices as
                         landmarks
        anchor_random  - (optional) number of landmark indices to draw with
                         numpy.random.choice (its default samples with
                         replacement, so duplicates are possible)
        exclude        - collection of words to exclude from alignment
                         (None means exclude nothing)
        method         - Alignment objective. Currently only supports
                         orthogonal procrustes ("procrustes").

    Returns:
        (wv1_aligned, wv2, Q): a new WordVectors holding the words of wv1 with
        vectors `wv1.vectors @ Q`, the unchanged wv2, and the matrix Q.

    Raises:
        ValueError: if `method` is not a supported alignment objective.

    NOTE(review): index-based selectors use the same index for both
    embeddings, so they presumably expect wv1 and wv2 to share vocabulary
    order - confirm against callers.
    """
    # Fail fast: previously an unknown method surfaced later as an unbound `Q`.
    if method != "procrustes":
        raise ValueError(
            "Unsupported alignment method: %r (only 'procrustes' is available)"
            % (method,)
        )

    if exclude is None:
        exclude = frozenset()

    # Resolve the index-based selectors to one iterable of row indices.
    indices = None
    if anchor_top is not None:
        indices = range(anchor_top)
    elif anchor_bot is not None:
        # Negative indices -anchor_bot..-1 address the last rows of each
        # embedding (the old `-i for i in range(n)` hit row 0 because -0 == 0).
        indices = range(-anchor_bot, 0)
    elif anchor_random is not None:
        indices = np.random.choice(len(wv1.vectors), anchor_random)
    elif anchor_indices is not None:
        indices = anchor_indices

    if indices is not None:
        # Keep an index only when neither side's word is excluded, so that
        # row k of v1 always corresponds to row k of v2.
        kept = [i for i in indices
                if wv1.words[i] not in exclude and wv2.words[i] not in exclude]
        v1 = [wv1.vectors[i] for i in kept]
        v2 = [wv2.vectors[i] for i in kept]
    elif anchor_words is not None:
        v1 = [wv1[w] for w in anchor_words if w not in exclude]
        v2 = [wv2[w] for w in anchor_words if w not in exclude]
    else:  # just use all words
        v1 = [wv1[w] for w in wv1.words if w not in exclude]
        v2 = [wv2[w] for w in wv2.words if w not in exclude]

    v1 = np.array(v1)
    v2 = np.array(v2)

    # Fit Q on the landmarks, then rotate the whole of wv1 with it.
    Q, _ = orthogonal_procrustes(v1, v2)
    wv1_ = WordVectors(words=wv1.words, vectors=np.dot(wv1.vectors, Q))
    return wv1_, wv2, Q