tools.py
1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import random
def shuffle(lol, seed):
    '''
    lol :: list of lists as input
    seed :: value used to seed the random generator before each shuffle

    Shuffle every list of lol in place and return None.
    The module-level random generator is re-seeded with the same seed
    before each list, so lists of equal length receive the same
    permutation and stay aligned with each other.
    '''
    for seq in lol:
        # Restart the generator for every list: this is what makes each
        # list undergo the same sequence of swaps.
        random.seed(seed)
        random.shuffle(seq)
def minibatch(l, bs):
    '''
    l :: list of word idxs
    bs :: maximum size of a minibatch, must be >= 1

    Return a list of minibatches of indexes, one per element of l.
    The minibatch at position i ends with l[i] and holds the bs most
    recent elements; near the start of l, where fewer than bs elements
    are available, it holds all elements seen so far.
    eg: [0,1,2,3] and bs = 3
    will output:
    [[0],[0,1],[0,1,2],[1,2,3]]
    '''
    # A non-positive size cannot yield one minibatch per element.
    assert bs >= 1
    # Growing prefixes, used while fewer than bs elements are available.
    # range (not xrange) keeps this working on both Python 2 and 3.
    out = [l[:i] for i in range(1, min(bs, len(l) + 1))]
    # Full windows of exactly bs elements, sliding one position at a time.
    out += [l[i - bs:i] for i in range(bs, len(l) + 1)]
    assert len(l) == len(out)
    return out
def contextwin(l, win):
    '''
    l :: iterable of indexes composing a sentence
    win :: int corresponding to the size of the window, odd and >= 1

    Return a list of lists of indexes, one per word of the sentence.
    Each inner list has win entries: the word itself in the middle,
    surrounded by its neighbours. Positions that fall outside the
    sentence are filled with -1.
    eg: [0,1,2] and win = 3
    will output:
    [[-1,0,1],[0,1,2],[1,2,-1]]
    '''
    assert (win % 2) == 1
    assert win >= 1
    l = list(l)

    # Floor division keeps the pad length an int on Python 3 as well
    # (plain / would give a float there and break the list repetition).
    pad = (win // 2) * [-1]
    lpadded = pad + l + pad
    out = [lpadded[i:i + win] for i in range(len(l))]

    assert len(out) == len(l)
    return out