PipeReranker.java
2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
package is2.parserR2;
import is2.data.Cluster;
import is2.data.DataF;
import is2.data.Edges;
import is2.data.F2SF;
import is2.data.Instances;
import is2.data.MFB;
import is2.data.ParseNBest;
import is2.data.Parse;
import is2.data.PipeGen;
import is2.data.SentenceData09;
import is2.io.CONLLReader09;
import is2.util.OptionsSuper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.concurrent.ExecutorService;
import extractors.ExtractorClusterStacked;
import extractors.ExtractorReranker;
import extractors.ParallelExtract;
/**
 * Pipe used by the reranker: reads a corpus through {@link CONLLReader09}, registers the
 * symbols it finds in the feature mapping {@link #mf}, and prepares the extractor, cluster,
 * edge and instance structures.
 *
 * <p>The {@link #extractor} field is not assigned anywhere in this class;
 * {@link #createInstances(String, Instances)} calls methods through it, so it is presumably
 * set by the caller beforehand. NOTE(review): confirm against the call sites.
 */
public final class PipeReranker extends PipeGen {

    /** Fixed-size thread pool created at class initialization with {@code Parser.THREADS} threads. */
    public static ExecutorService executerService =
            java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS);

    /** Static timing slot; this class neither writes nor reads it. */
    public static long timeExtract;

    /** Reranker feature extractor; never assigned inside this class. */
    public ExtractorReranker extractor;

    /** Feature mapping that receives every symbol registered by this pipe. */
    public final MFB mf = new MFB();

    /** Cluster data, created in {@link #createInstances(String, Instances)}. */
    Cluster cl;

    /** Options handed to the constructor; {@code count} and {@code clusterFile} are read here. */
    private OptionsSuper options;

    /**
     * Creates a pipe bound to the given options.
     *
     * @param o options object; stored as-is
     */
    public PipeReranker(OptionsSuper o) {
        this.options = o;
    }

    /**
     * Scans the corpus in {@code file} and registers relation labels, normalized forms and
     * predicted lemmas, predicted and gold POS tags and morphologic features in {@link #mf}.
     * Afterwards it initializes the extractor statistics, the cluster, the edges and the
     * instance container, and re-opens the reader on the same file.
     *
     * <p>Progress is written to {@code System.out}.
     *
     * @param file path of the corpus handed to {@link CONLLReader09}
     * @param is   instance container; initialized with the number of sentences that were read
     */
    public void createInstances(String file, Instances is) {
        final CONLLReader09 depReader = new CONLLReader09(file);

        mf.register(REL, "<root-type>");
        // NOTE(review): a legacy note at this position said that at least one predicate is
        // registered because the parsing data might not contain predicates (as in the
        // Japanese corpus) while the development sets contain some. No predicate
        // registration is visible in this method.

        // Running total of label-array lengths; nothing in this method reads it.
        long labelTotal = 0;

        System.out.print("Registering feature parts of sentence: ");

        int sentenceCount = 0;
        int del = 0;

        for (SentenceData09 instance = depReader.getNext();
                instance != null;
                instance = depReader.getNext()) {

            sentenceCount++;
            labelTotal += instance.labels.length;

            // Refresh the progress output every 1000 sentences.
            if (sentenceCount % 1000 == 0) {
                del = outValue(sentenceCount, del);
            }

            for (String label : instance.labels) {
                mf.register(REL, label);
            }
            for (String form : instance.forms) {
                mf.register(WORD, depReader.normalize(form));
            }
            for (String lemma : instance.plemmas) {
                mf.register(WORD, depReader.normalize(lemma));
            }
            for (String predictedTag : instance.ppos) {
                mf.register(POS, predictedTag);
            }
            for (String goldTag : instance.gpos) {
                mf.register(POS, goldTag);
            }

            if (instance.feats != null) {
                for (String[] tokenFeats : instance.feats) {
                    // Tokens without features carry a null row.
                    if (tokenFeats == null) {
                        continue;
                    }
                    for (String feat : tokenFeats) {
                        mf.register(FEAT, feat);
                    }
                }
            }

            // Stop once the configured sentence limit has been passed.
            if (sentenceCount - 1 > options.count) {
                break;
            }
        }

        outValue(sentenceCount, del);
        System.out.println();

        ExtractorReranker.initFeatures();
        ExtractorReranker.maxForm = mf.getFeatureCounter().get(WORD);

        // Empty cluster unless a cluster file was configured.
        cl = (options.clusterFile == null)
                ? new Cluster()
                : new Cluster(options.clusterFile, mf, 6);

        mf.calculateBits();
        extractor.initStat();

        System.out.println(mf.toString());

        extractor.init();
        depReader.startReading(file);

        is.init(sentenceCount, new MFB());
        Edges.init(mf.getFeatureCounter().get(POS));

        // Emit the progress value for zero processed items, then end the line.
        outValue(0, 0);
        System.out.println();
    }
}