PipeReranker.java
2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package is2.parserR2;
import java.util.concurrent.ExecutorService;
import extractors.ExtractorReranker;
import is2.data.Cluster;
import is2.data.Edges;
import is2.data.Instances;
import is2.data.MFB;
import is2.data.PipeGen;
import is2.data.SentenceData09;
import is2.io.CONLLReader09;
import is2.util.OptionsSuper;
/**
 * Pipe that scans a corpus through {@link CONLLReader09} and registers the
 * values found in each sentence with the feature mapping {@link #mf}, then
 * initialises the static state of {@link ExtractorReranker}, the cluster
 * object, the passed {@link Instances} container and {@link Edges}.
 */
public final class PipeReranker extends PipeGen {

    /** Public static counter; this class only declares it and never writes to it. */
    public static long timeExtract;

    /** Fixed-size thread pool created with {@code Parser.THREADS} threads when the class is loaded. */
    public static ExecutorService executerService = java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS);

    /**
     * Extractor whose {@code init()} is invoked by {@link #createInstances}.
     * NOTE(review): it is never assigned inside this class, so callers
     * presumably set it beforehand - confirm.
     */
    public ExtractorReranker extractor;

    /** Feature mapping that receives every value registered by this pipe. */
    public final MFB mf = new MFB();

    /** Cluster object built at the end of the registration pass. */
    Cluster cl;

    /** Options supplied at construction; {@code count} and {@code clusterFile} are read. */
    private OptionsSuper options;

    /**
     * Creates the pipe.
     *
     * @param o options consulted later by {@link #createInstances}
     */
    public PipeReranker(OptionsSuper o) {
        this.options = o;
    }

    /**
     * Reads the sentences of {@code file}, registers their labels, forms,
     * {@code plemmas}, {@code ppos}, {@code gpos} and (when present)
     * {@code feats} values in {@link #mf}, and afterwards initialises the
     * extractor, the cluster, {@code is} and {@link Edges}. Progress is
     * printed to standard output. Declares no checked exceptions.
     *
     * @param file path handed to the {@link CONLLReader09}
     * @param is   container initialised with the number of sentences read
     */
    public void createInstances(String file, Instances is) {
        CONLLReader09 depReader = new CONLLReader09(file);

        mf.register(REL, "<root-type>");

        // Original author's note: register at least one predicate, since the
        // parsing data might not contain predicates (as in the Japanese
        // corpus) although the development sets contain some.

        System.out.print("Registering feature parts of sentence: ");

        int sentenceCount = 0;
        int progress = 0;

        for (SentenceData09 sentence = depReader.getNext(); sentence != null; sentence = depReader.getNext()) {
            sentenceCount++;

            // Refresh the progress display every 1000 sentences.
            if (sentenceCount % 1000 == 0) {
                progress = outValue(sentenceCount, progress);
            }

            for (String label : sentence.labels) {
                mf.register(REL, label);
            }

            for (String form : sentence.forms) {
                mf.register(WORD, depReader.normalize(form));
            }

            for (String lemma : sentence.plemmas) {
                mf.register(WORD, depReader.normalize(lemma));
            }

            for (String tag : sentence.ppos) {
                mf.register(POS, tag);
            }

            for (String tag : sentence.gpos) {
                mf.register(POS, tag);
            }

            String[][] featureRows = sentence.feats;
            if (featureRows != null) {
                for (String[] row : featureRows) {
                    // Rows may be null; those are skipped.
                    if (row != null) {
                        for (String feature : row) {
                            mf.register(FEAT, feature);
                        }
                    }
                }
            }

            // Stop reading once the configured sentence limit is exceeded.
            if ((sentenceCount - 1) > options.count) {
                break;
            }
        }

        outValue(sentenceCount, progress);
        System.out.println();

        ExtractorReranker.initFeatures();
        ExtractorReranker.maxForm = mf.getFeatureCounter().get(WORD);

        // Without a cluster file an empty cluster is used.
        cl = (options.clusterFile == null)
                ? new Cluster()
                : new Cluster(options.clusterFile, mf, 6);

        mf.calculateBits();
        ExtractorReranker.initStat();

        System.out.println("" + mf.toString());

        extractor.init();
        depReader.startReading(file);

        is.init(sentenceCount, new MFB());
        Edges.init(mf.getFeatureCounter().get(POS));

        // Prints a progress value of zero, exactly as the original code did.
        outValue(0, 0);
        System.out.println();
    }
}