Commit d104f60d12d6c6a6335718b56cac59ffe40dd9c3
1 parent
d56a8b56
Dependency Parser split into two versions: basic and experimental
Showing
495 changed files
with
34859 additions
and
5714 deletions
Too many changes to show.
To preserve performance only 32 of 495 files are displayed.
dependencyParser/basic/mate-tools/.classpath
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8"?> | ||
2 | +<classpath> | ||
3 | + <classpathentry kind="src" path="src"/> | ||
4 | + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> | ||
5 | + <classpathentry kind="lib" path="/mtt/lib/trove.jar"/> | ||
6 | + <classpathentry kind="lib" path="lib/commons-math-2.2.jar"/> | ||
7 | + <classpathentry kind="output" path="classes"/> | ||
8 | +</classpath> |
dependencyParser/mate-tools/.externalToolBuilders/New_Builder.launch renamed to dependencyParser/basic/mate-tools/.externalToolBuilders/New_Builder.launch
dependencyParser/mate-tools/.externalToolBuilders/ana.launch renamed to dependencyParser/basic/mate-tools/.externalToolBuilders/ana.launch
dependencyParser/mate-tools/.project renamed to dependencyParser/basic/mate-tools/.project
dependencyParser/mate-tools/build.xml renamed to dependencyParser/basic/mate-tools/build.xml
dependencyParser/mate-tools/lib/commons-math-2.2.jar renamed to dependencyParser/basic/mate-tools/lib/commons-math-2.2.jar
No preview for this file type
dependencyParser/mate-tools/lib/trove-2.0.4.jar renamed to dependencyParser/basic/mate-tools/lib/trove-2.0.4.jar
No preview for this file type
dependencyParser/basic/mate-tools/src/decoder/ParallelDecoder.java
0 → 100755
1 | +package decoder; | ||
2 | + | ||
3 | +import is2.data.Closed; | ||
4 | +import is2.data.DataF; | ||
5 | +import is2.data.Edges; | ||
6 | +import is2.data.Open; | ||
7 | + | ||
8 | +import java.util.ArrayList; | ||
9 | +import java.util.concurrent.Callable; | ||
10 | + | ||
11 | +/** | ||
12 | + * @author Bernd Bohnet, 30.08.2009 | ||
13 | + * | ||
14 | + * This class implements a parallel decoder: each worker fills the open (O) and closed (C) span charts for word pairs taken from a shared work list. | ||
15 | + */ | ||
16 | +final public class ParallelDecoder implements Callable<Object> | ||
17 | +{ | ||
18 | + // some constants | ||
19 | + private static final float INIT_BEST = (-1.0F / 0.0F); | ||
20 | + private static final boolean[] DIR ={false,true}; | ||
21 | + | ||
22 | + // the data space of the weights for a dependency tree | ||
23 | + final private DataF x; | ||
24 | + | ||
25 | + private short[] pos; | ||
26 | + | ||
27 | + private Open O[][][][]; | ||
28 | + private Closed C[][][][] ; | ||
29 | + | ||
30 | + private int n; | ||
31 | + | ||
32 | + boolean done=false; | ||
33 | + public boolean waiting =false; | ||
34 | + | ||
35 | + /** | ||
36 | + * Initialize the parallel decoder. | ||
37 | + * Edge labels are not passed in; call() looks them up via Edges.get. | ||
38 | + * | ||
39 | + * @param pos part-of-speech | ||
40 | + * @param d data | ||
41 | + * @param o open spans | ||
42 | + * @param c closed spans | ||
43 | + * @param n number of words | ||
44 | + */ | ||
45 | + public ParallelDecoder(short[] pos, DataF d, Open o[][][][], Closed c[][][][], int n) { | ||
46 | + | ||
47 | + this.pos =pos; | ||
48 | + this.x =d; | ||
49 | + | ||
50 | + this.O=o; | ||
51 | + this.C=c; | ||
52 | + this.n=n; | ||
53 | + } | ||
54 | + | ||
55 | + | ||
56 | + private static class DSet { short w1,w2;} | ||
57 | + | ||
58 | + @Override | ||
59 | + public Object call() { | ||
60 | + | ||
61 | + while (true){ | ||
62 | + | ||
63 | + DSet set = get(); | ||
64 | + if (done && set==null) break; | ||
65 | + | ||
66 | + if (set ==null) return null; | ||
67 | + | ||
68 | + short s=set.w1, t=set.w2; | ||
69 | + | ||
70 | + for(short dir =1;dir>=0;dir--) { | ||
71 | + | ||
72 | + short[] labs = (dir==1) ? Edges.get(pos[s],pos[t], false):Edges.get(pos[t],pos[s], true); | ||
73 | + | ||
74 | + O[s][t][dir] = new Open[labs.length]; | ||
75 | + for (int l = O[s][t][dir].length - 1; l >= 0; l--) { | ||
76 | + | ||
77 | + double tRP = INIT_BEST; | ||
78 | + | ||
79 | + Closed tL = null, tR = null; | ||
80 | + | ||
81 | + for (int r = s; r < t; r++) { | ||
82 | + | ||
83 | + if (s == 0 && r != 0) continue; | ||
84 | + | ||
85 | + double tLPr = INIT_BEST,tRPr = INIT_BEST; | ||
86 | + Closed tLCld = null, tRCld = null; | ||
87 | + | ||
88 | + if (r == s) tLPr = dir==1 ? x.sib[s][t][s][0][l] : x.gra[t][s][s][1 ][l]; | ||
89 | + else | ||
90 | + for (int i = s + 1; i <= r; i++) | ||
91 | + if (((dir==1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + C[s][r][1][i].p) > tLPr) { | ||
92 | + tLPr = ((dir==1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + C[s][r][1][i].p);tLCld = C[s][r][1][i];} | ||
93 | + | ||
94 | + if (r == t-1) tRPr = dir==1 ? x.gra[s][t][s][0][l] : x.sib[t][s][s][1][l]; | ||
95 | + else | ||
96 | + for (int i = r + 1; i < t; i++) | ||
97 | + if (((dir == 1 ? x.gra[s][t][i][0][l] : x.sib[t][s][i][1][l]) + C[r+1][t][0][i].p) > tRPr) { | ||
98 | + tRPr = ((dir==1?x.gra[s][t][i][0][l]:x.sib[t][s][i][1][l]) + C[r+1][t][0][i].p); tRCld=C[r + 1][t][0][i];} | ||
99 | + | ||
100 | + if (tLPr + tRPr > tRP) {tRP = tLPr + tRPr; tL = tLCld;tR = tRCld;} | ||
101 | + } | ||
102 | + O[s][t][dir][l] = new Open(s, t, dir, labs[l],tL, tR, | ||
103 | + (float) ( tRP+((dir==1)?x.pl[s][t]: x.pl[t][s]) + ((dir==1)? x.lab[s][t][labs[l]][0]:x.lab[t][s][labs[l]][1]))); | ||
104 | + } | ||
105 | + } | ||
106 | + C[s][t][1] = new Closed[n]; C[s][t][0] = new Closed[n]; | ||
107 | + | ||
108 | + for (int m = s ; m <= t; m++) { | ||
109 | + for(boolean d : DIR) { | ||
110 | + if ((d && m!=s)||!d && (m!=t && s!=0)) { | ||
111 | + | ||
112 | + // create closed structure | ||
113 | + | ||
114 | + double top = INIT_BEST; | ||
115 | + | ||
116 | + Open tU = null; Closed tL = null; | ||
117 | + int numLabels =O[(d ? s : m)][(d ? m : t)][d?1:0].length; | ||
118 | + | ||
119 | + //for (int l = numLabels-1; l >=0; l--) { | ||
120 | + for (int l = 0; l < numLabels; l++) { | ||
121 | + | ||
122 | + Open hi = O[(d ? s : m)][(d ? m : t)][d?1:0][l]; | ||
123 | + for (int amb = m + (d?1:-1); amb != (d?t:s) + (d?1:-1); amb += (d?1:-1)) { | ||
124 | + | ||
125 | + if ((hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][d?0:1][l]) > top) { | ||
126 | + top = (hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][(d?0:1)][l]); tU = hi; tL=C[d?m:s][d?t:m][d?1:0][amb];} | ||
127 | + } | ||
128 | + | ||
129 | + if ((m == (d ? t : s)) && (hi.p + x.gra[d?s:t][m][d?s:t][(d ? 0 :1)][l]) > top) { | ||
130 | + top = (hi.p + x.gra[(d ? s : t)][m][d?s:t][d?0:1][l]); tU = hi; tL = null;} | ||
131 | + } | ||
132 | + C[s][t][d?1:0][m] = new Closed(s, t, m, d?1:0,tU,tL,(float) top); | ||
133 | + } | ||
134 | + } | ||
135 | + } | ||
136 | + } | ||
137 | + return null; | ||
138 | + } | ||
139 | + | ||
140 | + public static ArrayList<DSet> sets = new ArrayList<DSet>(); | ||
141 | + | ||
142 | + static synchronized private DSet get() { | ||
143 | + synchronized (sets) { | ||
144 | + if (sets.size()==0) return null; | ||
145 | + return sets.remove(sets.size()-1); | ||
146 | + } | ||
147 | + } | ||
148 | + | ||
149 | + public static void add(short w1, short w2){ // queue the word pair (w1,w2) for a worker to pick up through get() | ||
150 | + DSet ds =new DSet(); | ||
151 | + ds.w1=w1; | ||
152 | + ds.w2=w2; | ||
153 | + synchronized (sets) { sets.add(ds); } // same lock that get() holds while removing from the list | ||
154 | + } | ||
155 | +} |
dependencyParser/basic/mate-tools/src/decoder/ParallelRearrangeNBest.java
0 → 100755
1 | +package decoder; | ||
2 | + | ||
3 | +import is2.data.DataF; | ||
4 | +import is2.data.Edges; | ||
5 | +import is2.data.Parse; | ||
6 | +import is2.data.ParseNBest; | ||
7 | + | ||
8 | +import java.util.ArrayList; | ||
9 | +import java.util.concurrent.Callable; | ||
10 | + | ||
11 | +import extractors.Extractor; | ||
12 | + | ||
13 | +/** | ||
14 | + * @author Dr. Bernd Bohnet, 30.08.2009 | ||
15 | + * | ||
16 | + * This class implements a parallel edge rearrangement for non-projective parsing; | ||
17 | + * The linear method was first suggested by Ryan McDonald et al., 2005. | ||
18 | + */ | ||
19 | +final public class ParallelRearrangeNBest implements Callable<Object> { | ||
20 | + | ||
21 | + // new parent child combination to explore | ||
22 | + final static class PA { | ||
23 | + final float p; | ||
24 | + final short ch, pa; | ||
25 | + | ||
26 | + float best; | ||
27 | + | ||
28 | + | ||
29 | + | ||
30 | + public PA(float p2, short ch2, short pa2) { p=p2; ch=ch2;pa=pa2;} | ||
31 | + } | ||
32 | + | ||
33 | + // list of parent child combinations | ||
34 | + private static ArrayList<PA> parents = new ArrayList<PA>(); | ||
35 | + | ||
36 | + // some data from the dependency tree | ||
37 | + private short[] pos; | ||
38 | + private DataF x; | ||
39 | + private boolean[][] isChild ; | ||
40 | + public short[] heads,types; | ||
41 | + private float lastNBest; | ||
42 | + private float best; // best so far | ||
43 | + private float threshold; | ||
44 | + private Extractor extractor; | ||
45 | + | ||
46 | + | ||
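47 | + /** | ||
48 | + * Initialize the parallel rearrange thread | ||
49 | + * | ||
50 | + * @param pos the part-of-speech | ||
51 | + * @param x the data | ||
52 | + * @param p the parse; its heads and labels arrays are used directly and are temporarily modified by call() | ||
53 | + * @param lastNBest candidates scoring below this value are discarded | ||
54 | + * @param extractor used to score each candidate (encode3) | ||
55 | + * @param best best score so far | ||
56 | + * @param threshold candidates scoring below best+threshold are discarded | ||
57 | + */ | ||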
58 | + public ParallelRearrangeNBest(short[] pos , DataF x, Parse p, float lastNBest, Extractor extractor, float best, float threshold) { | ||
59 | + | ||
60 | + | ||
61 | + heads=p.heads; | ||
62 | + | ||
63 | + types= p.labels; | ||
64 | + | ||
65 | + isChild = new boolean[heads.length][heads.length]; | ||
66 | + | ||
67 | + for(int i = 1, l1=1; i < heads.length; i++,l1=i) | ||
68 | + while((l1= heads[l1]) != -1) isChild[l1][i] = true; | ||
69 | + | ||
70 | + | ||
71 | + this.lastNBest =lastNBest; | ||
72 | + this.pos =pos; | ||
73 | + this.x=x; | ||
74 | + | ||
75 | + this.extractor = extractor; | ||
76 | + this.best=best; | ||
77 | + this.threshold = threshold; | ||
78 | + } | ||
79 | + | ||
80 | + public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>(); | ||
81 | + | ||
82 | + @Override | ||
83 | + public Object call() { | ||
84 | + | ||
85 | + // check the list of new possible parents and children for a better combination | ||
86 | + for(int ch = 1; ch < heads.length; ch++) { | ||
87 | + for(short pa = 0; pa < heads.length; pa++) { | ||
88 | + if(ch == pa || pa == heads[ch] || isChild[ch][pa]) continue; | ||
89 | + | ||
90 | + short oldP = heads[ch], oldT = types[ch]; | ||
91 | + heads[ch]=pa; | ||
92 | + | ||
93 | + short[] labels = Edges.get(pos[pa], pos[ch],ch<pa); | ||
94 | + | ||
95 | + for(int l=0;l<labels.length;l++) { | ||
96 | + | ||
97 | + types[ch]=labels[l]; | ||
98 | + float p_new = extractor.encode3(pos, heads, types, x); | ||
99 | + | ||
100 | + if (p_new<lastNBest || ((best+this.threshold)>p_new)) continue; | ||
101 | + | ||
102 | + ParseNBest p = new ParseNBest(); | ||
103 | + p.signature(heads, types); | ||
104 | + p.f1=p_new; | ||
105 | + parses.add(p); | ||
106 | + } | ||
107 | + | ||
108 | + // change back | ||
109 | + heads[ch]= oldP; types[ch]=oldT; | ||
110 | + | ||
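111 | + // consider changes to labels only (NOTE(review): this sits inside the pa loop but does not use pa, so the same label-only candidates are re-scored and re-added for every parent that is not skipped - confirm this is intended) | ||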
112 | + labels = Edges.get(pos[oldP], pos[ch],ch<oldP); | ||
113 | + | ||
114 | + for(int l=0;l<labels.length;l++) { | ||
115 | + | ||
116 | + types[ch]=labels[l]; | ||
117 | + float p_new = (float) extractor.encode3(pos, heads, types, x); | ||
118 | + | ||
119 | + // optimization: add only if larger than smallest of n-best | ||
120 | + if (p_new<lastNBest || ((best+this.threshold)>p_new)) continue; | ||
121 | + | ||
122 | + ParseNBest p = new ParseNBest(); | ||
123 | + p.signature(heads, types); | ||
124 | + p.f1=p_new; | ||
125 | + parses.add(p); | ||
126 | + } | ||
127 | + | ||
128 | + heads[ch]= oldP; types[ch]=oldT; | ||
129 | + } | ||
130 | + } | ||
131 | + return parses; | ||
132 | + } | ||
133 | + | ||
134 | + | ||
135 | + | ||
136 | +} |
dependencyParser/basic/mate-tools/src/decoder/ParallelRearrangeNBest2.java
0 → 100644
1 | +package decoder; | ||
2 | + | ||
3 | +import is2.data.DataF; | ||
4 | +import is2.data.Edges; | ||
5 | +import is2.data.Parse; | ||
6 | +import is2.data.ParseNBest; | ||
7 | + | ||
8 | +import java.util.ArrayList; | ||
9 | +import java.util.concurrent.Callable; | ||
10 | + | ||
11 | +import decoder.ParallelRearrangeNBest.PA; | ||
12 | + | ||
13 | +import extractors.Extractor; | ||
14 | + | ||
15 | +/** | ||
16 | + * @author Dr. Bernd Bohnet, 30.08.2009 | ||
17 | + * | ||
18 | + * This class implements a parallel edge rearrangement for non-projective parsing; | ||
19 | + * The linear method was first suggested by Ryan McDonald et al., 2005. | ||
20 | + */ | ||
21 | +final public class ParallelRearrangeNBest2 implements Callable<Object> { | ||
22 | + | ||
23 | + // new parent child combination to explore | ||
24 | + final static class PA { | ||
25 | + final float p; | ||
26 | + final short ch, pa; | ||
27 | + | ||
28 | + | ||
29 | + public short[] heads,types; | ||
30 | + | ||
31 | + public PA(Parse p, short ch2, short pa2) { | ||
32 | + this.p =(float)p.f1; | ||
33 | + heads =p.heads; | ||
34 | + types=p.labels; | ||
35 | + ch=ch2;pa=pa2; | ||
36 | + | ||
37 | + } | ||
38 | + } | ||
39 | + | ||
40 | + // list of parent child combinations | ||
41 | + private static ArrayList<PA> parents = new ArrayList<PA>(); | ||
42 | + | ||
43 | + // some data from the dependency tree | ||
44 | + private short[] pos; | ||
45 | + private DataF x; | ||
46 | + private float lastNBest; | ||
47 | + private float threshold; | ||
48 | + private Extractor extractor; | ||
49 | + | ||
50 | + | ||
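51 | + /** | ||
52 | + * Initialize the parallel rearrange thread | ||
53 | + * The parse to modify is not passed in; call() takes (parse, child, parent) items from the shared list via getPA(). | ||
54 | + * | ||
55 | + * @param pos the part-of-speech | ||
56 | + * @param x the data | ||
57 | + * @param lastNBest candidates scoring below this value are discarded | ||
58 | + * @param extractor used to score each candidate (encode3) | ||
59 | + * @param threshold candidates scoring below the source parse's score plus this value are discarded | ||
60 | + */ | ||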
61 | + public ParallelRearrangeNBest2(short[] pos , DataF x, float lastNBest, Extractor extractor, float threshold) { | ||
62 | + | ||
63 | + | ||
64 | + | ||
65 | + this.lastNBest =lastNBest; | ||
66 | + this.pos =pos; | ||
67 | + this.x=x; | ||
68 | + | ||
69 | + this.extractor = extractor; | ||
70 | + this.threshold = threshold; | ||
71 | + } | ||
72 | + | ||
73 | + public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>(); | ||
74 | + | ||
75 | + @Override | ||
76 | + public Object call() { | ||
77 | + | ||
78 | + try { | ||
79 | + | ||
80 | + while(true) { | ||
81 | + PA p = getPA(); | ||
82 | + | ||
83 | + if (p==null) return parses; | ||
84 | + | ||
85 | + short oldP = p.heads[p.ch], oldT = p.types[p.ch]; | ||
86 | + p.heads[p.ch]=p.pa; | ||
87 | + | ||
88 | + short[] labels = Edges.get(pos[p.pa], pos[p.ch],p.ch<p.pa); | ||
89 | + | ||
90 | + for(int l=0;l<labels.length;l++) { | ||
91 | + | ||
92 | + p.types[p.ch]=labels[l]; | ||
93 | + float p_new = extractor.encode3(pos, p.heads, p.types, x); | ||
94 | + | ||
95 | + if (p_new<lastNBest || ((p.p+this.threshold)>p_new)) continue; | ||
96 | + | ||
97 | + ParseNBest x = new ParseNBest(); | ||
98 | + x.signature(p.heads, p.types); | ||
99 | + x.f1=p_new; | ||
100 | + parses.add(x); | ||
101 | + } | ||
102 | + | ||
103 | + // change back | ||
104 | + p.heads[p.ch]= oldP; p.types[p.ch]=oldT; | ||
105 | + | ||
106 | + // consider changes to labels only | ||
107 | + labels = Edges.get(pos[oldP], pos[p.ch],p.ch<oldP); | ||
108 | + | ||
109 | + for(int l=0;l<labels.length;l++) { | ||
110 | + | ||
111 | + p.types[p.ch]=labels[l]; | ||
112 | + float p_new = (float) extractor.encode3(pos, p.heads, p.types, x); | ||
113 | + | ||
114 | + // optimization: add only if larger than smallest of n-best | ||
115 | + if (p_new<lastNBest || ((p.p+this.threshold)>p_new)) continue; | ||
116 | + | ||
117 | + ParseNBest x = new ParseNBest(); | ||
118 | + x.signature(p.heads, p.types); | ||
119 | + x.f1=p_new; | ||
120 | + parses.add(x); | ||
121 | + } | ||
122 | + | ||
123 | + p.heads[p.ch]= oldP; p.types[p.ch]=oldT; | ||
124 | + } | ||
125 | + } catch(Exception e) { | ||
126 | + e.printStackTrace(); | ||
127 | + } | ||
128 | + return parses; | ||
129 | + } | ||
130 | + | ||
131 | + /** | ||
132 | + * Add a child-parent combination which is later explored for rearrangement | ||
133 | + * | ||
134 | + * @param p the parse whose heads and labels the combination refers to | ||
135 | + * @param ch2 the child node | ||
136 | + * @param pa the new parent to try for that child | ||
137 | + */ | ||
138 | + public static void add(Parse p, short ch2, short pa) { | ||
139 | + synchronized(parents) { parents.add(new PA(p,ch2,pa)); } // same lock that getPA() holds while removing from the list | ||
140 | + } | ||
141 | + | ||
142 | + public static PA getPA() { | ||
143 | + synchronized(parents) { | ||
144 | + if (parents.size()==0) return null; | ||
145 | + return parents.remove(parents.size()-1); | ||
146 | + } | ||
147 | + } | ||
148 | + | ||
149 | + | ||
150 | + | ||
151 | +} |
dependencyParser/basic/mate-tools/src/examples/DependencyParser.java
0 → 100644
1 | +package examples; | ||
2 | + | ||
3 | + | ||
4 | +import is2.data.InstancesTagger; | ||
5 | +import is2.data.SentenceData09; | ||
6 | +import is2.io.CONLLReader09; | ||
7 | +import is2.lemmatizer.Lemmatizer; | ||
8 | +import is2.lemmatizer.MFO; | ||
9 | +import is2.parser.Parser; | ||
10 | +import is2.tag.Tagger; | ||
11 | +//import org.apache.log4j.Logger; | ||
12 | + | ||
13 | +import java.io.File; | ||
14 | +import java.util.Arrays; | ||
15 | + | ||
16 | +/** | ||
17 | + * Dependency parsing | ||
18 | + * | ||
19 | + * @author B. Piwowarski <benjamin@bpiwowar.net> | ||
20 | + * @date 10/10/12 | ||
21 | + */ | ||
22 | +//@TaskDescription(name = "dependency-parser", project = "mate-tools") | ||
23 | +public class DependencyParser { | ||
24 | + // final static private Logger LOGGER = Logger.getLogger(DependencyParser.class); | ||
25 | + //@Argument(name = "lemmatizer", required = true, checkers = IOChecker.Readable.class) | ||
26 | + File lemmatizerFile; | ||
27 | + | ||
28 | + //@Argument(name = "tagger", required = true) | ||
29 | + File taggerFile; | ||
30 | + | ||
31 | + //@Argument(name = "parser", required = true) | ||
32 | + File parserFile; | ||
33 | + | ||
34 | + //@Override | ||
35 | + public int execute() throws Throwable { | ||
36 | + | ||
37 | + // Load lemmatizer | ||
38 | + //LOGGER.info("Loading lemmatizer"); | ||
39 | + // true = do uppercase lemmatization | ||
40 | + Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath()); | ||
41 | + | ||
42 | + // Load tagger | ||
43 | + //LOGGER.info("Loading tagger"); | ||
44 | + Tagger tagger = new Tagger(taggerFile.getAbsolutePath()); | ||
45 | + | ||
46 | + // Load parser | ||
47 | + //LOGGER.info("Loading parser"); | ||
48 | + Parser parser = new Parser(parserFile.getAbsolutePath()); | ||
49 | + | ||
50 | + | ||
51 | + // Sentences to parse | ||
52 | + String sentences[] = new String[]{ | ||
53 | + "Airfields have been constructed on a number of the islands .", | ||
54 | + "Private investment has even made an increasingly modern ferry fleet possible .", | ||
55 | + "Politically , the 1990s have been relatively quite times for the islands ." | ||
56 | + }; | ||
57 | + | ||
58 | + CONLLReader09 reader = new CONLLReader09(CONLLReader09.NO_NORMALIZE); | ||
59 | + | ||
60 | + for (String sentence : sentences) { | ||
61 | + // Prepare the sentence | ||
62 | + InstancesTagger instanceTagger = new InstancesTagger(); | ||
63 | + instanceTagger.init(1, new MFO()); | ||
64 | + | ||
65 | + String[] split = sentence.split("\\s+"); | ||
66 | + String[] splitRoot = new String[split.length+1]; | ||
67 | + System.arraycopy(split, 0, splitRoot, 1, split.length); | ||
68 | + splitRoot[0] = CONLLReader09.ROOT; | ||
69 | + | ||
70 | + SentenceData09 instance = new SentenceData09(); | ||
71 | + instance.init(splitRoot); | ||
72 | + | ||
73 | + reader.insert(instanceTagger, instance); | ||
74 | + | ||
75 | + SentenceData09 result = lemmatizer.apply(instance); | ||
76 | + tagger.apply(result); | ||
77 | + result = parser.parse(result, parser.params, false, parser.options); | ||
78 | + | ||
79 | + | ||
80 | + // Output | ||
81 | + System.out.println(Arrays.toString(result.forms)); | ||
82 | + System.out.println(Arrays.toString(result.plemmas)); | ||
83 | + System.out.println(Arrays.toString(result.ppos)); | ||
84 | + System.out.println(Arrays.toString(result.pheads)); | ||
85 | + System.out.println(Arrays.toString(result.plabels)); | ||
86 | + System.out.println(); | ||
87 | + | ||
88 | + } | ||
89 | + | ||
90 | + return 0; | ||
91 | + } | ||
92 | +} |
dependencyParser/basic/mate-tools/src/examples/FullPipelineSpanish.java
0 → 100644
1 | +package examples; | ||
2 | + | ||
3 | +import is2.data.SentenceData09; | ||
4 | +import is2.io.CONLLWriter09; | ||
5 | +import is2.lemmatizer.Lemmatizer; | ||
6 | + | ||
7 | +import is2.parser.Parser; | ||
8 | +import is2.tag.Tagger; | ||
9 | +import is2.tools.Tool; | ||
10 | + | ||
11 | +import java.io.IOException; | ||
12 | +import java.util.ArrayList; | ||
13 | +import java.util.StringTokenizer; | ||
14 | + | ||
15 | +/** | ||
16 | + * @author Bernd Bohnet, 13.09.2010 | ||
17 | + * | ||
18 | + * Illustrates the application of the full pipeline: lemmatizer, morphologic tagger, part-of-speech tagger, and parser | ||
19 | + */ | ||
20 | +public class FullPipelineSpanish { | ||
21 | + | ||
22 | + | ||
23 | + // shows how to parse a sentence and call the tools | ||
24 | + public static void main(String[] args) throws IOException { | ||
25 | + | ||
26 | + // Create a data container for a sentence | ||
27 | + SentenceData09 i = new SentenceData09(); | ||
28 | + | ||
29 | + if (args.length==1) { // input might be a sentence: "This is another test ." | ||
30 | + StringTokenizer st = new StringTokenizer(args[0]); | ||
31 | + ArrayList<String> forms = new ArrayList<String>(); | ||
32 | + | ||
33 | + forms.add("<root>"); | ||
34 | + while(st.hasMoreTokens()) forms.add(st.nextToken()); | ||
35 | + | ||
36 | + i.init(forms.toArray(new String[0])); | ||
37 | + | ||
38 | + } else { | ||
39 | + // provide a default Spanish sentence | ||
40 | + i.init(new String[] {"<root>","También","estuve","emocionado","pero","no","pude","imaginar","mi","vida","sin","la", | ||
41 | + "gente","tan","intima","a","mí","."}); | ||
42 | + | ||
43 | + } | ||
44 | + | ||
45 | + // lemmatizing | ||
46 | + | ||
47 | + System.out.println("\nReading the model of the lemmatizer"); | ||
48 | + Tool lemmatizer = new Lemmatizer("models/lemma-spa.model"); // create a lemmatizer | ||
49 | + | ||
50 | + System.out.println("Applying the lemmatizer"); | ||
51 | + lemmatizer.apply(i); | ||
52 | + | ||
53 | + System.out.print(i.toString()); | ||
54 | + System.out.print("Lemmata: "); for (String l : i.plemmas) System.out.print(l+" "); System.out.println(); | ||
55 | + | ||
56 | + // morphologic tagging | ||
57 | + | ||
58 | + System.out.println("\nReading the model of the morphologic tagger"); | ||
59 | + is2.mtag.Tagger morphTagger = new is2.mtag.Tagger("models/mtag-spa.model"); | ||
60 | + | ||
61 | + System.out.println("\nApplying the morpholoigc tagger"); | ||
62 | + morphTagger.apply(i); | ||
63 | + | ||
64 | + System.out.print(i.toString()); | ||
65 | + System.out.print("Morph: "); for (String f : i.pfeats) System.out.print(f+" "); System.out.println(); | ||
66 | + | ||
67 | + // part-of-speech tagging | ||
68 | + | ||
69 | + System.out.println("\nReading the model of the part-of-speech tagger"); | ||
70 | + Tool tagger = new Tagger("models/tag-spa.model"); | ||
71 | + | ||
72 | + System.out.println("\nApplying the part-of-speech tagger"); | ||
73 | + tagger.apply(i); | ||
74 | + | ||
75 | + System.out.print(i.toString()); | ||
76 | + System.out.print("Part-of-Speech tags: "); for (String p : i.ppos) System.out.print(p+" "); System.out.println(); | ||
77 | + | ||
78 | + // parsing | ||
79 | + | ||
80 | + System.out.println("\nReading the model of the dependency parser"); | ||
81 | + Tool parser = new Parser("models/prs-spa.model"); | ||
82 | + | ||
83 | + System.out.println("\nApplying the parser"); | ||
84 | + parser.apply(i); | ||
85 | + | ||
86 | + System.out.println(i.toString()); | ||
87 | + | ||
88 | + // write the result to a file | ||
89 | + | ||
90 | + CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt"); | ||
91 | + | ||
92 | + writer.write(i, CONLLWriter09.NO_ROOT); | ||
93 | + writer.finishWriting(); | ||
94 | + | ||
95 | + } | ||
96 | + | ||
97 | + | ||
98 | +} |
dependencyParser/basic/mate-tools/src/examples/FullPipelineTest.java
0 → 100644
1 | +package examples; | ||
2 | + | ||
3 | + | ||
4 | +import is2.data.InstancesTagger; | ||
5 | +import is2.data.SentenceData09; | ||
6 | +import is2.io.CONLLReader09; | ||
7 | +import is2.io.CONLLWriter09; | ||
8 | +import is2.lemmatizer.Lemmatizer; | ||
9 | +import is2.lemmatizer.MFO; | ||
10 | +import is2.parser.Parser; | ||
11 | +import is2.tag.Tagger; | ||
12 | +//import org.apache.log4j.Logger; | ||
13 | + | ||
14 | +import java.io.File; | ||
15 | +import java.util.Arrays; | ||
16 | + | ||
17 | +/** | ||
18 | + * Dependency parsing | ||
19 | + * | ||
20 | + * @author B. Piwowarski <benjamin@bpiwowar.net> | ||
21 | + * @date 10/10/12 | ||
22 | + */ | ||
23 | +//@TaskDescription(name = "dependency-parser", project = "mate-tools") | ||
24 | +public class FullPipelineTest { | ||
25 | + // final static private Logger LOGGER = Logger.getLogger(DependencyParser.class); | ||
26 | + //@Argument(name = "lemmatizer", required = true, checkers = IOChecker.Readable.class) | ||
27 | + public File lemmatizerFile; | ||
28 | + | ||
29 | + //@Argument(name = "tagger", required = true) | ||
30 | + public File taggerFile; | ||
31 | + | ||
32 | + public File mtaggerFile; | ||
33 | + | ||
34 | + //@Argument(name = "parser", required = true) | ||
35 | + public File parserFile; | ||
36 | + | ||
37 | + //@Override | ||
38 | + public int execute(String source, String target) throws Throwable { | ||
39 | + | ||
40 | + // Load lemmatizer | ||
41 | + //LOGGER.info("Loading lemmatizer"); | ||
42 | + // true = do uppercase lemmatization | ||
43 | + Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath()); | ||
44 | + | ||
45 | + // Load tagger | ||
46 | + //LOGGER.info("Loading tagger"); | ||
47 | + Tagger tagger = new Tagger(taggerFile.getAbsolutePath()); | ||
48 | + | ||
49 | + is2.mtag.Tagger mtagger = new is2.mtag.Tagger(mtaggerFile.getAbsolutePath()); | ||
50 | + | ||
51 | + // Load parser | ||
52 | + //LOGGER.info("Loading parser"); | ||
53 | + Parser parser = new Parser(parserFile.getAbsolutePath()); | ||
54 | + | ||
55 | + | ||
56 | + CONLLReader09 reader = new CONLLReader09(source); | ||
57 | + CONLLWriter09 writer = new CONLLWriter09(target); | ||
58 | + | ||
59 | + int count=0; | ||
60 | + while (true) { | ||
61 | + // Prepare the sentence | ||
62 | + InstancesTagger is = new InstancesTagger(); | ||
63 | + is.init(1, new MFO()); | ||
64 | + | ||
65 | + SentenceData09 instance= reader.getNext(is); | ||
66 | + if (instance ==null) break; | ||
67 | + SentenceData09 result = null; | ||
68 | +try { | ||
69 | + | ||
70 | + System.out.print("\b\b\b\b"+count); | ||
71 | + result= lemmatizer.apply(instance); | ||
72 | + | ||
73 | + result = tagger.apply(result); | ||
74 | + result= mtagger.apply(result); | ||
75 | + result = parser.apply(result); | ||
76 | + | ||
77 | + count++; | ||
78 | +} catch(Exception e) { | ||
79 | + | ||
80 | + System.out.println("error"+result); | ||
81 | + System.out.println("error"+instance); | ||
82 | + e.printStackTrace(); | ||
83 | + break; | ||
84 | +} | ||
85 | + | ||
86 | + // Output | ||
87 | + writer.write(result); | ||
88 | + | ||
89 | + } | ||
90 | + writer.finishWriting(); | ||
91 | + return 0; | ||
92 | + } | ||
93 | + | ||
94 | + public static void main(String args[]) throws Throwable { | ||
95 | + // six arguments are read below (args[0]..args[5]); with fewer, print the usage line instead of failing on an array index | ||
96 | + if (args.length<6) { | ||
97 | + System.out.println("lemmatizer-model tagger-model morph-tagger-model parser-model source target"); | ||
98 | + System.exit(0); | ||
99 | + } | ||
100 | + FullPipelineTest p = new FullPipelineTest(); | ||
101 | + p.lemmatizerFile = new File(args[0]); | ||
102 | + p.taggerFile = new File(args[1]); | ||
103 | + p.mtaggerFile = new File(args[2]); | ||
104 | + p.parserFile = new File(args[3]); | ||
105 | + | ||
106 | + p.execute(args[4], args[5]); | ||
107 | + | ||
108 | + } | ||
109 | + | ||
110 | +} |
dependencyParser/basic/mate-tools/src/examples/MorphTagger.java
0 → 100644
1 | +package examples; | ||
2 | + | ||
3 | +import is2.data.SentenceData09; | ||
4 | +import is2.lemmatizer.Lemmatizer; | ||
5 | +import is2.lemmatizer.Options; | ||
6 | + | ||
7 | +import java.io.IOException; | ||
8 | +import java.util.ArrayList; | ||
9 | +import java.util.StringTokenizer; | ||
10 | + | ||
11 | +/** | ||
12 | + * @author Bernd Bohnet, 13.09.2010 | ||
13 | + * | ||
14 | + * Illustrates the application of some components: lemmatizer and morphologic tagger | ||
15 | + */ | ||
16 | +public class MorphTagger { | ||
17 | + | ||
18 | + | ||
19 | + /** | ||
20 | + * How to lemmatize and morphologically tag a sentence. | ||
21 | + */ | ||
22 | + public static void main(String[] args) throws IOException { | ||
23 | + | ||
24 | + | ||
25 | + // Create a data container for a sentence | ||
26 | + SentenceData09 i = new SentenceData09(); | ||
27 | + | ||
28 | + if (args.length==1) { // input might be a sentence: "This is another test ." | ||
29 | + StringTokenizer st = new StringTokenizer(args[0]); | ||
30 | + ArrayList<String> forms = new ArrayList<String>(); | ||
31 | + | ||
32 | + forms.add("<root>"); | ||
33 | + while(st.hasMoreTokens()) forms.add(st.nextToken()); | ||
34 | + | ||
35 | + i.init(forms.toArray(new String[0])); | ||
36 | + | ||
37 | + } else { | ||
38 | + // provide a default sentence | ||
39 | + i.init(new String[] {"<root>","Häuser","hat","ein","Umlaut","."}); | ||
40 | + } | ||
41 | + | ||
42 | + //print the forms | ||
43 | + for (String l : i.forms) System.out.println("forms : "+l); | ||
44 | + | ||
45 | + // tell the lemmatizer the location of the model | ||
46 | + is2.lemmatizer.Options optsLemmatizer = new Options(new String[] {"-model","models/lemma-ger.model"}); | ||
47 | + | ||
48 | + // create a lemmatizer | ||
49 | + Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName); | ||
50 | + | ||
51 | + // lemmatize a sentence; the result is stored in the SentenceData09 i | ||
52 | + lemmatizer.apply(i); | ||
53 | + | ||
54 | + | ||
55 | + // output the lemmata | ||
56 | + for (String l : i.plemmas) System.out.println("lemma : "+l); | ||
57 | + | ||
58 | + | ||
59 | + is2.mtag.Options morphologicTaggerOptions = new is2.mtag.Options(new String[] {"-model","models/mtag-ger.model"}); | ||
60 | + | ||
61 | + is2.mtag.Tagger mt = new is2.mtag.Tagger(morphologicTaggerOptions); | ||
62 | + | ||
63 | + try { | ||
64 | + | ||
65 | + | ||
66 | + // SentenceData09 snt = is2.mtag.Main.out(i.forms, lemmata); | ||
67 | + | ||
68 | + SentenceData09 snt = mt.apply(i); | ||
69 | + for(String f : snt.pfeats) System.out.println("feats "+f); | ||
70 | + | ||
71 | + } catch(Exception e){ | ||
72 | + e.printStackTrace(); | ||
73 | + } | ||
74 | + | ||
75 | + | ||
76 | + } | ||
77 | + | ||
78 | + | ||
79 | +} |
dependencyParser/basic/mate-tools/src/examples/ParseOnly.java
0 → 100755
1 | +package examples; | ||
2 | + | ||
3 | +import is2.data.SentenceData09; | ||
4 | +import is2.parser.Options; | ||
5 | +import is2.parser.Parser; | ||
6 | + | ||
7 | + | ||
8 | +public class ParseOnly { | ||
9 | + | ||
10 | + public static void main(String[] args) { | ||
11 | + | ||
12 | + if (args.length ==0) { | ||
13 | + plain(); | ||
14 | + } | ||
15 | + | ||
16 | + } | ||
17 | + | ||
18 | + /** | ||
19 | + * This example shows how to parse a sentence. | ||
20 | + */ | ||
21 | + public static void plain() { | ||
22 | + | ||
23 | + // initialize the options | ||
24 | + String[] opts ={"-model","models/prs-eng-x.model"}; | ||
25 | + Options options = new Options(opts); | ||
26 | + | ||
27 | + // create a parser | ||
28 | + Parser parser = new Parser(options); | ||
29 | + | ||
30 | + // Create a data container for a sentence | ||
31 | + SentenceData09 i = new SentenceData09(); | ||
32 | + | ||
33 | + // Provide the sentence | ||
34 | + i.init(new String[] {"<root>","This","is","a","test","."}); | ||
35 | + i.setPPos(new String[]{"<root-POS>","DT","VBZ","DT","NN","."}); | ||
36 | + | ||
37 | + // parse the sentence | ||
38 | + SentenceData09 out = parser.apply(i); | ||
39 | + | ||
40 | + // output the sentence and dependency tree | ||
41 | + System.out.println(out.toString()); | ||
42 | + | ||
43 | + // Get the parsing results | ||
44 | + out.getLabels(); | ||
45 | + out.getParents(); | ||
46 | + | ||
47 | + } | ||
48 | + | ||
49 | + | ||
50 | +} |
dependencyParser/basic/mate-tools/src/examples/Pipeline.java
0 → 100644
1 | +package examples; | ||
2 | + | ||
3 | +import java.io.File; | ||
4 | +import java.io.IOException; | ||
5 | +import java.util.ArrayList; | ||
6 | +import java.util.StringTokenizer; | ||
7 | + | ||
8 | +import is2.data.SentenceData09; | ||
9 | +import is2.lemmatizer.Lemmatizer; | ||
10 | +import is2.parser.Options; | ||
11 | +import is2.parser.Parser; | ||
12 | +import is2.tag.Tagger; | ||
13 | + | ||
14 | +/** | ||
15 | + * @author Bernd Bohnet, 13.09.2010 | ||
16 | + * | ||
17 | + * Illustrates the application of some components: lemmatizer, tagger, and parser | ||
18 | + */ | ||
19 | +public class Pipeline { | ||
20 | + | ||
21 | + | ||
22 | + // how to parse a sentences and call the tools | ||
23 | + public static void main(String[] args) throws IOException { | ||
24 | + | ||
25 | + | ||
26 | + // Create a data container for a sentence | ||
27 | + SentenceData09 i = new SentenceData09(); | ||
28 | + | ||
29 | + if (args.length==1) { // input might be a sentence: "This is another test ." | ||
30 | + StringTokenizer st = new StringTokenizer(args[0]); | ||
31 | + ArrayList<String> forms = new ArrayList<String>(); | ||
32 | + | ||
33 | + forms.add("<root>"); | ||
34 | + while(st.hasMoreTokens()) forms.add(st.nextToken()); | ||
35 | + | ||
36 | + i.init(forms.toArray(new String[0])); | ||
37 | + | ||
38 | + } else { | ||
39 | + // provide a default sentence | ||
40 | + i.init(new String[] {"<root>","This","is","a","test","."}); | ||
41 | + } | ||
42 | + | ||
43 | + //print the forms | ||
44 | + for (String l : i.forms) System.out.println("form : "+l); | ||
45 | + | ||
46 | + // tell the lemmatizer the location of the model | ||
47 | + is2.lemmatizer.Options optsLemmatizer = new is2.lemmatizer.Options(new String[] {"-model","models/lemma-eng.model"}); | ||
48 | + | ||
49 | + // create a lemmatizer | ||
50 | + Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName); | ||
51 | + | ||
52 | + // lemmatize a sentence; the result is stored in the stenenceData09 i | ||
53 | + i = lemmatizer.apply(i); | ||
54 | + | ||
55 | + | ||
56 | + // output the lemmata | ||
57 | + for (String l : i.plemmas) System.out.println("lemma : "+l); | ||
58 | + | ||
59 | + // tell the tagger the location of the model | ||
60 | + is2.tag.Options optsTagger = new is2.tag.Options(new String[]{"-model","models/tag-eng.model"}); | ||
61 | + Tagger tagger = new Tagger(optsTagger); | ||
62 | + | ||
63 | + | ||
64 | + | ||
65 | +// String pos[] =tagger.tag(i.forms, i.lemmas); | ||
66 | +// i.setPPos(pos); | ||
67 | + | ||
68 | + | ||
69 | + SentenceData09 tagged = tagger.tag(i); | ||
70 | + for (String p : tagged.ppos) System.out.println("pos "+p); | ||
71 | + | ||
72 | + | ||
73 | + | ||
74 | + // initialize the options | ||
75 | + Options optsParser = new Options(new String[]{"-model","models/prs-eng-x.model"}); | ||
76 | + | ||
77 | + // create a parser | ||
78 | + Parser parser = new Parser(optsParser); | ||
79 | + | ||
80 | + // parse the sentence (you get a copy of the input i) | ||
81 | + SentenceData09 parse = parser.apply(tagged); | ||
82 | + | ||
83 | + System.out.println(parse.toString()); | ||
84 | + | ||
85 | + // create some trash on the hard drive :-) | ||
86 | + is2.io.CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt"); | ||
87 | + | ||
88 | + writer.write(i); | ||
89 | + writer.finishWriting(); | ||
90 | + } | ||
91 | + | ||
92 | + | ||
93 | + | ||
94 | + | ||
95 | +} |
dependencyParser/basic/mate-tools/src/extractors/Extractor.java
0 → 100644
1 | +/** | ||
2 | + * | ||
3 | + */ | ||
4 | +package extractors; | ||
5 | + | ||
6 | +import is2.data.Cluster; | ||
7 | +import is2.data.DataF; | ||
8 | +import is2.data.FV; | ||
9 | +import is2.data.IFV; | ||
10 | +import is2.data.Instances; | ||
11 | + | ||
12 | +/** | ||
13 | + * @author Dr. Bernd Bohnet, 29.04.2011 | ||
14 | + * | ||
15 | + * | ||
16 | + */ | ||
17 | +public interface Extractor { | ||
18 | + | ||
19 | + | ||
20 | + /** | ||
21 | + * Initializes the Extractor general parts | ||
22 | + */ | ||
23 | + public void initStat(); | ||
24 | + | ||
25 | + /** | ||
26 | + * Initializes the Extractor specific parts | ||
27 | + */ | ||
28 | + public void init(); | ||
29 | + | ||
30 | + public int basic(short[] pos, int[] forms, int w1, int w2, Cluster cluster, IFV f); | ||
31 | + | ||
32 | + public void firstm(Instances is, int i, int w1, int w2, int j, Cluster cluster, long[] svs); | ||
33 | + | ||
34 | + public void siblingm(Instances is, int i, short[] pos, int[] forms, | ||
35 | + int[] lemmas, short[][] feats, int w1, int w2, int g, int j, | ||
36 | + Cluster cluster, long[] svs, int n); | ||
37 | + | ||
38 | + public void gcm(Instances is, int i, int w1, int w2, int g, int j, Cluster cluster, long[] svs); | ||
39 | + | ||
40 | + public int getType(); | ||
41 | + | ||
42 | + public FV encodeCat(Instances is, int n, short[] pos, int[] is2, | ||
43 | + int[] is3, short[] heads, short[] labels, short[][] s, Cluster cl, | ||
44 | + FV pred); | ||
45 | + | ||
46 | + public void setMaxForm(int integer); | ||
47 | + | ||
48 | + /** | ||
49 | + * @return | ||
50 | + */ | ||
51 | + public int getMaxForm(); | ||
52 | + | ||
53 | + | ||
54 | + public float encode3(short[] pos, short[] heads, short[] labs, DataF x); | ||
55 | + | ||
56 | + | ||
57 | + | ||
58 | + | ||
59 | +} |
dependencyParser/basic/mate-tools/src/extractors/ExtractorClusterStacked.java
0 → 100755
1 | +package extractors; | ||
2 | + | ||
3 | + | ||
4 | +import is2.data.Cluster; | ||
5 | +import is2.data.D4; | ||
6 | +import is2.data.DataF; | ||
7 | +import is2.data.Edges; | ||
8 | +import is2.data.FV; | ||
9 | +import is2.data.IFV; | ||
10 | +import is2.data.Instances; | ||
11 | +import is2.data.Long2IntInterface; | ||
12 | +import is2.data.MFB; | ||
13 | +import is2.util.DB; | ||
14 | + | ||
15 | + | ||
16 | + | ||
17 | +final public class ExtractorClusterStacked implements Extractor { | ||
18 | + | ||
19 | + public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos; | ||
20 | + | ||
21 | + | ||
22 | + final D4 d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ; | ||
23 | + | ||
24 | + public final Long2IntInterface li; | ||
25 | + | ||
/**
 * Creates the extractor: runs initFeatures(), keeps the given mapping and
 * creates every D4 template object of this class on top of that mapping.
 *
 * @param li the long-to-int mapping shared by all D4 objects of this extractor
 */
public ExtractorClusterStacked(Long2IntInterface li) {

    this.initFeatures();
    this.li = li;

    // same creation order as before, one template object per line
    this.d0 = new D4(li);
    this.dl1 = new D4(li);
    this.dl2 = new D4(li);
    this.dwr = new D4(li);
    this.dr = new D4(li);
    this.dwwp = new D4(li);
    this.dw = new D4(li);
    this.dwp = new D4(li);
    this.dlf = new D4(li);
    this.d3lp = new D4(li);
    this.d2lp = new D4(li);
    this.d2pw = new D4(li);
    this.d2pp = new D4(li);
}
42 | + | ||
43 | + public void initStat() { | ||
44 | + | ||
45 | + | ||
46 | + MFB mf = new MFB(); | ||
47 | + s_rel = mf.getFeatureCounter().get(REL).intValue(); | ||
48 | + s_pos = mf.getFeatureCounter().get(POS).intValue(); | ||
49 | + s_word = mf.getFeatureCounter().get(WORD).intValue(); | ||
50 | + s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits(); | ||
51 | + s_dir = mf.getFeatureCounter().get(DIR); | ||
52 | + la = mf.getValue(DIR, LA); | ||
53 | + ra = mf.getValue(DIR, RA); | ||
54 | + s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST); | ||
55 | + s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT); | ||
56 | + s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH); | ||
57 | + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH); | ||
58 | + } | ||
59 | + | ||
60 | + public void init(){ | ||
61 | + // DB.println("init"); | ||
62 | + d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos; | ||
63 | + dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; | ||
64 | + dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos; | ||
65 | + dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word; | ||
66 | + dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word; | ||
67 | + dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos; | ||
68 | + d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath; | ||
69 | + d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | ||
70 | + d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | ||
71 | + d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | ||
72 | + } | ||
73 | + | ||
74 | + | ||
75 | + public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f) | ||
76 | + { | ||
77 | + | ||
78 | + d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean(); | ||
79 | + | ||
80 | + d3lp.clean(); d2lp.clean();d2pw.clean(); d2pp.clean(); | ||
81 | + | ||
82 | + int n=1; | ||
83 | + int dir= (p < d)? ra:la; | ||
84 | + d0.v0= n++; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4; | ||
85 | + int end= (p >= d ? p : d); | ||
86 | + int start = (p >= d ? d : p) + 1; | ||
87 | + | ||
88 | + for(int i = start ; i <end ; i++) { | ||
89 | + d0.v3=pposs[i]; | ||
90 | + d0.cz4(); | ||
91 | + d0.csa(s_dir,dir,f); | ||
92 | + } | ||
93 | + return n; | ||
94 | + } | ||
95 | + | ||
96 | + | ||
/**
 * Computes the features of the labeled edge prnt -> dpnt in sentence i and
 * stores the resulting values in f.
 *
 * f is set to 0 completely before anything is written. The features combine the
 * label with forms, lemmas, part-of-speech values, neighbouring part-of-speech
 * values (distance 1 and 2), neighbouring forms, cluster values and, when
 * is.feats[i] is not null, the morphologic features of both tokens (extractFeat).
 * Most values are completed with the direction of the edge via csa(s_dir,dir).
 *
 * @param is      instances providing pposs, forms, plemmas, feats, plabels and pheads
 * @param i       index of the sentence in is
 * @param prnt    position of the parent token
 * @param dpnt    position of the dependent token
 * @param label   label value of the edge, written to slot v1 of the templates
 * @param cluster provides the cluster value of a form via getLP
 * @param f       output array; NOTE(review): indices up to 64 are written below, so
 *                the array has to be at least that long
 */
97 | + public void firstm(Instances is, int i, | ||
98 | + int prnt, int dpnt, int label, Cluster cluster, long[] f) | ||
99 | + { | ||
100 | + | ||
101 | + | ||
102 | + //short[] pposs, int[] form, int[] lemmas, short[][] feats | ||
103 | + for(int k=0;k<f.length;k++) f[k]=0; | ||
104 | + | ||
105 | + short[] pposs = is.pposs[i]; | ||
106 | + int[] form =is.forms[i]; | ||
107 | + short[][] feats = is.feats[i]; | ||
108 | + | ||
109 | + | ||
110 | + int pF = form[prnt],dF = form[dpnt]; | ||
111 | + int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt]; | ||
112 | + int pP = pposs[prnt],dP = pposs[dpnt]; | ||
113 | + | ||
// cluster values are looked up with the original form values, before the maxForm cut below
114 | + int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF); | ||
115 | + | ||
116 | + final int dir= (prnt < dpnt)? ra:la; | ||
117 | + | ||
// form and lemma values above maxForm are replaced by -1
118 | + if (pF>maxForm) pF=-1; | ||
119 | + if (pL>maxForm) pL=-1; | ||
120 | + | ||
121 | + if (dF>maxForm) dF=-1; | ||
122 | + if (dL>maxForm) dL=-1; | ||
123 | + | ||
124 | + | ||
// n numbers the feature templates (slot v0), c is the next free index in f
125 | + int n=3,c=0; | ||
126 | + | ||
127 | + dl2.v1=label; | ||
128 | + dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | ||
129 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | ||
130 | + dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | ||
131 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | ||
132 | + | ||
133 | + | ||
134 | + dwwp.v1=label; | ||
135 | + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir); | ||
136 | + | ||
137 | + dl1.v1=label; | ||
138 | + dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir); | ||
139 | + dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir); | ||
140 | + dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.csa(s_dir,dir); | ||
141 | + | ||
// neighbouring part-of-speech values (distance 1 and 2) and forms (distance 1);
// s_str/s_end/s_stwrd are used where the neighbour would be outside the sentence
142 | + int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str; | ||
143 | + int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end; | ||
144 | + | ||
145 | + int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str; | ||
146 | + int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end; | ||
147 | + | ||
148 | + int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd; | ||
149 | + int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd; | ||
150 | + | ||
151 | + | ||
152 | + | ||
// NOTE(review): from here to the "lemmas" section the values are stored with f[n++]
// instead of f[c++]. n is 11 and c is 8 at this point, so every line increments n
// twice (v0 gets 11,13,...,63) and the values land in f[12],f[14],...,f[64]. The
// f[c++] writes further below continue at index 8 and overwrite f[12]..f[24] (even
// indices). Presumably f[c++] was intended; changing it would alter the template
// numbers and thereby the produced feature values - check against existing models
// before touching it.
153 | + dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
154 | + dl1.v0= n++; dl1.v5=dPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
155 | + dl1.v0= n++; dl1.v3=pPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
156 | + dl1.v0= n++; dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
157 | + | ||
158 | + | ||
159 | + dl1.v0= n++; dl1.v3=pPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
160 | + dl1.v0= n++; dl1.v3=dPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
161 | + dl1.v0= n++; dl1.v3=dPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
162 | + dl1.v0= n++; dl1.v3=pPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
163 | + | ||
164 | + dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
165 | + dl1.v0= n++; dl1.v5=dPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
166 | + dl1.v0= n++; dl1.v3=pPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
167 | + dl1.v0= n++; dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
168 | + | ||
169 | + dl1.v0= n++; dl1.v3=pPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
170 | + dl1.v0= n++; dl1.v3=dPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
171 | + dl1.v0= n++; dl1.v3=dPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
172 | + dl1.v0= n++; dl1.v3=pPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
173 | + | ||
174 | + | ||
175 | + | ||
// NOTE(review): in the next eight lines dl2.v3 is assigned twice, so the neighbouring
// form value (dFm1, dFp1, pFm1, pFp1) is overwritten before it is used; dl2.v2 keeps
// the value dF set further above. These lines use getVal() and add no direction.
176 | + dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal(); | ||
177 | + dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.cz5(); f[n++]=dl2.getVal(); | ||
178 | + dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal(); | ||
179 | + dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.cz5(); f[n++]=dl2.getVal(); | ||
180 | + | ||
181 | + | ||
182 | + dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal(); | ||
183 | + dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.cz5(); f[n++]=dl2.getVal(); | ||
184 | + dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal(); | ||
185 | + dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.cz5(); f[n++]=dl2.getVal(); | ||
186 | + | ||
187 | + | ||
188 | + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir); | ||
189 | + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir); | ||
// NOTE(review): dwwp.v4 is assigned twice in the next line (pP, then dP) and v5 is
// not set although cz6() is called - presumably v5=dP was intended.
190 | + dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[n++]=dwwp.csa(s_dir,dir); | ||
191 | + | ||
192 | + | ||
193 | + | ||
194 | + // lemmas | ||
195 | + | ||
// from here on the values are stored with f[c++] again, continuing at index 8
196 | + dl2.v1=label; | ||
197 | + dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | ||
198 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | ||
199 | + dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | ||
200 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | ||
201 | + | ||
202 | + | ||
203 | + dwwp.v1=label; | ||
204 | + dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir); | ||
205 | + | ||
206 | + dwp.v1= label; | ||
207 | + dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; dwp.cz6(); f[c++]=dwp.csa(s_dir,dir); | ||
208 | + dwp.v0=n++;dwp.cz5(); f[c++]=dwp.csa(s_dir,dir); | ||
209 | + | ||
210 | + dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.csa(s_dir,dir); | ||
211 | + dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir); | ||
212 | + dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir); | ||
213 | + | ||
214 | + | ||
215 | + // cluster | ||
216 | + | ||
217 | + d2pw.v1=label; | ||
218 | + d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.csa(s_dir,dir); | ||
219 | + d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir); | ||
220 | + d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir); | ||
221 | + d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.csa(s_dir,dir); | ||
222 | + | ||
223 | + | ||
224 | + d2pp.v1=label; | ||
225 | + d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.csa(s_dir,dir); | ||
226 | + d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | ||
227 | + d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | ||
228 | + d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.csa(s_dir,dir); | ||
229 | + | ||
230 | + | ||
// prel and phead are only referenced by the commented-out stacking features below
231 | + short[] prel = is.plabels[i]; | ||
232 | + short[] phead = is.pheads[i]; | ||
233 | + | ||
234 | + | ||
235 | + //take those in for stacking | ||
236 | + // dl2.v1=label; | ||
237 | + // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir); | ||
238 | + // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.csa(s_dir,dir); | ||
239 | + | ||
240 | + | ||
241 | + | ||
// without morphologic features the extraction ends here
242 | + if (feats==null) return; | ||
243 | + | ||
244 | + short[] featsP =feats[prnt], featsD =feats[dpnt]; | ||
// dlf is prepared with template number, label and both part-of-speech values;
// extractFeat presumably continues writing to f at index c - confirm in extractFeat
245 | + dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP; | ||
246 | + extractFeat(f, c, dir, featsP, featsD); | ||
247 | + | ||
248 | + return; | ||
249 | + } | ||
250 | + | ||
251 | + | ||
252 | + | ||
/**
 * Computes the features of the triple parent p, dependent d and grandchild gc
 * in sentence i and stores the resulting values in f, starting at index 0.
 *
 * f is set to 0 completely before anything is written. The features combine the
 * label with part-of-speech values, forms, lemmas and cluster values of the three
 * tokens and of their direct neighbours and, when is.feats[i] is not null, with
 * morphologic features (extractFeat). A missing grandchild is passed as gc == -1;
 * s_str and s_stwrd are then used for its part-of-speech, form, lemma and cluster value.
 *
 * @param is      instances providing pposs, forms, plemmas, feats, plabels and pheads
 * @param i       index of the sentence in is
 * @param p       position of the parent token
 * @param d       position of the dependent token
 * @param gc      position of the grandchild token or -1
 * @param label   label value, written to slot v1 of the templates
 * @param cluster provides the cluster value of a form via getLP
 * @param f       output array for the feature values
 */
253 | + public void gcm(Instances is , int i, int p, int d, int gc, int label,Cluster cluster, long[] f) { | ||
254 | + | ||
255 | + for(int k=0;k<f.length;k++) f[k]=0; | ||
256 | + | ||
257 | + short[] pos= is.pposs[i]; | ||
258 | + int[] forms=is.forms[i]; | ||
259 | + int[] lemmas=is.plemmas[i]; | ||
260 | + short[][] feats=is.feats[i]; | ||
261 | + | ||
262 | + int pP = pos[p], dP = pos[d]; | ||
263 | + int prntF = forms[p], chldF = forms[d]; | ||
264 | + int prntL = lemmas[p], chldL = lemmas[d]; | ||
265 | + int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); | ||
266 | + | ||
// values of the grandchild; the s_str/s_stwrd constants stand in when there is none
267 | + int gP = gc != -1 ? pos[gc] : s_str; | ||
268 | + int gcF = gc != -1 ? forms[gc] : s_stwrd; | ||
269 | + int gcL = gc != -1 ? lemmas[gc] : s_stwrd; | ||
270 | + int gcLS = (gc != -1) && (gcF!=-1) ? cluster.getLP(gcF) : s_stwrd; | ||
271 | + | ||
// form and lemma values above maxForm are replaced by -1 (after the cluster lookups above)
272 | + if (prntF>maxForm) prntF=-1; | ||
273 | + if (prntL>maxForm) prntL=-1; | ||
274 | + | ||
275 | + if (chldF>maxForm) chldF=-1; | ||
276 | + if (chldL>maxForm) chldL=-1; | ||
277 | + | ||
278 | + if (gcF>maxForm) gcF=-1; | ||
279 | + if (gcL>maxForm) gcL=-1; | ||
280 | + | ||
281 | + | ||
// dir: direction parent -> dependent; dir_gra: direction dependent -> grandchild
282 | + int dir= (p < d)? ra:la, dir_gra =(d < gc)? ra:la; | ||
283 | + | ||
// n numbers the feature templates (slot v0) starting at 84, c is the next free index in f
284 | + int n=84,c=0; | ||
285 | + | ||
286 | + //dl1.v023(); | ||
// in this first part both directions are added to every value: cs(s_dir,dir)
// followed by csa(s_dir,dir_gra), whose result is stored
287 | + dl1.v1=label; | ||
288 | + dl1.v0= n++; dl1.v2=pP; dl1.v3=dP;dl1.v4=gP; dl1.cz5(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); | ||
289 | + dl1.v0= n++; dl1.v2=pP; dl1.v3=gP; dl1.cz4();dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); | ||
290 | + dl1.v0= n++; dl1.v2=dP; dl1.cz4(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); | ||
291 | + | ||
292 | + dwwp.v1=label; | ||
293 | + dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=gcF; | ||
294 | + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | ||
295 | + | ||
296 | + dwwp.v0= n++; dwwp.v2=chldF; dwwp.v3=gcF; | ||
297 | + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | ||
298 | + | ||
299 | + dwp.v1=label; | ||
300 | + dwp.v0= n++; dwp.v2=gcF; dwp.v3=pP; | ||
301 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | ||
302 | + | ||
303 | + dwp.v0= n++; dwp.v2=gcF; dwp.v3=dP; | ||
304 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | ||
305 | + | ||
306 | + dwp.v0= n++; dwp.v2=prntF; dwp.v3=gP; | ||
307 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | ||
308 | + | ||
309 | + dwp.v0= n++; dwp.v2=chldF; dwp.v3=gP; | ||
310 | + dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra); | ||
311 | + | ||
312 | + | ||
313 | + // lemma | ||
314 | + | ||
315 | + dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=gcL; | ||
316 | + dwwp.cz4();dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | ||
317 | + | ||
318 | + dwwp.v0= n++; dwwp.v2=chldL; dwwp.v3=gcL; | ||
319 | + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | ||
320 | + | ||
321 | + dwp.v0= n++; dwp.v2=gcL; dwp.v3=pP; | ||
322 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | ||
323 | + | ||
324 | + dwp.v0= n++; dwp.v2=gcL; dwp.v3=dP; | ||
325 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | ||
326 | + | ||
327 | + dwp.v0= n++; dwp.v2=prntL; dwp.v3=gP; | ||
328 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | ||
329 | + | ||
330 | + dwp.v0= n++; dwp.v2=chldL; dwp.v3=gP; | ||
331 | + dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra); | ||
332 | + | ||
333 | + | ||
334 | + // clusters | ||
335 | + | ||
336 | + d2lp.v1= label; | ||
337 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);// f.add(li.l2i(l)); | ||
338 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra); | ||
339 | + d3lp.v0= n++; d3lp.v1= label; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=gcLS; d3lp.cz5(); d3lp.cs(s_dir,dir);f[c++]=d3lp.csa(s_dir,dir_gra); | ||
340 | + | ||
341 | + //_f83; | ||
// from here on only the direction parent -> dependent (dir) is added
342 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=gcF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); | ||
343 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.v4=chldF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); | ||
344 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.v4=prntF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); | ||
345 | + | ||
346 | + d2pp.v1= label; | ||
347 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=gP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | ||
348 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=gcLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | ||
349 | + d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=gcLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | ||
350 | + | ||
351 | + | ||
352 | + | ||
353 | + // linear features | ||
354 | + | ||
355 | + int prntPm1 = p != 0 ? pos[p - 1] : s_str; // parent-pos-minus1 | ||
356 | + int chldPm1 = d - 1 >=0 ? pos[d - 1] : s_str; // child-pos-minus1 | ||
357 | + int prntPp1 = p != pos.length - 1 ? pos[p + 1] : s_end; | ||
358 | + int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end; | ||
359 | + | ||
// NOTE(review): for gc == -1 the condition of gcPp1 holds, so pos[0] is used as the
// "next" part-of-speech of the missing grandchild
360 | + int gcPm1 = gc > 0 ? pos[gc - 1] : s_str; | ||
361 | + int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end; | ||
362 | + | ||
363 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
364 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
365 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
366 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
367 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
368 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | ||
369 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP;dl1.v5=chldPp1; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | ||
// NOTE(review): the next line calls cz6() without setting v5; v5 still holds chldPp1
// from the line before, which matches the pattern of the parent variant further below
370 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
371 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
372 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
373 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
374 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
375 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
376 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
377 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
378 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=prntPp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
379 | + | ||
380 | + | ||
// cluster values of the neighbouring tokens.
// NOTE(review): the "+1" values are looked up with forms, the "-1" values with lemmas;
// gcLSp1 falls back to s_end while the other two use _cend; for gc == -1 gcLSp1 reads
// forms[0] - confirm that these differences are intended
381 | + int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend; | ||
382 | + int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; | ||
383 | + int gcLSp1 = gc < pos.length -1 ? forms[gc + 1] ==-1?-1:cluster.getLP(forms[gc + 1]) : s_end; | ||
384 | + | ||
385 | + int pLSm1 = p != 0 ? lemmas[p - 1]==-1?-1:cluster.getLP(lemmas[p - 1]): _cstr; | ||
386 | + int cLSm1 = d - 1 >=0 ? lemmas[d - 1] ==-1?-1:cluster.getLP(lemmas[d - 1]):_cstr; | ||
387 | + int gcLSm1 = gc > 0 ? lemmas[gc - 1] ==-1?-1:cluster.getLP(lemmas[gc - 1]) : _cstr; | ||
388 | + | ||
389 | + | ||
// same patterns as the linear part-of-speech features above, with the neighbouring
// part-of-speech values replaced by the neighbouring cluster values
390 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
391 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=dP; dl1.cz5();f[c++]=dl1.csa(s_dir,dir); | ||
392 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
393 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
394 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
395 | + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | ||
396 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
// NOTE(review): the next line uses cLSm1 in v2 where the corresponding lines use
// gcLSm1, and again relies on v5 (cLSp1) from the line before
397 | + dl1.v0= n++; dl1.v2=cLSm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
398 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
399 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
400 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
401 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
402 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
403 | + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
404 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
405 | + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=pLSp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
406 | + | ||
407 | + | ||
408 | + | ||
// prel, phead, g and gr are only used by the commented-out stacking features below.
// g encodes whether the predicted heads agree with the edges: 1/2 for p being the
// predicted head of d or not, plus 4/8 for d being the predicted head of gc or not
409 | + short[] prel = is.plabels[i],phead=is.pheads[i]; | ||
410 | + | ||
411 | + int g = p==phead[d]?1:2 ; | ||
412 | + if (gc>=0) g += d==phead[gc]?4:8; | ||
413 | + | ||
414 | + int gr = gc==-1?s_relend:prel[gc]; | ||
415 | + | ||
416 | + // take those in for stacking | ||
417 | + /* | ||
418 | + dl2.v1=label; | ||
419 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | ||
420 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | ||
421 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | ||
422 | + | ||
423 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | ||
424 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | ||
425 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | ||
426 | + | ||
427 | +*/ | ||
// without morphologic features the extraction ends here
428 | + if (feats==null) return; | ||
429 | + | ||
// note the naming: featsP holds the features of the dependent d, featsD those of the
// grandchild (null when there is none)
430 | + short[] featsP =feats[d]; | ||
431 | + short[] featsD =gc!=-1?feats[gc]:null; | ||
432 | + | ||
// dlf is prepared with template number, label and the part-of-speech values of
// grandchild and dependent; extractFeat presumably continues writing to f at index c
433 | + dlf.v0= n++; dlf.v1=label; dlf.v2=gP; dlf.v3=dP; | ||
434 | + extractFeat(f, c, dir, featsP, featsD); | ||
435 | + return; | ||
436 | + } | ||
437 | + | ||
438 | + | ||
439 | + public void siblingm(Instances is , int i,short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, int sblng, int label, Cluster cluster, long[] f, int v) | ||
440 | + { | ||
441 | + | ||
442 | + for(int k=0;k<f.length;k++) f[k]=0; | ||
443 | + | ||
444 | + int pP = pos[prnt], dP = pos[d]; | ||
445 | + int prntF = forms[prnt],chldF = forms[d]; | ||
446 | + int prntL = lemmas[prnt], chldL = lemmas[d]; | ||
447 | + int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); | ||
448 | + | ||
449 | + int sP = sblng!=-1 ? pos[sblng] : s_str, sblF = sblng!=-1 ? forms[sblng] : s_stwrd, sblL = sblng!=-1 ? lemmas[sblng] : s_stwrd; | ||
450 | + | ||
451 | + int sblLS = (sblng != -1)&&(sblF!=-1) ? cluster.getLP(sblF) : s_stwrd; | ||
452 | + | ||
453 | + | ||
454 | + int dir= (prnt < d)? ra:la; | ||
455 | + | ||
456 | + int abs = Math.abs(prnt-d); | ||
457 | + | ||
458 | + final int dist; | ||
459 | + if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2; | ||
460 | + else if (abs==2)dist=d1; else dist=di0; | ||
461 | + | ||
462 | + int n=147; | ||
463 | + | ||
464 | + if (prntF>maxForm) prntF=-1; | ||
465 | + if (prntL>maxForm) prntL=-1; | ||
466 | + | ||
467 | + if (chldF>maxForm) chldF=-1; | ||
468 | + if (chldL>maxForm) chldL=-1; | ||
469 | + | ||
470 | + if (sblF>maxForm) sblF=-1; | ||
471 | + if (sblL>maxForm) sblL=-1; | ||
472 | + | ||
473 | + | ||
474 | + dl1.v0= n++; dl1.v1=label;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[0]=dl1.csa(s_dir,dir);f[1]=dl1.csa(s_dist,dist); | ||
475 | + dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[2]=dl1.csa(s_dir,dir); f[3]=dl1.csa(s_dist,dist); | ||
476 | + dl1.v0= n++; dl1.v2=dP;dl1.cz4(); f[4]=dl1.csa(s_dir,dir); f[5]=dl1.csa(s_dist,dist); | ||
477 | + | ||
478 | + // sibling only could be tried | ||
479 | + dwwp.v1=label; | ||
480 | + dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=sblF; dwwp.cz4(); f[6]=dwwp.csa(s_dir,dir); f[7]=dwwp.csa(s_dist,dist); | ||
481 | + dwwp.v0= n++; dwwp.v2=chldF; dwwp.cz4(); f[8]=dwwp.csa(s_dir,dir); f[9]=dwwp.csa(s_dist,dist); | ||
482 | + dwp.v0= n++; dwp.v1=label; dwp.v2=sblF; dwp.v3=pP; dwp.cz4(); f[10]=dwp.csa(s_dir,dir); f[11]=dwp.csa(s_dist,dist); | ||
483 | + dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[12]=dwp.csa(s_dir,dir); f[13]=dwp.csa(s_dist,dist); | ||
484 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntF; dwp.v3=sP; dwp.cz4(); f[14]=dwp.csa(s_dir,dir); f[15]=dwp.csa(s_dist,dist); | ||
485 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldF; dwp.cz4(); f[16]=dwp.csa(s_dir,dir); f[17]=dwp.csa(s_dist,dist); | ||
486 | + | ||
487 | + //lemmas | ||
488 | + dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=sblL; dwwp.cz4(); f[18]=dwwp.csa(s_dir,dir); | ||
489 | + dwwp.v0= n++; dwwp.v2=chldL; dwwp.cz4(); f[19]=dwwp.csa(s_dir,dir); f[20]=dwwp.csa(s_dist,dist); | ||
490 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=sblL; dwp.v3=pP; dwp.cz4(); f[21]=dwp.csa(s_dir,dir); f[22]=dwp.csa(s_dist,dist); | ||
491 | + dwp.v0= n++; /*dwp.v1=label; */ dwp.v3=dP; dwp.cz4(); f[23]=dwp.csa(s_dir,dir);f[24]=dwp.csa(s_dist,dist); | ||
492 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntL; dwp.v3=sP; dwp.cz4(); f[25]=dwp.csa(s_dir,dir); f[26]=dwp.csa(s_dist,dist); | ||
493 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldL; dwp.cz4(); f[27]=dwp.csa(s_dir,dir);f[28]=dwp.csa(s_dist,dist); | ||
494 | + | ||
495 | + | ||
496 | + // clusters | ||
497 | + | ||
498 | + d2lp.v1=label; | ||
499 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[29]=d2lp.csa(s_dir,dir); | ||
500 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[30]=d2lp.csa(s_dir,dir); f[31]=d2lp.csa(s_dist,dist); | ||
501 | + | ||
502 | + d3lp.v1= label; | ||
503 | + d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[32]=d3lp.csa(s_dir,dir); | ||
504 | + | ||
505 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sblF; d2lp.cz5(); f[33]=d2lp.csa(s_dir,dir); f[34]=d2lp.csa(s_dist,dist); | ||
506 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=chldF; d2lp.cz5(); f[35]=d2lp.csa(s_dir,dir); f[36]=d2lp.csa(s_dist,dist); | ||
507 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=prntF; d2lp.cz5(); f[37]=d2lp.csa(s_dir,dir); f[38]=d2lp.csa(s_dist,dist); | ||
508 | + | ||
509 | + d2pp.v1=label; | ||
510 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[39]=d2pp.csa(s_dir,dir); f[40]=d2pp.csa(s_dist,dist); | ||
511 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[41]=d2pp.csa(s_dir,dir); f[42]=d2pp.csa(s_dist,dist); | ||
512 | + d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[43]=d2pp.csa(s_dir,dir); f[44]=d2pp.csa(s_dist,dist); | ||
513 | + | ||
514 | + | ||
515 | + int prntPm1 = prnt!=0 ? pos[prnt-1] : s_str; | ||
516 | + int chldPm1 = d-1>=0 ? pos[d-1] : s_str; | ||
517 | + int prntPp1 = prnt!=pos.length-1 ? pos[prnt+1] : s_end; | ||
518 | + int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end; | ||
519 | + | ||
520 | + // sibling part of speech minus and plus 1 | ||
521 | + int sblPm1 = sblng>0 ? pos[sblng-1]:s_str; | ||
522 | + int sblPp1 = sblng<pos.length-1 ? pos[sblng + 1]:s_end; | ||
523 | + | ||
524 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP; dl1.cz5(); f[45]=dl1.csa(s_dir,dir); | ||
525 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=pP; dl1.cz5(); f[46]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | ||
526 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.cz5(); f[47]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | ||
527 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[48]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | ||
528 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[49]=dl1.csa(s_dir,dir); | ||
529 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=prntPm1;dl1.v5=pP;dl1.cz6(); f[50]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | ||
530 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[51]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | ||
531 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[52]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | ||
532 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP; dl1.cz5(); f[53]=dl1.csa(s_dir,dir); | ||
533 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=dP; dl1.cz5(); f[54]=dl1.csa(s_dir,dir); | ||
534 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.cz5(); f[55]=dl1.csa(s_dir,dir); | ||
535 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[56]=dl1.csa(s_dir,dir); | ||
536 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6(); f[57]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | ||
537 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[58]=dl1.csa(s_dir,dir); | ||
538 | + dl1.v0= n++;dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6();f[59]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | ||
539 | + dl1.v0= n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6(); f[60]=dl1.csa(s_dir,dir); | ||
540 | + | ||
541 | + int c=61; | ||
542 | + | ||
543 | + int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1]==-1?-1:cluster.getLP(forms[prnt + 1]): _cend; | ||
544 | + int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; | ||
545 | + int sLSp1 = sblng < pos.length -1 ? forms[sblng + 1] ==-1?-1:cluster.getLP(forms[sblng + 1]) : _cend; | ||
546 | + | ||
547 | + int pLSm1 = prnt!=0 ? forms[prnt - 1]==-1?-1:cluster.getLP(forms[prnt - 1]): _cstr; | ||
548 | + int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr; | ||
549 | + int sLSm1 = sblng>0 ? forms[sblng - 1] ==-1?-1:cluster.getLP(forms[sblng - 1]):_cstr; | ||
550 | + | ||
551 | + //int c=61; | ||
552 | + | ||
553 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
554 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
555 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
556 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
557 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
558 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
559 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
560 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
561 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
562 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
563 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
564 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
565 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
566 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
567 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | ||
568 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
569 | + | ||
570 | + | ||
571 | + | ||
572 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
573 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
574 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
575 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
576 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
577 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
578 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
579 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
580 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
581 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
582 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
583 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | ||
584 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
585 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
586 | + dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | ||
587 | + dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | ||
588 | + | ||
589 | + // take those in for stacking | ||
590 | + | ||
591 | + /* | ||
592 | + short[] prel = is.plabels[i],phead=is.pheads[i]; | ||
593 | + | ||
594 | + int g = prnt==phead[d]?1:2 ; | ||
595 | + if (sblng>=0) g += prnt==phead[sblng]?4:8; | ||
596 | + | ||
597 | + int gr = sblng==-1?s_relend:prel[sblng]; | ||
598 | + | ||
599 | + | ||
600 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | ||
601 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | ||
602 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | ||
603 | + | ||
604 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | ||
605 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | ||
606 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | ||
607 | +*/ | ||
608 | + | ||
609 | + if (feats==null) return; | ||
610 | + | ||
611 | + int cnt=c; | ||
612 | + | ||
613 | + short[] featsP =feats[d]; | ||
614 | + short[] featsSbl =sblng!=-1?feats[sblng]:null; | ||
615 | + | ||
616 | + dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=dP; | ||
617 | + | ||
618 | + | ||
619 | + cnt = extractFeat(f, cnt ,dir, featsP, featsSbl); | ||
620 | + | ||
621 | + featsP =feats[prnt]; | ||
622 | + featsSbl =sblng!=-1?feats[sblng]:null; | ||
623 | + | ||
624 | + dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=sP; | ||
625 | + if (featsP!=null && featsSbl!=null) { | ||
626 | + for(short i1=0;i1<featsP.length;i1++) { | ||
627 | + for(short i2=0;i2<featsSbl.length;i2++) { | ||
628 | + dlf.v4=featsP[i1]; dlf.v5=featsSbl[i2]; | ||
629 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,prnt<sblng?1:2); | ||
630 | + } | ||
631 | + } | ||
632 | + } else if (featsP==null && featsSbl!=null) { | ||
633 | + | ||
634 | + for(short i2=0;i2<featsSbl.length;i2++) { | ||
635 | + dlf.v4=nofeat; dlf.v5=featsSbl[i2]; | ||
636 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | ||
637 | + } | ||
638 | + | ||
639 | + } else if (featsP!=null && featsSbl==null) { | ||
640 | + | ||
641 | + for(short i1=0;i1<featsP.length;i1++) { | ||
642 | + dlf.v4=featsP[i1]; dlf.v5=nofeat; | ||
643 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | ||
644 | + } | ||
645 | + } | ||
646 | + | ||
647 | + return; | ||
648 | + } | ||
649 | + | ||
650 | + private int extractFeat(long[] f, int cnt, int dir, short[] featsP, short[] featsD) { | ||
651 | + if (featsP!=null && featsD!=null) { | ||
652 | + for(short i1=0;i1<featsP.length;i1++) { | ||
653 | + for(short i2=0;i2<featsD.length;i2++) { | ||
654 | + dlf.v4=featsP[i1]; dlf.v5=featsD[i2]; | ||
655 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | ||
656 | + } | ||
657 | + } | ||
658 | + } else if (featsP==null && featsD!=null) { | ||
659 | + | ||
660 | + for(short i2=0;i2<featsD.length;i2++) { | ||
661 | + dlf.v4=nofeat; dlf.v5=featsD[i2]; | ||
662 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | ||
663 | + | ||
664 | + } | ||
665 | + } else if (featsP!=null && featsD==null) { | ||
666 | + | ||
667 | + for(short i1=0;i1<featsP.length;i1++) { | ||
668 | + dlf.v4=featsP[i1]; dlf.v5=nofeat; | ||
669 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | ||
670 | + | ||
671 | + } | ||
672 | + } | ||
673 | + return cnt; | ||
674 | + } | ||
675 | + | ||
676 | + public IFV encodeCat2(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], | ||
677 | + Cluster cluster, IFV f, Long2IntInterface li) { | ||
678 | + | ||
679 | + | ||
680 | + long[] svs = new long[250]; | ||
681 | + | ||
682 | + for (int i = 1; i < heads.length; i++) { | ||
683 | + | ||
684 | + | ||
685 | + int n =basic(pposs, forms, heads[i], i, cluster, f); | ||
686 | + firstm(is, ic, heads[i], i, types[i], cluster,svs); | ||
687 | + for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k])); | ||
688 | + | ||
689 | + int ch,cmi,cmo; | ||
690 | + if (heads[i] < i) { | ||
691 | + ch = rightmostRight(heads, heads[i], i); | ||
692 | + cmi = leftmostLeft(heads, i, heads[i]); | ||
693 | + cmo = rightmostRight(heads, i, heads.length); | ||
694 | + | ||
695 | + } else { | ||
696 | + ch = leftmostLeft(heads, heads[i], i); | ||
697 | + cmi = rightmostRight(heads, i, heads[i]); | ||
698 | + cmo = leftmostLeft(heads, i, 0); | ||
699 | + } | ||
700 | + | ||
701 | + siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n); | ||
702 | + for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k])); | ||
703 | + | ||
704 | + | ||
705 | + gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs); | ||
706 | + for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k])); | ||
707 | + | ||
708 | + gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs); | ||
709 | + for(int k=0;k<svs.length;k++)f.add(li.l2i(svs[k])); | ||
710 | + } | ||
711 | + | ||
712 | + return f; | ||
713 | + } | ||
714 | + | ||
715 | + public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) { | ||
716 | + | ||
717 | + | ||
718 | + long[] svs = new long[250]; | ||
719 | + | ||
720 | + for (int i = 1; i < heads.length; i++) { | ||
721 | + | ||
722 | + | ||
723 | + int n =basic(pposs, forms, heads[i], i, cluster, f); | ||
724 | + firstm(is, ic, heads[i], i, types[i], cluster,svs); | ||
725 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | ||
726 | + | ||
727 | + int ch,cmi,cmo; | ||
728 | + if (heads[i] < i) { | ||
729 | + ch = rightmostRight(heads, heads[i], i); | ||
730 | + cmi = leftmostLeft(heads, i, heads[i]); | ||
731 | + cmo = rightmostRight(heads, i, heads.length); | ||
732 | + | ||
733 | + } else { | ||
734 | + ch = leftmostLeft(heads, heads[i], i); | ||
735 | + cmi = rightmostRight(heads, i, heads[i]); | ||
736 | + cmo = leftmostLeft(heads, i, 0); | ||
737 | + } | ||
738 | + | ||
739 | + siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n); | ||
740 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | ||
741 | + | ||
742 | + | ||
743 | + gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs); | ||
744 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | ||
745 | + | ||
746 | + gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs); | ||
747 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | ||
748 | + } | ||
749 | + | ||
750 | + return f; | ||
751 | + } | ||
752 | + | ||
753 | + | ||
754 | + public float encode3(short[] pos, short heads[] , short[] types, DataF d2) { | ||
755 | + | ||
756 | + double v = 0; | ||
757 | + for (int i = 1; i < heads.length; i++) { | ||
758 | + | ||
759 | + int dir= (heads[i] < i)? 0:1; | ||
760 | + | ||
761 | + v += d2.pl[heads[i]][i]; | ||
762 | + v += d2.lab[heads[i]][i][types[i]][dir]; | ||
763 | + | ||
764 | + boolean left = i<heads[i]; | ||
765 | + short[] labels = Edges.get(pos[heads[i]], pos[i], left); | ||
766 | + int lid=-1; | ||
767 | + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} | ||
768 | + | ||
769 | + int ch,cmi,cmo; | ||
770 | + if (heads[i] < i) { | ||
771 | + ch = rightmostRight(heads, heads[i], i); | ||
772 | + cmi = leftmostLeft(heads, i, heads[i]); | ||
773 | + cmo = rightmostRight(heads, i, heads.length); | ||
774 | + | ||
775 | + if (ch==-1) ch=heads[i]; | ||
776 | + if (cmi==-1) cmi=heads[i]; | ||
777 | + if (cmo==-1) cmo=heads[i]; | ||
778 | + | ||
779 | + } else { | ||
780 | + ch = leftmostLeft(heads, heads[i], i); | ||
781 | + cmi = rightmostRight(heads, i, heads[i]); | ||
782 | + cmo = leftmostLeft(heads, i, 0); | ||
783 | + | ||
784 | + if (ch==-1) ch=i; | ||
785 | + if (cmi==-1) cmi=i; | ||
786 | + if (cmo==-1) cmo=i; | ||
787 | + } | ||
788 | + v += d2.sib[heads[i]][i][ch][dir][lid]; | ||
789 | + v += d2.gra[heads[i]][i][cmi][dir][lid]; | ||
790 | + v += d2.gra[heads[i]][i][cmo][dir][lid]; | ||
791 | + } | ||
792 | + return (float)v; | ||
793 | + } | ||
794 | + | ||
795 | + /** | ||
796 | + * Provide the scores of the edges | ||
797 | + * @param pos | ||
798 | + * @param heads | ||
799 | + * @param types | ||
800 | + * @param edgesScores | ||
801 | + * @param d2 | ||
802 | + * @return | ||
803 | + */ | ||
804 | + public static float encode3(short[] pos, short heads[] , short[] types, float[] edgesScores, DataF d2) { | ||
805 | + | ||
806 | + double v = 0; | ||
807 | + for (int i = 1; i < heads.length; i++) { | ||
808 | + | ||
809 | + int dir= (heads[i] < i)? 0:1; | ||
810 | + | ||
811 | + edgesScores[i] = d2.pl[heads[i]][i]; | ||
812 | + edgesScores[i] += d2.lab[heads[i]][i][types[i]][dir]; | ||
813 | + | ||
814 | + boolean left = i<heads[i]; | ||
815 | + short[] labels = Edges.get(pos[heads[i]], pos[i], left); | ||
816 | + int lid=-1; | ||
817 | + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} | ||
818 | + | ||
819 | + int ch,cmi,cmo; | ||
820 | + if (heads[i] < i) { | ||
821 | + ch = rightmostRight(heads, heads[i], i); | ||
822 | + cmi = leftmostLeft(heads, i, heads[i]); | ||
823 | + cmo = rightmostRight(heads, i, heads.length); | ||
824 | + | ||
825 | + if (ch==-1) ch=heads[i]; | ||
826 | + if (cmi==-1) cmi=heads[i]; | ||
827 | + if (cmo==-1) cmo=heads[i]; | ||
828 | + | ||
829 | + } else { | ||
830 | + ch = leftmostLeft(heads, heads[i], i); | ||
831 | + cmi = rightmostRight(heads, i, heads[i]); | ||
832 | + cmo = leftmostLeft(heads, i, 0); | ||
833 | + | ||
834 | + if (ch==-1) ch=i; | ||
835 | + if (cmi==-1) cmi=i; | ||
836 | + if (cmo==-1) cmo=i; | ||
837 | + } | ||
838 | + edgesScores[i] += d2.sib[heads[i]][i][ch][dir][lid]; | ||
839 | + edgesScores[i] += d2.gra[heads[i]][i][cmi][dir][lid]; | ||
840 | + edgesScores[i] += d2.gra[heads[i]][i][cmo][dir][lid]; | ||
841 | + v+=edgesScores[i]; | ||
842 | + } | ||
843 | + return (float)v; | ||
844 | + } | ||
845 | + | ||
846 | + | ||
847 | + private static int rightmostRight(short[] heads, int head, int max) { | ||
848 | + int rightmost = -1; | ||
849 | + for (int i = head + 1; i < max; i++) if (heads[i] == head) rightmost = i; | ||
850 | + | ||
851 | + return rightmost; | ||
852 | + } | ||
853 | + | ||
854 | + private static int leftmostLeft(short[] heads, int head, int min) { | ||
855 | + int leftmost = -1; | ||
856 | + for (int i = head - 1; i > min; i--) if (heads[i] == head) leftmost = i; | ||
857 | + return leftmost; | ||
858 | + } | ||
859 | + | ||
860 | + public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA"; | ||
861 | + | ||
862 | + private static int ra,la; | ||
863 | + private static int s_str; | ||
864 | + private static int s_end, _cend,_cstr, s_stwrd,s_relend; | ||
865 | + | ||
866 | + protected static final String TYPE = "TYPE",DIR = "D"; | ||
867 | + public static final String POS = "POS"; | ||
868 | + protected static final String DIST = "DIST",MID = "MID", FEAT="F"; | ||
869 | + | ||
870 | + private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10"; | ||
871 | + | ||
872 | + private static int di0, d4,d3,d2,d1,d5,d10; | ||
873 | + | ||
874 | + | ||
875 | + private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS"; | ||
876 | + | ||
877 | + | ||
878 | + | ||
879 | + private static int nofeat; | ||
880 | + | ||
881 | + | ||
882 | + public static int maxForm; | ||
883 | + | ||
884 | + | ||
885 | + /** | ||
886 | + * Initialize the features. | ||
887 | + * @param maxFeatures | ||
888 | + */ | ||
889 | + static public void initFeatures() { | ||
890 | + | ||
891 | + | ||
892 | + MFB mf = new MFB(); | ||
893 | + mf.register(POS, MID); | ||
894 | + s_str = mf.register(POS, STR); | ||
895 | + s_end = mf.register(POS, END); | ||
896 | + | ||
897 | + s_relend = mf.register(REL, END); | ||
898 | + | ||
899 | + _cstr= mf.register(Cluster.SPATH,STR); | ||
900 | + _cend=mf.register(Cluster.SPATH,END); | ||
901 | + | ||
902 | + | ||
903 | + mf.register(TYPE, POS); | ||
904 | + | ||
905 | + s_stwrd=mf.register(WORD,STWRD); | ||
906 | + mf.register(POS,STPOS); | ||
907 | + | ||
908 | + la = mf.register(DIR, LA); | ||
909 | + ra = mf.register(DIR, RA); | ||
910 | + | ||
911 | + // mf.register(TYPE, CHAR); | ||
912 | + | ||
913 | + mf.register(TYPE, FEAT); | ||
914 | + nofeat=mf.register(FEAT, "NOFEAT"); | ||
915 | + | ||
916 | + for(int k=0;k<215;k++) mf.register(TYPE, "F"+k); | ||
917 | + | ||
918 | + | ||
919 | + di0=mf.register(DIST, _0); | ||
920 | + d1=mf.register(DIST, _1); | ||
921 | + d2=mf.register(DIST, _2); | ||
922 | + d3=mf.register(DIST, _3); | ||
923 | + d4=mf.register(DIST, _4); | ||
924 | + d5=mf.register(DIST, _5); | ||
925 | + // d5l=mf.register(DIST, _5l); | ||
926 | + d10=mf.register(DIST, _10); | ||
927 | + | ||
928 | + | ||
929 | + } | ||
930 | + | ||
931 | + /* (non-Javadoc) | ||
932 | + * @see extractors.Extractor#getType() | ||
933 | + */ | ||
934 | + @Override | ||
935 | + public int getType() { | ||
936 | + | ||
937 | + return s_type; | ||
938 | + } | ||
939 | + | ||
940 | + /* (non-Javadoc) | ||
941 | + * @see extractors.Extractor#setMaxForm(int) | ||
942 | + */ | ||
943 | + @Override | ||
944 | + public void setMaxForm(int max) { | ||
945 | + maxForm = max; | ||
946 | + } | ||
947 | + | ||
948 | + /* (non-Javadoc) | ||
949 | + * @see extractors.Extractor#getMaxForm() | ||
950 | + */ | ||
951 | + @Override | ||
952 | + public int getMaxForm() { | ||
953 | + return maxForm; | ||
954 | + } | ||
955 | + | ||
956 | + | ||
957 | + | ||
958 | +} |
dependencyParser/basic/mate-tools/src/extractors/ExtractorClusterStackedR2.java
0 → 100644
1 | +package extractors; | ||
2 | + | ||
3 | + | ||
4 | +import java.util.Arrays; | ||
5 | + | ||
6 | +import is2.data.Cluster; | ||
7 | +import is2.data.D4; | ||
8 | +import is2.data.DataF; | ||
9 | +import is2.data.Edges; | ||
10 | +import is2.data.FV; | ||
11 | +import is2.data.IFV; | ||
12 | +import is2.data.Instances; | ||
13 | +import is2.data.Long2IntInterface; | ||
14 | +import is2.data.MFB; | ||
15 | +import is2.util.DB; | ||
16 | + | ||
17 | + | ||
18 | + | ||
19 | +final public class ExtractorClusterStackedR2 implements Extractor { | ||
20 | + | ||
21 | + public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos; | ||
22 | + | ||
23 | + MFB mf; | ||
24 | + | ||
25 | + final D4 d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ; | ||
26 | + | ||
27 | + public final Long2IntInterface li; | ||
28 | + | ||
/**
 * Creates the extractor: runs {@code initFeatures()}, stores {@code li} and creates one
 * {@code D4} encoder per feature template, each constructed with {@code li}.
 *
 * @param li long-to-int mapping stored in {@link #li} and passed to every encoder
 */
public ExtractorClusterStackedR2(Long2IntInterface li) {

    initFeatures();
    this.li = li;

    d0 = new D4(li);
    dl1 = new D4(li);
    dl2 = new D4(li);
    dwr = new D4(li);
    dr = new D4(li);
    dwwp = new D4(li);

    dw = new D4(li);
    dwp = new D4(li);

    dlf = new D4(li);
    d3lp = new D4(li);
    d2lp = new D4(li);
    d2pw = new D4(li);
    d2pp = new D4(li);
}
45 | + | ||
46 | + public void initStat() { | ||
47 | + | ||
48 | + | ||
49 | + mf = new MFB(); | ||
50 | + s_rel = mf.getFeatureCounter().get(REL).intValue(); | ||
51 | + s_pos = mf.getFeatureCounter().get(POS).intValue(); | ||
52 | + s_word = mf.getFeatureCounter().get(WORD).intValue(); | ||
53 | + s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits(); | ||
54 | + s_dir = mf.getFeatureCounter().get(DIR); | ||
55 | + la = mf.getValue(DIR, LA); | ||
56 | + ra = mf.getValue(DIR, RA); | ||
57 | + s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST); | ||
58 | + s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT); | ||
59 | + s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH); | ||
60 | + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH); | ||
61 | + } | ||
62 | + | ||
63 | + public void init(){ | ||
64 | + // DB.println("init"); | ||
65 | + d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos; | ||
66 | + dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; | ||
67 | + dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos; | ||
68 | + dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word; | ||
69 | + dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word; | ||
70 | + dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos; | ||
71 | + d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath; | ||
72 | + d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | ||
73 | + d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | ||
74 | + d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | ||
75 | + } | ||
76 | + | ||
77 | + | ||
78 | + public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f) | ||
79 | + { | ||
80 | + | ||
81 | + d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean(); | ||
82 | + | ||
83 | + d3lp.clean(); d2lp.clean();d2pw.clean(); d2pp.clean(); | ||
84 | + | ||
85 | + int n=1; | ||
86 | + int dir= (p < d)? ra:la; | ||
87 | + // d0.v0= n; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4; | ||
88 | + int end= (p >= d ? p : d); | ||
89 | + int start = (p >= d ? d : p) + 1; | ||
90 | + | ||
91 | + StringBuilder s = new StringBuilder(end-start); | ||
92 | + int[] x = new int[end-start]; | ||
93 | + int c=0; | ||
94 | + for(int i = start ; i <end ; i++) { | ||
95 | + //d0.v3=pposs[i]; | ||
96 | + //d0.cz4(); | ||
97 | + //d0.csa(s_dir,dir,f); | ||
98 | +// s.append((char)pposs[i]); | ||
99 | + x[c++] =pposs[i]; | ||
100 | + } | ||
101 | + | ||
102 | + Arrays.sort(x); | ||
103 | + for(int i = 0;i<x.length ; i++) { | ||
104 | + if (i==0 || x[i]!=x[i-1] ) s.append(x[i]); | ||
105 | + } | ||
106 | + int v = mf.register("px", s.toString()); | ||
107 | + | ||
108 | + dwp.v0 = n++; dwp.v1 = 1;dwp.v2 = v; dwp.v3 = pposs[p]; dwp.v4 = pposs[d]; dwp.cz5(); dwp.csa(s_dir,dir,f); | ||
109 | + | ||
110 | + return n; | ||
111 | + } | ||
112 | + | ||
113 | + | ||
114 | + public void firstm(Instances is, int i, | ||
115 | + int prnt, int dpnt, int label, Cluster cluster, long[] f) | ||
116 | + { | ||
117 | + | ||
118 | + | ||
119 | + //short[] pposs, int[] form, int[] lemmas, short[][] feats | ||
120 | + for(int k=0;k<f.length;k++) f[k]=0; | ||
121 | + | ||
122 | + short[] pposs = is.pposs[i]; | ||
123 | + int[] form =is.forms[i]; | ||
124 | + short[][] feats = is.feats[i]; | ||
125 | + | ||
126 | + | ||
127 | + int pF = form[prnt],dF = form[dpnt]; | ||
128 | + int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt]; | ||
129 | + int pP = pposs[prnt],dP = pposs[dpnt]; | ||
130 | + | ||
131 | + int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF); | ||
132 | + | ||
133 | + final int dir= (prnt < dpnt)? ra:la; | ||
134 | + | ||
135 | + if (pF>maxForm) pF=-1; | ||
136 | + if (pL>maxForm) pL=-1; | ||
137 | + | ||
138 | + if (dF>maxForm) dF=-1; | ||
139 | + if (dL>maxForm) dL=-1; | ||
140 | + | ||
141 | + | ||
142 | + int n=3,c=0; | ||
143 | + | ||
144 | + dl2.v1=label; | ||
145 | + dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | ||
146 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | ||
147 | + dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | ||
148 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | ||
149 | + | ||
150 | + | ||
151 | + dwwp.v1=label; | ||
152 | + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir); | ||
153 | + | ||
154 | + dl1.v1=label; | ||
155 | + dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir); | ||
156 | + dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir); | ||
157 | + dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.csa(s_dir,dir); | ||
158 | + | ||
159 | + int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str; | ||
160 | + int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end; | ||
161 | + | ||
162 | + int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str; | ||
163 | + int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end; | ||
164 | + | ||
165 | + int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd; | ||
166 | + int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd; | ||
167 | + | ||
168 | + | ||
169 | + | ||
170 | + dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
171 | + dl1.v0= n++; dl1.v5=dPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
172 | + dl1.v0= n++; dl1.v3=pPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
173 | + dl1.v0= n++; dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
174 | + | ||
175 | + | ||
176 | + dl1.v0= n++; dl1.v3=pPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
177 | + dl1.v0= n++; dl1.v3=dPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
178 | + dl1.v0= n++; dl1.v3=dPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
179 | + dl1.v0= n++; dl1.v3=pPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
180 | + | ||
181 | + dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
182 | + dl1.v0= n++; dl1.v5=dPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
183 | + dl1.v0= n++; dl1.v3=pPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
184 | + dl1.v0= n++; dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | ||
185 | + | ||
186 | + dl1.v0= n++; dl1.v3=pPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
187 | + dl1.v0= n++; dl1.v3=dPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
188 | + dl1.v0= n++; dl1.v3=dPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
189 | + dl1.v0= n++; dl1.v3=pPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | ||
190 | + | ||
191 | + | ||
192 | + | ||
193 | + dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal(); | ||
194 | + dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.cz5(); f[n++]=dl2.getVal(); | ||
195 | + dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal(); | ||
196 | + dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.cz5(); f[n++]=dl2.getVal(); | ||
197 | + | ||
198 | + | ||
199 | + dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal(); | ||
200 | + dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.cz5(); f[n++]=dl2.getVal(); | ||
201 | + dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal(); | ||
202 | + dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.cz5(); f[n++]=dl2.getVal(); | ||
203 | + | ||
204 | + | ||
205 | + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir); | ||
206 | + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir); | ||
207 | + dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[n++]=dwwp.csa(s_dir,dir); | ||
208 | + | ||
209 | + | ||
210 | + | ||
211 | + // lemmas | ||
212 | + | ||
213 | + dl2.v1=label; | ||
214 | + dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | ||
215 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | ||
216 | + dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | ||
217 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | ||
218 | + | ||
219 | + | ||
220 | + dwwp.v1=label; | ||
221 | + dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir); | ||
222 | + | ||
223 | + dwp.v1= label; | ||
224 | + dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; dwp.cz6(); f[c++]=dwp.csa(s_dir,dir); | ||
225 | + dwp.v0=n++;dwp.cz5(); f[c++]=dwp.csa(s_dir,dir); | ||
226 | + | ||
227 | + dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.csa(s_dir,dir); | ||
228 | + dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir); | ||
229 | + dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir); | ||
230 | + | ||
231 | + | ||
232 | + // cluster | ||
233 | + | ||
234 | + d2pw.v1=label; | ||
235 | + d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.csa(s_dir,dir); | ||
236 | + d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir); | ||
237 | + d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir); | ||
238 | + d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.csa(s_dir,dir); | ||
239 | + | ||
240 | + | ||
241 | + d2pp.v1=label; | ||
242 | + d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.csa(s_dir,dir); | ||
243 | + d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | ||
244 | + d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | ||
245 | + d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.csa(s_dir,dir); | ||
246 | + | ||
247 | + | ||
248 | + short[] prel = is.plabels[i]; | ||
249 | + short[] phead = is.pheads[i]; | ||
250 | + | ||
251 | + | ||
252 | + //take those in for stacking | ||
253 | + // dl2.v1=label; | ||
254 | + // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir); | ||
255 | + // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.csa(s_dir,dir); | ||
256 | + | ||
257 | + | ||
258 | + | ||
259 | + if (feats==null) return; | ||
260 | + | ||
261 | + short[] featsP =feats[prnt], featsD =feats[dpnt]; | ||
262 | + dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP; | ||
263 | + extractFeat(f, c, dir, featsP, featsD); | ||
264 | + | ||
265 | + return; | ||
266 | + } | ||
267 | + | ||
268 | + | ||
269 | + | ||
/**
 * Fills {@code f} with the feature codes of the head / dependent / grandchild factor.
 *
 * The buffer is zeroed first; codes are then written to consecutive slots starting at
 * index 0 (counter {@code c}). Template ids written to slot {@code v0} start at 84 and
 * grow by one per template (counter {@code n}).
 *
 * The template objects used here (dl1, dwwp, dwp, d2lp, d3lp, d2pp, dlf) are declared
 * outside this method. Their slots v1..v5 keep whatever value was assigned last, and
 * several statements below depend on that carry-over, so statement order is significant.
 *
 * @param is      sentence store; pposs, forms, plemmas, feats, plabels and pheads are read
 * @param i       index of the sentence inside {@code is}
 * @param p       position of the head
 * @param d       position of the dependent
 * @param gc      position of the grandchild (encodeCat passes a child of {@code d}),
 *                or -1 when there is none; placeholder ids are used in that case
 * @param label   label id, stored in slot v1 of the templates
 * @param cluster lookup whose getLP(...) result is used as the cluster id of a word id
 * @param f       output buffer; completely overwritten
 */
270 | + public void gcm(Instances is , int i, int p, int d, int gc, int label,Cluster cluster, long[] f) { | |
271 | + | |
      // Clear the whole buffer so that slots not written below stay 0.
272 | + for(int k=0;k<f.length;k++) f[k]=0; | |
273 | + | |
274 | + short[] pos= is.pposs[i]; | |
275 | + int[] forms=is.forms[i]; | |
276 | + int[] lemmas=is.plemmas[i]; | |
277 | + short[][] feats=is.feats[i]; | |
278 | + | |
279 | + int pP = pos[p], dP = pos[d]; | |
280 | + int prntF = forms[p], chldF = forms[d]; | |
281 | + int prntL = lemmas[p], chldL = lemmas[d]; | |
      // Cluster ids are looked up from the raw form ids, i.e. before the maxForm cut-off below.
282 | + int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); | |
283 | + | |
      // Without a grandchild (gc == -1) the placeholder ids s_str / s_stwrd are used instead.
284 | + int gP = gc != -1 ? pos[gc] : s_str; | |
285 | + int gcF = gc != -1 ? forms[gc] : s_stwrd; | |
286 | + int gcL = gc != -1 ? lemmas[gc] : s_stwrd; | |
287 | + int gcLS = (gc != -1) && (gcF!=-1) ? cluster.getLP(gcF) : s_stwrd; | |
288 | + | |
      // Form and lemma ids above maxForm are replaced by -1.
289 | + if (prntF>maxForm) prntF=-1; | |
290 | + if (prntL>maxForm) prntL=-1; | |
291 | + | |
292 | + if (chldF>maxForm) chldF=-1; | |
293 | + if (chldL>maxForm) chldL=-1; | |
294 | + | |
295 | + if (gcF>maxForm) gcF=-1; | |
296 | + if (gcL>maxForm) gcL=-1; | |
297 | + | |
298 | + | |
      // dir encodes the head/dependent order, dir_gra the dependent/grandchild order.
      // With gc == -1 and a non-negative d the test d < gc is false, so dir_gra is la.
299 | + int dir= (p < d)? ra:la, dir_gra =(d < gc)? ra:la; | |
300 | + | |
      // n: next template id (slot v0); c: next free slot in f.
301 | + int n=84,c=0; | |
302 | + | |
303 | + //dl1.v023(); | |
      // In this first part each template calls cs(s_dir,dir) and then stores the result of
      // csa(s_dir,dir_gra), so both direction codes enter the stored value.
304 | + dl1.v1=label; | |
305 | + dl1.v0= n++; dl1.v2=pP; dl1.v3=dP;dl1.v4=gP; dl1.cz5(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); | |
306 | + dl1.v0= n++; dl1.v2=pP; dl1.v3=gP; dl1.cz4();dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); | |
      // Only v2 changes on the next line; v3 (gP) is carried over from the line above.
307 | + dl1.v0= n++; dl1.v2=dP; dl1.cz4(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); | |
308 | + | |
309 | + dwwp.v1=label; | |
310 | + dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=gcF; | |
311 | + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | |
312 | + | |
313 | + dwwp.v0= n++; dwwp.v2=chldF; dwwp.v3=gcF; | |
314 | + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | |
315 | + | |
316 | + dwp.v1=label; | |
317 | + dwp.v0= n++; dwp.v2=gcF; dwp.v3=pP; | |
318 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
319 | + | |
320 | + dwp.v0= n++; dwp.v2=gcF; dwp.v3=dP; | |
321 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
322 | + | |
323 | + dwp.v0= n++; dwp.v2=prntF; dwp.v3=gP; | |
324 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
325 | + | |
326 | + dwp.v0= n++; dwp.v2=chldF; dwp.v3=gP; | |
327 | + dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra); | |
328 | + | |
329 | + | |
330 | + // lemma | |
331 | + | |
332 | + dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=gcL; | |
333 | + dwwp.cz4();dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | |
334 | + | |
335 | + dwwp.v0= n++; dwwp.v2=chldL; dwwp.v3=gcL; | |
336 | + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | |
337 | + | |
338 | + dwp.v0= n++; dwp.v2=gcL; dwp.v3=pP; | |
339 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
340 | + | |
341 | + dwp.v0= n++; dwp.v2=gcL; dwp.v3=dP; | |
342 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
343 | + | |
344 | + dwp.v0= n++; dwp.v2=prntL; dwp.v3=gP; | |
345 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
346 | + | |
347 | + dwp.v0= n++; dwp.v2=chldL; dwp.v3=gP; | |
348 | + dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra); | |
349 | + | |
350 | + | |
351 | + // clusters | |
352 | + | |
353 | + d2lp.v1= label; | |
354 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);// f.add(li.l2i(l)); | |
355 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra); | |
356 | + d3lp.v0= n++; d3lp.v1= label; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=gcLS; d3lp.cz5(); d3lp.cs(s_dir,dir);f[c++]=d3lp.csa(s_dir,dir_gra); | |
357 | + | |
358 | + //_f83; | |
      // From here on only csa(s_dir,dir) is called; dir_gra is not used any more.
359 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=gcF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); | |
360 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.v4=chldF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); | |
361 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.v4=prntF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); | |
362 | + | |
363 | + d2pp.v1= label; | |
364 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=gP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | |
365 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=gcLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | |
366 | + d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=gcLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | |
367 | + | |
368 | + | |
369 | + | |
370 | + // linear features | |
371 | + | |
      // POS tags of the tokens next to head and dependent; s_str / s_end stand in at the
      // sentence boundaries.
372 | + int prntPm1 = p != 0 ? pos[p - 1] : s_str; // parent-pos-minus1 | |
373 | + int chldPm1 = d - 1 >=0 ? pos[d - 1] : s_str; // child-pos-minus1 | |
374 | + int prntPp1 = p != pos.length - 1 ? pos[p + 1] : s_end; | |
375 | + int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end; | |
376 | + | |
      // NOTE(review): for gc == -1 gcPm1 becomes s_str, but gc < pos.length - 1 still holds,
      // so gcPp1 reads pos[0] rather than a placeholder - confirm this is intended.
377 | + int gcPm1 = gc > 0 ? pos[gc - 1] : s_str; | |
378 | + int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end; | |
379 | + | |
380 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
381 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
382 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
383 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
384 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
385 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
386 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP;dl1.v5=chldPp1; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
      // v5 is not assigned on the next line; cz6() presumably still reads it, in which case
      // the value left by the previous line (chldPp1) is used - TODO confirm.
387 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
388 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
389 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
390 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
391 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
392 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
393 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
394 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
395 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=prntPp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
396 | + | |
397 | + | |
      // Cluster ids of the neighbouring tokens.
      // NOTE(review): gcLSp1 falls back to s_end while pLSp1 / cLSp1 fall back to _cend, and
      // for gc == -1 it reads forms[0]. Kept as is because changing it alters the codes.
398 | + int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend; | |
399 | + int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; | |
400 | + int gcLSp1 = gc < pos.length -1 ? forms[gc + 1] ==-1?-1:cluster.getLP(forms[gc + 1]) : s_end; | |
401 | + | |
      // NOTE(review): the "minus 1" variants pass lemma ids to cluster.getLP, whereas every
      // other lookup in this method passes form ids - confirm against the trained models.
402 | + int pLSm1 = p != 0 ? lemmas[p - 1]==-1?-1:cluster.getLP(lemmas[p - 1]): _cstr; | |
403 | + int cLSm1 = d - 1 >=0 ? lemmas[d - 1] ==-1?-1:cluster.getLP(lemmas[d - 1]):_cstr; | |
404 | + int gcLSm1 = gc > 0 ? lemmas[gc - 1] ==-1?-1:cluster.getLP(lemmas[gc - 1]) : _cstr; | |
405 | + | |
406 | + | |
      // Same template shapes as the POS-neighbour group above, with cluster ids in place of
      // the neighbouring POS tags.
407 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
408 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=dP; dl1.cz5();f[c++]=dl1.csa(s_dir,dir); | |
409 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
410 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
411 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
412 | + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
413 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
      // NOTE(review): v2 is cLSm1 here, while the corresponding POS template uses gcPm1 (so
      // gcLSm1 would be the analogue); v5 is again carried over from the previous line.
414 | + dl1.v0= n++; dl1.v2=cLSm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
415 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
416 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
417 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
418 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
419 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
420 | + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
421 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
422 | + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=pLSp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
423 | + | |
424 | + | |
425 | + | |
      // prel, g and gr only feed the commented-out stacking features below. They are still
      // evaluated, so phead[d] and (for gc >= 0) phead[gc] and prel[gc] are read here.
426 | + short[] prel = is.plabels[i],phead=is.pheads[i]; | |
427 | + | |
428 | + int g = p==phead[d]?1:2 ; | |
429 | + if (gc>=0) g += d==phead[gc]?4:8; | |
430 | + | |
431 | + int gr = gc==-1?s_relend:prel[gc]; | |
432 | + | |
433 | + // take those in for stacking | |
434 | + /* | |
435 | + dl2.v1=label; | |
436 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
437 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
438 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | |
439 | + | |
440 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
441 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
442 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | |
443 | + | |
444 | +*/ | |
      // Without a feats table for this sentence the method ends here.
445 | + if (feats==null) return; | |
446 | + | |
      // Despite the P / D names, featsP is the feats row of the dependent d and featsD the
      // row of gc (null when gc == -1).
447 | + short[] featsP =feats[d]; | |
448 | + short[] featsD =gc!=-1?feats[gc]:null; | |
449 | + | |
      // The fixed slots of dlf are set here; extractFeat assigns v4 / v5 and appends to f
      // starting at slot c. Its returned counter is not used.
450 | + dlf.v0= n++; dlf.v1=label; dlf.v2=gP; dlf.v3=dP; | |
451 | + extractFeat(f, c, dir, featsP, featsD); | |
452 | + return; | |
453 | + } | |
454 | + | ||
455 | + | ||
/**
 * Fills {@code f} with the feature codes of the head / dependent / sibling factor.
 *
 * The buffer is zeroed first. Slots f[0..60] are addressed with literal indices, later
 * slots through the running counter {@code c}. Template ids written to slot {@code v0}
 * start at 147 and grow by one per template (counter {@code n}).
 *
 * The template objects used here (dl1, dwwp, dwp, d2lp, d3lp, d2pp, dlf) are declared
 * outside this method. Their slots keep the value assigned last, and many statements
 * below depend on that carry-over, so statement order is significant.
 *
 * @param is      only referenced by the commented-out stacking block
 * @param i       only referenced by the commented-out stacking block
 * @param pos     POS ids of the sentence
 * @param forms   form ids of the sentence
 * @param lemmas  lemma ids of the sentence
 * @param feats   per-token feature rows, or null to skip the last part
 * @param prnt    position of the head
 * @param d       position of the dependent
 * @param sblng   position of the sibling (encodeCat passes another dependent of the same
 *                head), or -1 when there is none; placeholder ids are used in that case
 * @param label   label id, stored in slot v1 of the templates
 * @param cluster lookup whose getLP(...) result is used as the cluster id of a word id
 * @param f       output buffer; completely overwritten
 * @param v       not referenced in this method
 */
456 | + public void siblingm(Instances is , int i,short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, int sblng, int label, Cluster cluster, long[] f, int v) | |
457 | + { | |
458 | + | |
      // Clear the whole buffer so that slots not written below stay 0.
459 | + for(int k=0;k<f.length;k++) f[k]=0; | |
460 | + | |
461 | + int pP = pos[prnt], dP = pos[d]; | |
462 | + int prntF = forms[prnt],chldF = forms[d]; | |
463 | + int prntL = lemmas[prnt], chldL = lemmas[d]; | |
      // Cluster ids are looked up from the raw form ids, i.e. before the maxForm cut-off below.
464 | + int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); | |
465 | + | |
      // Without a sibling (sblng == -1) the placeholder ids s_str / s_stwrd are used instead.
466 | + int sP = sblng!=-1 ? pos[sblng] : s_str, sblF = sblng!=-1 ? forms[sblng] : s_stwrd, sblL = sblng!=-1 ? lemmas[sblng] : s_stwrd; | |
467 | + | |
468 | + int sblLS = (sblng != -1)&&(sblF!=-1) ? cluster.getLP(sblF) : s_stwrd; | |
469 | + | |
470 | + | |
471 | + int dir= (prnt < d)? ra:la; | |
472 | + | |
473 | + int abs = Math.abs(prnt-d); | |
474 | + | |
      // Distance bucket: >10 -> d10, 6..10 -> d5, 5 -> d4, 4 -> d3, 3 -> d2, 2 -> d1,
      // anything smaller -> di0.
475 | + final int dist; | |
476 | + if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2; | |
477 | + else if (abs==2)dist=d1; else dist=di0; | |
478 | + | |
      // n: next template id (slot v0).
479 | + int n=147; | |
480 | + | |
      // Form and lemma ids above maxForm are replaced by -1.
481 | + if (prntF>maxForm) prntF=-1; | |
482 | + if (prntL>maxForm) prntL=-1; | |
483 | + | |
484 | + if (chldF>maxForm) chldF=-1; | |
485 | + if (chldL>maxForm) chldL=-1; | |
486 | + | |
487 | + if (sblF>maxForm) sblF=-1; | |
488 | + if (sblL>maxForm) sblL=-1; | |
489 | + | |
490 | + | |
      // Most templates in this part store two codes: one from csa(s_dir,dir) and one from
      // csa(s_dist,dist). Lines that set fewer slots reuse the values of the line before
      // (e.g. the second line keeps v2 = pP, the third keeps v3 = sP).
491 | + dl1.v0= n++; dl1.v1=label;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[0]=dl1.csa(s_dir,dir);f[1]=dl1.csa(s_dist,dist); | |
492 | + dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[2]=dl1.csa(s_dir,dir); f[3]=dl1.csa(s_dist,dist); | |
493 | + dl1.v0= n++; dl1.v2=dP;dl1.cz4(); f[4]=dl1.csa(s_dir,dir); f[5]=dl1.csa(s_dist,dist); | |
494 | + | |
495 | + // sibling only could be tried | |
496 | + dwwp.v1=label; | |
497 | + dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=sblF; dwwp.cz4(); f[6]=dwwp.csa(s_dir,dir); f[7]=dwwp.csa(s_dist,dist); | |
498 | + dwwp.v0= n++; dwwp.v2=chldF; dwwp.cz4(); f[8]=dwwp.csa(s_dir,dir); f[9]=dwwp.csa(s_dist,dist); | |
      // dwp.v1 is assigned once on the next line and then reused by all later dwp templates.
499 | + dwp.v0= n++; dwp.v1=label; dwp.v2=sblF; dwp.v3=pP; dwp.cz4(); f[10]=dwp.csa(s_dir,dir); f[11]=dwp.csa(s_dist,dist); | |
500 | + dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[12]=dwp.csa(s_dir,dir); f[13]=dwp.csa(s_dist,dist); | |
501 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntF; dwp.v3=sP; dwp.cz4(); f[14]=dwp.csa(s_dir,dir); f[15]=dwp.csa(s_dist,dist); | |
502 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldF; dwp.cz4(); f[16]=dwp.csa(s_dir,dir); f[17]=dwp.csa(s_dist,dist); | |
503 | + | |
504 | + //lemmas | |
      // The first lemma template stores only the direction code (f[18]); no distance code.
505 | + dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=sblL; dwwp.cz4(); f[18]=dwwp.csa(s_dir,dir); | |
506 | + dwwp.v0= n++; dwwp.v2=chldL; dwwp.cz4(); f[19]=dwwp.csa(s_dir,dir); f[20]=dwwp.csa(s_dist,dist); | |
507 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=sblL; dwp.v3=pP; dwp.cz4(); f[21]=dwp.csa(s_dir,dir); f[22]=dwp.csa(s_dist,dist); | |
508 | + dwp.v0= n++; /*dwp.v1=label; */ dwp.v3=dP; dwp.cz4(); f[23]=dwp.csa(s_dir,dir);f[24]=dwp.csa(s_dist,dist); | |
509 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntL; dwp.v3=sP; dwp.cz4(); f[25]=dwp.csa(s_dir,dir); f[26]=dwp.csa(s_dist,dist); | |
510 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldL; dwp.cz4(); f[27]=dwp.csa(s_dir,dir);f[28]=dwp.csa(s_dist,dist); | |
511 | + | |
512 | + | |
513 | + // clusters | |
514 | + | |
      // f[29] and f[32] likewise get a direction code only.
515 | + d2lp.v1=label; | |
516 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[29]=d2lp.csa(s_dir,dir); | |
517 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[30]=d2lp.csa(s_dir,dir); f[31]=d2lp.csa(s_dist,dist); | |
518 | + | |
519 | + d3lp.v1= label; | |
520 | + d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[32]=d3lp.csa(s_dir,dir); | |
521 | + | |
522 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sblF; d2lp.cz5(); f[33]=d2lp.csa(s_dir,dir); f[34]=d2lp.csa(s_dist,dist); | |
523 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=chldF; d2lp.cz5(); f[35]=d2lp.csa(s_dir,dir); f[36]=d2lp.csa(s_dist,dist); | |
524 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=prntF; d2lp.cz5(); f[37]=d2lp.csa(s_dir,dir); f[38]=d2lp.csa(s_dist,dist); | |
525 | + | |
526 | + d2pp.v1=label; | |
527 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[39]=d2pp.csa(s_dir,dir); f[40]=d2pp.csa(s_dist,dist); | |
528 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[41]=d2pp.csa(s_dir,dir); f[42]=d2pp.csa(s_dist,dist); | |
529 | + d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[43]=d2pp.csa(s_dir,dir); f[44]=d2pp.csa(s_dist,dist); | |
530 | + | |
531 | + | |
      // POS tags of the tokens next to head and dependent; s_str / s_end stand in at the
      // sentence boundaries.
532 | + int prntPm1 = prnt!=0 ? pos[prnt-1] : s_str; | |
533 | + int chldPm1 = d-1>=0 ? pos[d-1] : s_str; | |
534 | + int prntPp1 = prnt!=pos.length-1 ? pos[prnt+1] : s_end; | |
535 | + int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end; | |
536 | + | |
537 | + // sibling part of speech minus and plus 1 | |
      // NOTE(review): for sblng == -1 the test sblng < pos.length-1 still holds, so sblPp1
      // reads pos[0] rather than a placeholder - confirm this is intended.
538 | + int sblPm1 = sblng>0 ? pos[sblng-1]:s_str; | |
539 | + int sblPp1 = sblng<pos.length-1 ? pos[sblng + 1]:s_end; | |
540 | + | |
      // From here on only the direction code is stored (one slot per template).
541 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP; dl1.cz5(); f[45]=dl1.csa(s_dir,dir); | |
542 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=pP; dl1.cz5(); f[46]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
543 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.cz5(); f[47]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
544 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[48]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
545 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[49]=dl1.csa(s_dir,dir); | |
546 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=prntPm1;dl1.v5=pP;dl1.cz6(); f[50]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
547 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[51]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
548 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[52]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
549 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP; dl1.cz5(); f[53]=dl1.csa(s_dir,dir); | |
550 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=dP; dl1.cz5(); f[54]=dl1.csa(s_dir,dir); | |
551 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.cz5(); f[55]=dl1.csa(s_dir,dir); | |
552 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[56]=dl1.csa(s_dir,dir); | |
553 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6(); f[57]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
554 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[58]=dl1.csa(s_dir,dir); | |
555 | + dl1.v0= n++;dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6();f[59]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
556 | + dl1.v0= n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6(); f[60]=dl1.csa(s_dir,dir); | |
557 | + | |
      // Slots from 61 upwards are assigned through the running counter c.
558 | + int c=61; | |
559 | + | |
      // Cluster ids of the neighbouring tokens (all looked up from form ids here);
      // _cstr / _cend stand in at the sentence boundaries.
560 | + int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1]==-1?-1:cluster.getLP(forms[prnt + 1]): _cend; | |
561 | + int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; | |
562 | + int sLSp1 = sblng < pos.length -1 ? forms[sblng + 1] ==-1?-1:cluster.getLP(forms[sblng + 1]) : _cend; | |
563 | + | |
564 | + int pLSm1 = prnt!=0 ? forms[prnt - 1]==-1?-1:cluster.getLP(forms[prnt - 1]): _cstr; | |
565 | + int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr; | |
566 | + int sLSm1 = sblng>0 ? forms[sblng - 1] ==-1?-1:cluster.getLP(forms[sblng - 1]):_cstr; | |
567 | + | |
568 | + //int c=61; | |
569 | + | |
570 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
571 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
572 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
573 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
574 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
575 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
576 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
577 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
578 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
579 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
580 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
581 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
      // NOTE(review): v3 is sLSm1 on the next line, whereas the corresponding POS template
      // (stored in f[57]) uses the "plus 1" neighbour sblPp1 - possibly sLSp1 was meant.
582 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
583 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
584 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
585 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
586 | + | |
587 | + | |
588 | + | |
      // The 16 templates below assign exactly the same slot values as the 16 templates
      // above; only the template id in v0 differs because n keeps increasing.
589 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
590 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
591 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
592 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
593 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
594 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
595 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
596 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
597 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
598 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
599 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
600 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
601 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
602 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
603 | + dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
604 | + dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
605 | + | |
606 | + // take those in for stacking | |
607 | + | |
608 | + /* | |
609 | + short[] prel = is.plabels[i],phead=is.pheads[i]; | |
610 | + | |
611 | + int g = prnt==phead[d]?1:2 ; | |
612 | + if (sblng>=0) g += prnt==phead[sblng]?4:8; | |
613 | + | |
614 | + int gr = sblng==-1?s_relend:prel[sblng]; | |
615 | + | |
616 | + | |
617 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
618 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
619 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | |
620 | + | |
621 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
622 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
623 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | |
624 | +*/ | |
625 | + | |
      // Without a feats table the method ends here.
626 | + if (feats==null) return; | |
627 | + | |
628 | + int cnt=c; | |
629 | + | |
      // First pairing: feats row of the dependent d (held in featsP) with the sibling's row.
630 | + short[] featsP =feats[d]; | |
631 | + short[] featsSbl =sblng!=-1?feats[sblng]:null; | |
632 | + | |
633 | + dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=dP; | |
634 | + | |
635 | + | |
636 | + cnt = extractFeat(f, cnt ,dir, featsP, featsSbl); | |
637 | + | |
      // Second pairing: feats row of the head with the sibling's row. The loops mirror
      // extractFeat, except that the both-present case passes the literal 1 or 2
      // (chosen by prnt < sblng) to csa instead of dir.
638 | + featsP =feats[prnt]; | |
639 | + featsSbl =sblng!=-1?feats[sblng]:null; | |
640 | + | |
641 | + dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=sP; | |
642 | + if (featsP!=null && featsSbl!=null) { | |
643 | + for(short i1=0;i1<featsP.length;i1++) { | |
644 | + for(short i2=0;i2<featsSbl.length;i2++) { | |
645 | + dlf.v4=featsP[i1]; dlf.v5=featsSbl[i2]; | |
646 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,prnt<sblng?1:2); | |
647 | + } | |
648 | + } | |
649 | + } else if (featsP==null && featsSbl!=null) { | |
650 | + | |
651 | + for(short i2=0;i2<featsSbl.length;i2++) { | |
652 | + dlf.v4=nofeat; dlf.v5=featsSbl[i2]; | |
653 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
654 | + } | |
655 | + | |
656 | + } else if (featsP!=null && featsSbl==null) { | |
657 | + | |
658 | + for(short i1=0;i1<featsP.length;i1++) { | |
659 | + dlf.v4=featsP[i1]; dlf.v5=nofeat; | |
660 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
661 | + } | |
662 | + } | |
663 | + | |
664 | + return; | |
665 | + } | |
666 | + | ||
667 | + private int extractFeat(long[] f, int cnt, int dir, short[] featsP, short[] featsD) { | ||
668 | + if (featsP!=null && featsD!=null) { | ||
669 | + for(short i1=0;i1<featsP.length;i1++) { | ||
670 | + for(short i2=0;i2<featsD.length;i2++) { | ||
671 | + dlf.v4=featsP[i1]; dlf.v5=featsD[i2]; | ||
672 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | ||
673 | + } | ||
674 | + } | ||
675 | + } else if (featsP==null && featsD!=null) { | ||
676 | + | ||
677 | + for(short i2=0;i2<featsD.length;i2++) { | ||
678 | + dlf.v4=nofeat; dlf.v5=featsD[i2]; | ||
679 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | ||
680 | + | ||
681 | + } | ||
682 | + } else if (featsP!=null && featsD==null) { | ||
683 | + | ||
684 | + for(short i1=0;i1<featsP.length;i1++) { | ||
685 | + dlf.v4=featsP[i1]; dlf.v5=nofeat; | ||
686 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | ||
687 | + | ||
688 | + } | ||
689 | + } | ||
690 | + return cnt; | ||
691 | + } | ||
692 | + | ||
693 | + | ||
694 | + public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) { | ||
695 | + | ||
696 | + | ||
697 | + long[] svs = new long[250]; | ||
698 | + | ||
699 | + for (int i = 1; i < heads.length; i++) { | ||
700 | + | ||
701 | + | ||
702 | + int n =basic(pposs, forms, heads[i], i, cluster, f); | ||
703 | + | ||
704 | + firstm(is, ic, heads[i], i, types[i], cluster,svs); | ||
705 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | ||
706 | + | ||
707 | + int ch,cmi,cmo; | ||
708 | + if (heads[i] < i) { | ||
709 | + ch = rightmostRight(heads, heads[i], i); | ||
710 | + cmi = leftmostLeft(heads, i, heads[i]); | ||
711 | + cmo = rightmostRight(heads, i, heads.length); | ||
712 | + | ||
713 | + } else { | ||
714 | + ch = leftmostLeft(heads, heads[i], i); | ||
715 | + cmi = rightmostRight(heads, i, heads[i]); | ||
716 | + cmo = leftmostLeft(heads, i, 0); | ||
717 | + } | ||
718 | + | ||
719 | + siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n); | ||
720 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | ||
721 | + | ||
722 | + | ||
723 | + gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs); | ||
724 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | ||
725 | + | ||
726 | + gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs); | ||
727 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | ||
728 | + } | ||
729 | + | ||
730 | + return f; | ||
731 | + } | ||
732 | + | ||
733 | + | ||
734 | + public float encode3(short[] pos, short heads[] , short[] types, DataF d2) { | ||
735 | + | ||
736 | + double v = 0; | ||
737 | + for (int i = 1; i < heads.length; i++) { | ||
738 | + | ||
739 | + int dir= (heads[i] < i)? 0:1; | ||
740 | + | ||
741 | + v += d2.pl[heads[i]][i]; | ||
742 | + v += d2.lab[heads[i]][i][types[i]][dir]; | ||
743 | + | ||
744 | + boolean left = i<heads[i]; | ||
745 | + short[] labels = Edges.get(pos[heads[i]], pos[i], left); | ||
746 | + int lid=-1; | ||
747 | + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} | ||
748 | + | ||
749 | + int ch,cmi,cmo; | ||
750 | + if (heads[i] < i) { | ||
751 | + ch = rightmostRight(heads, heads[i], i); | ||
752 | + cmi = leftmostLeft(heads, i, heads[i]); | ||
753 | + cmo = rightmostRight(heads, i, heads.length); | ||
754 | + | ||
755 | + if (ch==-1) ch=heads[i]; | ||
756 | + if (cmi==-1) cmi=heads[i]; | ||
757 | + if (cmo==-1) cmo=heads[i]; | ||
758 | + | ||
759 | + } else { | ||
760 | + ch = leftmostLeft(heads, heads[i], i); | ||
761 | + cmi = rightmostRight(heads, i, heads[i]); | ||
762 | + cmo = leftmostLeft(heads, i, 0); | ||
763 | + | ||
764 | + if (ch==-1) ch=i; | ||
765 | + if (cmi==-1) cmi=i; | ||
766 | + if (cmo==-1) cmo=i; | ||
767 | + } | ||
768 | + v += d2.sib[heads[i]][i][ch][dir][lid]; | ||
769 | + v += d2.gra[heads[i]][i][cmi][dir][lid]; | ||
770 | + v += d2.gra[heads[i]][i][cmo][dir][lid]; | ||
771 | + } | ||
772 | + return (float)v; | ||
773 | + } | ||
774 | + | ||
775 | + /** | ||
776 | + * Provide the scores of the edges | ||
777 | + * @param pos | ||
778 | + * @param heads | ||
779 | + * @param types | ||
780 | + * @param edgesScores | ||
781 | + * @param d2 | ||
782 | + * @return | ||
783 | + */ | ||
784 | + public static float encode3(short[] pos, short heads[] , short[] types, float[] edgesScores, DataF d2) { | ||
785 | + | ||
786 | + double v = 0; | ||
787 | + for (int i = 1; i < heads.length; i++) { | ||
788 | + | ||
789 | + int dir= (heads[i] < i)? 0:1; | ||
790 | + | ||
791 | + edgesScores[i] = d2.pl[heads[i]][i]; | ||
792 | + edgesScores[i] += d2.lab[heads[i]][i][types[i]][dir]; | ||
793 | + | ||
794 | + boolean left = i<heads[i]; | ||
795 | + short[] labels = Edges.get(pos[heads[i]], pos[i], left); | ||
796 | + int lid=-1; | ||
797 | + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} | ||
798 | + | ||
799 | + int ch,cmi,cmo; | ||
800 | + if (heads[i] < i) { | ||
801 | + ch = rightmostRight(heads, heads[i], i); | ||
802 | + cmi = leftmostLeft(heads, i, heads[i]); | ||
803 | + cmo = rightmostRight(heads, i, heads.length); | ||
804 | + | ||
805 | + if (ch==-1) ch=heads[i]; | ||
806 | + if (cmi==-1) cmi=heads[i]; | ||
807 | + if (cmo==-1) cmo=heads[i]; | ||
808 | + | ||
809 | + } else { | ||
810 | + ch = leftmostLeft(heads, heads[i], i); | ||
811 | + cmi = rightmostRight(heads, i, heads[i]); | ||
812 | + cmo = leftmostLeft(heads, i, 0); | ||
813 | + | ||
814 | + if (ch==-1) ch=i; | ||
815 | + if (cmi==-1) cmi=i; | ||
816 | + if (cmo==-1) cmo=i; | ||
817 | + } | ||
818 | + edgesScores[i] += d2.sib[heads[i]][i][ch][dir][lid]; | ||
819 | + edgesScores[i] += d2.gra[heads[i]][i][cmi][dir][lid]; | ||
820 | + edgesScores[i] += d2.gra[heads[i]][i][cmo][dir][lid]; | ||
821 | + v+=edgesScores[i]; | ||
822 | + } | ||
823 | + return (float)v; | ||
824 | + } | ||
825 | + | ||
826 | + | ||
827 | + private static int rightmostRight(short[] heads, int head, int max) { | ||
828 | + int rightmost = -1; | ||
829 | + for (int i = head + 1; i < max; i++) if (heads[i] == head) rightmost = i; | ||
830 | + | ||
831 | + return rightmost; | ||
832 | + } | ||
833 | + | ||
834 | + private static int leftmostLeft(short[] heads, int head, int min) { | ||
835 | + int leftmost = -1; | ||
836 | + for (int i = head - 1; i > min; i--) if (heads[i] == head) leftmost = i; | ||
837 | + return leftmost; | ||
838 | + } | ||
839 | + | ||
// Names of feature types and symbolic values passed to MFB.register in initFeatures().
public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA";

// Ids returned by mf.register(DIR, RA) and mf.register(DIR, LA) in initFeatures().
private static int ra,la;
// Id returned by mf.register(POS, STR) in initFeatures().
private static int s_str;
// Ids returned in initFeatures() for (POS, END), (Cluster.SPATH, END), (Cluster.SPATH, STR),
// (WORD, STWRD) and (REL, END) respectively.
private static int s_end, _cend,_cstr, s_stwrd,s_relend;

protected static final String TYPE = "TYPE",DIR = "D", FEAT="F";
public static final String POS = "POS";
protected static final String DIST = "DIST",MID = "MID";

// String forms of the DIST values registered in initFeatures().
private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10";

// Ids returned by mf.register(DIST, ...) in initFeatures(); di0 belongs to "0", d10 to "10".
private static int di0, d4,d3,d2,d1,d5,d10;


private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS";



// Id returned by mf.register(FEAT, "NOFEAT") in initFeatures().
private static int nofeat;


// Value stored by setMaxForm(int) and returned by getMaxForm(); static, so shared by all instances.
private static int maxForm;
863 | + | ||
864 | + | ||
865 | + /** | ||
866 | + * Initialize the features. | ||
867 | + * @param maxFeatures | ||
868 | + */ | ||
869 | + static public void initFeatures() { | ||
870 | + | ||
871 | + | ||
872 | + MFB mf = new MFB(); | ||
873 | + mf.register(POS, MID); | ||
874 | + s_str = mf.register(POS, STR); | ||
875 | + s_end = mf.register(POS, END); | ||
876 | + | ||
877 | + s_relend = mf.register(REL, END); | ||
878 | + | ||
879 | + _cstr= mf.register(Cluster.SPATH,STR); | ||
880 | + _cend=mf.register(Cluster.SPATH,END); | ||
881 | + | ||
882 | + | ||
883 | + mf.register(TYPE, POS); | ||
884 | + | ||
885 | + s_stwrd=mf.register(WORD,STWRD); | ||
886 | + mf.register(POS,STPOS); | ||
887 | + | ||
888 | + la = mf.register(DIR, LA); | ||
889 | + ra = mf.register(DIR, RA); | ||
890 | + | ||
891 | + // mf.register(TYPE, CHAR); | ||
892 | + | ||
893 | + mf.register(TYPE, FEAT); | ||
894 | + nofeat=mf.register(FEAT, "NOFEAT"); | ||
895 | + | ||
896 | + for(int k=0;k<215;k++) mf.register(TYPE, "F"+k); | ||
897 | + | ||
898 | + | ||
899 | + di0=mf.register(DIST, _0); | ||
900 | + d1=mf.register(DIST, _1); | ||
901 | + d2=mf.register(DIST, _2); | ||
902 | + d3=mf.register(DIST, _3); | ||
903 | + d4=mf.register(DIST, _4); | ||
904 | + d5=mf.register(DIST, _5); | ||
905 | + // d5l=mf.register(DIST, _5l); | ||
906 | + d10=mf.register(DIST, _10); | ||
907 | + | ||
908 | + | ||
909 | + } | ||
910 | + | ||
911 | + /* (non-Javadoc) | ||
912 | + * @see extractors.Extractor#getType() | ||
913 | + */ | ||
914 | + @Override | ||
915 | + public int getType() { | ||
916 | + return s_type; | ||
917 | + } | ||
918 | + | ||
919 | + /* (non-Javadoc) | ||
920 | + * @see extractors.Extractor#setMaxForm(java.lang.Integer) | ||
921 | + */ | ||
922 | + @Override | ||
923 | + public void setMaxForm(int max) { | ||
924 | + maxForm = max; | ||
925 | + } | ||
926 | + | ||
927 | + /* (non-Javadoc) | ||
928 | + * @see extractors.Extractor#getMaxForm() | ||
929 | + */ | ||
930 | + @Override | ||
931 | + public int getMaxForm() { | ||
932 | + return maxForm; | ||
933 | + } | ||
934 | + | ||
935 | + | ||
936 | + | ||
937 | +} |
dependencyParser/basic/mate-tools/src/extractors/ExtractorFactory.java
0 → 100644
1 | +/** | ||
2 | + * | ||
3 | + */ | ||
4 | +package extractors; | ||
5 | + | ||
6 | +import is2.data.Long2IntInterface; | ||
7 | + | ||
8 | +/** | ||
9 | + * @author Dr. Bernd Bohnet, 29.04.2011 | ||
10 | + * | ||
11 | + * | ||
12 | + */ | ||
13 | +public class ExtractorFactory { | ||
14 | + | ||
15 | + public static final int StackedClustered = 4; | ||
16 | + public static final int StackedClusteredR2 = 5; | ||
17 | + | ||
18 | + | ||
19 | + private int type=-1; | ||
20 | + | ||
21 | + /** | ||
22 | + * @param stackedClusteredR22 | ||
23 | + */ | ||
24 | + public ExtractorFactory(int t) { | ||
25 | + type=t; | ||
26 | + } | ||
27 | + | ||
28 | + /** | ||
29 | + * @param stackedClusteredR22 | ||
30 | + * @param l2i | ||
31 | + * @return | ||
32 | + */ | ||
33 | + public Extractor getExtractor(Long2IntInterface l2i) { | ||
34 | + switch(type) | ||
35 | + { | ||
36 | + case StackedClustered: | ||
37 | + return new ExtractorClusterStacked(l2i); | ||
38 | + case StackedClusteredR2: | ||
39 | + return new ExtractorClusterStackedR2(l2i); | ||
40 | + } | ||
41 | + return null; | ||
42 | + } | ||
43 | + | ||
44 | +} |
dependencyParser/basic/mate-tools/src/extractors/ExtractorReranker.java
0 → 100644
1 | +package extractors; | ||
2 | + | ||
3 | + | ||
4 | +import is2.data.Cluster; | ||
5 | +import is2.data.D4; | ||
6 | +import is2.data.Instances; | ||
7 | +import is2.data.Long2IntInterface; | ||
8 | +import is2.data.MFB; | ||
9 | +import is2.data.ParseNBest; | ||
10 | +import is2.util.DB; | ||
11 | + | ||
12 | +import java.util.Arrays; | ||
13 | + | ||
14 | + | ||
15 | + | ||
16 | +final public class ExtractorReranker { | ||
17 | + | ||
// Values read from the MFB feature counter in initStat(); s_child is declared here but is not
// assigned anywhere in this class.
public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos;
// Ids returned by mf.register(DIST, ...) in initFeatures() for the values "0" .. "10".
public static int d0,d1,d2,d3,d4,d5,d10;

// Created in init(); the extractFeatures* methods use it to register child-sequence strings.
MFB mf;

// Feature-code builders; each one is created over li in the constructor.
final D4 dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ;

// The mapping passed to the constructor and handed to every D4 above.
public final Long2IntInterface li;
26 | + | ||
/**
 * Creates the extractor and one {@code D4} per feature-code builder field, all over {@code li}.
 *
 * @param li the mapping stored in {@link #li} and passed to every {@code D4}
 */
public ExtractorReranker(Long2IntInterface li) {
    this.li = li;

    dl1 = new D4(li);
    dl2 = new D4(li);
    dwr = new D4(li);
    dr = new D4(li);
    dwwp = new D4(li);

    dw = new D4(li);
    dwp = new D4(li);

    dlf = new D4(li);
    d3lp = new D4(li);
    d2lp = new D4(li);
    d2pw = new D4(li);
    d2pp = new D4(li);
}
41 | + | ||
42 | + public static void initStat() { | ||
43 | + DB.println("init called "); | ||
44 | + MFB mf = new MFB(); | ||
45 | + s_rel = mf.getFeatureCounter().get(REL).intValue();; | ||
46 | + s_pos = mf.getFeatureCounter().get(POS).intValue(); | ||
47 | + s_word = mf.getFeatureCounter().get(WORD).intValue(); | ||
48 | + s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits(); | ||
49 | + s_dir = mf.getFeatureCounter().get(DIR); | ||
50 | + la = mf.getValue(DIR, LA); | ||
51 | + ra = mf.getValue(DIR, RA); | ||
52 | + s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST); | ||
53 | + s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT); | ||
54 | + s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH); | ||
55 | + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH); | ||
56 | + } | ||
57 | + | ||
58 | + public void init(){ | ||
59 | + mf = new MFB(); | ||
60 | + | ||
61 | + dl1.a0 = s_type;dl1.a1 = 3; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; | ||
62 | + dl2.a0 = s_type;dl2.a1 = 3;dl2.a2 = s_rel;dl2.a3 = s_rel;dl2.a4 = s_rel;dl2.a5 = s_rel;dl2.a6 = s_rel;dl2.a7 = s_rel;dl2.a8 = s_rel; dl2.a9 = s_rel; | ||
63 | + dwp.a0 = s_type; dwp.a1 = 3; dwp.a2 = s_word; dwp.a3 = s_rel; dwp.a4 = s_rel; dwp.a5 = s_rel;dwp.a6 = s_rel;dwp.a7 = s_rel; | ||
64 | + dwwp.a0 = s_type; dwwp.a1 = 3; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word;dwwp.a6 = s_pos;dwwp.a7 = s_pos; | ||
65 | + } | ||
66 | + | ||
67 | + | ||
68 | + | ||
69 | + | ||
70 | + | ||
71 | + | ||
// Names of feature types and symbolic values used with MFB in initStat() and initFeatures().
public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA", FEAT="F";

// Ids of the two edge directions; assigned in both initStat() and initFeatures().
private static int ra,la;
// Id returned by mf.register(POS, STR) in initFeatures().
private static int s_str;
// Ids returned in initFeatures() for (POS, END), (Cluster.SPATH, END), (Cluster.SPATH, STR),
// (WORD, STWRD) and (REL, END) respectively; s_end, s_stwrd and s_relend serve as
// substitute values in the extractFeatures* methods.
private static int s_end, _cend,_cstr, s_stwrd,s_relend;

protected static final String TYPE = "TYPE",DIR = "D";
public static final String POS = "POS";
protected static final String DIST = "DIST",MID = "MID";

// String forms of the DIST values registered in initFeatures().
private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10";



private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS";



// Id returned by mf.register(FEAT, "NOFEAT") in initFeatures().
private static int nofeat;


// Not read or written inside this class; public, so presumably set by callers.
public static int maxForm;


// Not referenced inside this class. NOTE(review): its value equals the number of "F<k>"
// names registered in initFeatures() — confirm whether the two are meant to be linked.
final public static int _FC =60;
97 | + | ||
98 | + | ||
99 | + /** | ||
100 | + * Initialize the features. | ||
101 | + * @param maxFeatures | ||
102 | + */ | ||
103 | + static public void initFeatures() { | ||
104 | + | ||
105 | + | ||
106 | + MFB mf = new MFB(); | ||
107 | + mf.register(POS, MID); | ||
108 | + s_str = mf.register(POS, STR); | ||
109 | + s_end = mf.register(POS, END); | ||
110 | + | ||
111 | + s_relend = mf.register(REL, END); | ||
112 | + | ||
113 | + _cstr= mf.register(Cluster.SPATH,STR); | ||
114 | + _cend=mf.register(Cluster.SPATH,END); | ||
115 | + | ||
116 | + | ||
117 | + mf.register(TYPE, POS); | ||
118 | + | ||
119 | + s_stwrd=mf.register(WORD,STWRD); | ||
120 | + mf.register(POS,STPOS); | ||
121 | + | ||
122 | + la = mf.register(DIR, LA); | ||
123 | + ra = mf.register(DIR, RA); | ||
124 | + | ||
125 | + // mf.register(TYPE, CHAR); | ||
126 | + | ||
127 | + mf.register(TYPE, FEAT); | ||
128 | + nofeat=mf.register(FEAT, "NOFEAT"); | ||
129 | + | ||
130 | + for(int k=0;k<60;k++) mf.register(TYPE, "F"+k); | ||
131 | + | ||
132 | + | ||
133 | + d0 =mf.register(DIST, _0); | ||
134 | + d1= mf.register(DIST, _1); | ||
135 | + d2 =mf.register(DIST, _2); | ||
136 | + d3= mf.register(DIST, _3); | ||
137 | + d4= mf.register(DIST, _4); | ||
138 | + d5= mf.register(DIST, _5); | ||
139 | + // d5l=mf.register(DIST, _5l); | ||
140 | + d10= mf.register(DIST, _10); | ||
141 | + | ||
142 | + | ||
143 | + } | ||
144 | + | ||
145 | + /** | ||
146 | + * @param is | ||
147 | + * @param n | ||
148 | + * @param parseNBest | ||
149 | + * @param vs | ||
150 | + */ | ||
151 | + public void extractFeatures3(Instances is, int i, ParseNBest parse, int rank, long[] v) { | ||
152 | + | ||
153 | + int f=1,n=0; | ||
154 | + | ||
155 | + for(short k= 0; k<is.length(i)-1;k++) { | ||
156 | + | ||
157 | + short[] chld = children(parse.heads,k); | ||
158 | + | ||
159 | + f=2; | ||
160 | + | ||
161 | + int fm = is.forms[i][k]; | ||
162 | + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; | ||
163 | + int h = is.pposs[i][k]; | ||
164 | + int hrel = parse.labels[k]; | ||
165 | + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; | ||
166 | + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; | ||
167 | + | ||
168 | + | ||
169 | + | ||
170 | + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; | ||
171 | + | ||
172 | + int [] rels = new int[chld.length]; | ||
173 | + int [] pss = new int[chld.length]; | ||
174 | + for(int j=0;j<chld.length;j++) { | ||
175 | + rels[j] = parse.labels[chld[j]]; | ||
176 | + pss[j] = is.pposs[i][chld[j]]; | ||
177 | + } | ||
178 | + | ||
179 | + StringBuilder rl = new StringBuilder(chld.length); | ||
180 | + StringBuilder psl = new StringBuilder(chld.length); | ||
181 | + for(int j=0;j<chld.length;j++) { | ||
182 | + rl.append((char)rels[j]); | ||
183 | + psl.append((char)pss[j]); | ||
184 | + } | ||
185 | + | ||
186 | + int rli = mf.register("rli", rl.toString()); | ||
187 | + int pli = mf.register("pli", psl.toString()); | ||
188 | + | ||
189 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); | ||
190 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); | ||
191 | + | ||
192 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
193 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
194 | + | ||
195 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
196 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
197 | + | ||
198 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
199 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
200 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
201 | + | ||
202 | + | ||
203 | + | ||
204 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
205 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
206 | + | ||
207 | + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | ||
208 | + | ||
209 | + Arrays.sort(rels); | ||
210 | + Arrays.sort(pss); | ||
211 | + | ||
212 | + rl = new StringBuilder(chld.length); | ||
213 | + psl = new StringBuilder(chld.length); | ||
214 | + for(int j=0;j<chld.length;j++) { | ||
215 | + rl.append((char)rels[j]); | ||
216 | + psl.append((char)pss[j]); | ||
217 | + } | ||
218 | + rli = mf.register("rli", rl.toString()); | ||
219 | + pli = mf.register("pli", psl.toString()); | ||
220 | + | ||
221 | + | ||
222 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
223 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
224 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
225 | + | ||
226 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
227 | + | ||
228 | + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); | ||
229 | + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); | ||
230 | + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel; dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | ||
231 | + | ||
232 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
233 | + dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
234 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hh; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
235 | + | ||
236 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hhf; dwwp.v4=h; dwwp.v5=hh; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
237 | + dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hh; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
238 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hh; dwwp.v4=h; dwwp.v5=hrel; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
239 | + | ||
240 | + | ||
241 | + // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); v[n++]=dl1.getVal(); | ||
242 | +// dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); v[n++]=dl1.getVal(); | ||
243 | + // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=dir;dl1.v5=hh; dl1.v6=hhh;dl1.v7=rlast; dl1.v8=r1; dl1.cz9(); v[n++]=dl1.getVal(); | ||
244 | + // dl1.v0= f++; dl1.v2=h;dl1.v3=hh; dl1.v4=hhh;dl1.v5=hrel; dl1.cz6(); v[n++]=dl1.getVal(); | ||
245 | + | ||
246 | + | ||
247 | + short hp = parse.heads[k]; | ||
248 | + short[] hchld = hp==-1?new short[0]:children(parse.heads,hp); | ||
249 | + | ||
250 | + int [] hrels = new int[hchld.length]; | ||
251 | + int [] hpss = new int[hchld.length]; | ||
252 | + for(int j=0;j<hchld.length;j++) { | ||
253 | + hrels[j] = parse.labels[hchld[j]]; | ||
254 | + hpss[j] = is.pposs[i][hchld[j]]; | ||
255 | + } | ||
256 | + | ||
257 | + | ||
258 | + StringBuilder hrl = new StringBuilder(hchld.length); | ||
259 | + StringBuilder hpsl = new StringBuilder(hchld.length); | ||
260 | + for(int j=0;j<hchld.length;j++) { | ||
261 | + hrl.append((char)hrels[j]); | ||
262 | + hpsl.append((char)hpss[j]); | ||
263 | + } | ||
264 | + int hrli = mf.register("rli", hrl.toString()); | ||
265 | + int hpli = mf.register("pli", hpsl.toString()); | ||
266 | + | ||
267 | + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=hrli; dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
268 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hrli; dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
269 | + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=fm; dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
270 | + | ||
271 | + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=rli; dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
272 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hrli;dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
273 | + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=hpli;dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
274 | + | ||
275 | + | ||
276 | + | ||
277 | + } | ||
278 | + | ||
279 | + v[n]=Integer.MIN_VALUE; | ||
280 | + } | ||
281 | + | ||
282 | + /** | ||
283 | + * This works seem works well with n-best n=8 (88.858074) , n=10 (88.836884), n=12 (88.858) | ||
284 | + * n=14 (88.913417) n=16 (88.79546) n=20 (88.80621) n 50 (88.729364) | ||
285 | + * 1-best: 88.749605 | ||
286 | + * | ||
287 | + * @param is | ||
288 | + * @param i | ||
289 | + * @param parse | ||
290 | + * @param rank | ||
291 | + * @param v | ||
292 | + * @param cluster | ||
293 | + */ | ||
294 | + public void extractFeatures(Instances is, int i, ParseNBest parse, int rank, long[] v, Cluster cluster) { | ||
295 | + | ||
296 | + // mf.getValue(REL, "SB"); | ||
297 | + | ||
298 | + int f=1,n=0; | ||
299 | + | ||
300 | + for(short k= 0; k<is.length(i)-1;k++) { | ||
301 | + | ||
302 | + short[] chld = children(parse.heads,k); | ||
303 | + | ||
304 | + int abs = Math.abs(parse.heads[k]-k); | ||
305 | + final int dist; | ||
306 | + if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2; | ||
307 | + else if (abs==2)dist=d1; else dist=d0; | ||
308 | + | ||
309 | + | ||
310 | + f=2; | ||
311 | + | ||
312 | + int fm = is.forms[i][k]; | ||
313 | + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; | ||
314 | + int h = is.pposs[i][k]; | ||
315 | + int hrel = parse.labels[k];//is.labels[i][k]; | ||
316 | + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; | ||
317 | + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; | ||
318 | + | ||
319 | + int r1 = chld.length>0?parse.labels[chld[0]]:s_relend; | ||
320 | + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; | ||
321 | + | ||
322 | + int [] rels = new int[chld.length]; | ||
323 | + int [] pss = new int[chld.length]; | ||
324 | + int [] cls = new int[chld.length]; | ||
325 | + | ||
326 | + int[] rc = new int[30]; // 20 was a good length | ||
327 | + | ||
328 | + for(int j=0;j<chld.length;j++) { | ||
329 | + rels[j] = parse.labels[chld[j]]; | ||
330 | + if (rels[j]<rc.length) rc[rels[j]]++; | ||
331 | + pss[j] = is.pposs[i][chld[j]]; | ||
332 | +// cls[j] = is.forms[i][chld[j]]==-1?0:cluster.getLP(is.forms[i][chld[j]]); | ||
333 | +// cls[j] = cls[j]==-1?0:cls[j]; | ||
334 | + } | ||
335 | + | ||
336 | + StringBuilder rl = new StringBuilder(chld.length); | ||
337 | + StringBuilder psl = new StringBuilder(chld.length); | ||
338 | + StringBuilder csl = new StringBuilder(chld.length); | ||
339 | + for(int j=0;j<chld.length;j++) { | ||
340 | + rl.append((char)rels[j]); | ||
341 | + psl.append((char)pss[j]); | ||
342 | +// csl.append((char)cls[j]); | ||
343 | + } | ||
344 | + | ||
345 | + int rli = mf.register("rli", rl.toString()); | ||
346 | + int pli = mf.register("pli", psl.toString()); | ||
347 | +// int cli = mf.register("cli", csl.toString()); | ||
348 | + | ||
349 | + | ||
350 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
351 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
352 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
353 | + // dwwp.v0=f++; dwwp.v2=cli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
354 | + | ||
355 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); | ||
356 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); | ||
357 | + //dwwp.v0=f++; dwwp.v2=cli; dwwp.cz3(); v[n++]=dwwp.getVal(); | ||
358 | + | ||
359 | + // dwwp.v0=f++; dwwp.v2=cli;dwwp.v3=h; dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
360 | + | ||
361 | + for(int j=1;j<rc.length;j++) { | ||
362 | + dwwp.v0=f++; dwwp.v2=rc[j]==0?1:rc[j]==1?2:3; dwwp.v3=j; dwwp.cz4(); v[n++]=dwwp.getVal();// | ||
363 | + } | ||
364 | + | ||
365 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
366 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
367 | + | ||
368 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
369 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
370 | + | ||
371 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
372 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
373 | + | ||
374 | + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | ||
375 | + | ||
376 | + //dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hh; dwwp.v4=dist; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
377 | + | ||
378 | + Arrays.sort(rels); | ||
379 | + Arrays.sort(pss); | ||
380 | + | ||
381 | + rl = new StringBuilder(chld.length); | ||
382 | + psl = new StringBuilder(chld.length); | ||
383 | + for(int j=0;j<chld.length;j++) { | ||
384 | + rl.append((char)rels[j]); | ||
385 | + psl.append((char)pss[j]); | ||
386 | + } | ||
387 | + rli = mf.register("rli", rl.toString()); | ||
388 | + pli = mf.register("pli", psl.toString()); | ||
389 | + | ||
390 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
391 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
392 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
393 | + | ||
394 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
395 | + | ||
396 | + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); | ||
397 | + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); | ||
398 | + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | ||
399 | + } | ||
400 | + | ||
401 | + v[n]=Integer.MIN_VALUE; | ||
402 | + } | ||
403 | + | ||
404 | + /** | ||
405 | + | ||
406 | + * Works well! | ||
407 | + * @param is | ||
408 | + * @param i | ||
409 | + * @param parse | ||
410 | + * @param rank | ||
411 | + * @param v | ||
412 | + */ | ||
413 | + public void extractFeatures6(Instances is, int i, ParseNBest parse, int rank, long[] v) { | ||
414 | + | ||
415 | + // mf.getValue(REL, "SB"); | ||
416 | + | ||
417 | + int f=1,n=0; | ||
418 | + | ||
419 | + for(short k= 0; k<is.length(i)-1;k++) { | ||
420 | + | ||
421 | + short[] chld = children(parse.heads,k); | ||
422 | + | ||
423 | + f=2; | ||
424 | + | ||
425 | + int fm = is.forms[i][k]; | ||
426 | + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; | ||
427 | + int h = is.pposs[i][k]; | ||
428 | + int hrel = parse.labels[k];//is.labels[i][k]; | ||
429 | + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; | ||
430 | + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; | ||
431 | + | ||
432 | + int r1 = chld.length>0?parse.labels[chld[0]]:s_relend; | ||
433 | + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; | ||
434 | + | ||
435 | + int [] rels = new int[chld.length]; | ||
436 | + int [] pss = new int[chld.length]; | ||
437 | + | ||
438 | + int[] rc = new int[30]; // 20 was a good length | ||
439 | + | ||
440 | + for(int j=0;j<chld.length;j++) { | ||
441 | + rels[j] = parse.labels[chld[j]]; | ||
442 | + if (rels[j]<rc.length) rc[rels[j]]++; | ||
443 | + // if (rels[j]==sb) numSB++; | ||
444 | + pss[j] = is.pposs[i][chld[j]]; | ||
445 | + } | ||
446 | + | ||
447 | + StringBuilder rl = new StringBuilder(chld.length); | ||
448 | + StringBuilder psl = new StringBuilder(chld.length); | ||
449 | + for(int j=0;j<chld.length;j++) { | ||
450 | + rl.append((char)rels[j]); | ||
451 | + psl.append((char)pss[j]); | ||
452 | + } | ||
453 | + | ||
454 | + int rli = mf.register("rli", rl.toString()); | ||
455 | + int pli = mf.register("pli", psl.toString()); | ||
456 | + | ||
457 | + | ||
458 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
459 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
460 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
461 | + | ||
462 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); | ||
463 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); | ||
464 | + | ||
465 | + for(int j=1;j<rc.length;j++) { | ||
466 | + dwwp.v0=f++; dwwp.v2=rc[j]==0?1:rc[j]==1?2:3; dwwp.v3=j; dwwp.cz4(); v[n++]=dwwp.getVal();// | ||
467 | + } | ||
468 | + | ||
469 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
470 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
471 | + | ||
472 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
473 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
474 | + | ||
475 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
476 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
477 | + | ||
478 | + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | ||
479 | + | ||
480 | + | ||
481 | + Arrays.sort(rels); | ||
482 | + Arrays.sort(pss); | ||
483 | + | ||
484 | + rl = new StringBuilder(chld.length); | ||
485 | + psl = new StringBuilder(chld.length); | ||
486 | + for(int j=0;j<chld.length;j++) { | ||
487 | + rl.append((char)rels[j]); | ||
488 | + psl.append((char)pss[j]); | ||
489 | + } | ||
490 | + rli = mf.register("rli", rl.toString()); | ||
491 | + pli = mf.register("pli", psl.toString()); | ||
492 | + | ||
493 | + | ||
494 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
495 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
496 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
497 | + | ||
498 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
499 | + | ||
500 | + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); | ||
501 | + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); | ||
502 | + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | ||
503 | + | ||
504 | + } | ||
505 | + | ||
506 | + v[n]=Integer.MIN_VALUE; | ||
507 | + } | ||
508 | + | ||
509 | + | ||
510 | + | ||
511 | + public void extractFeatures2(Instances is, int i, ParseNBest parse, int rank, long[] v) { | ||
512 | + | ||
513 | + | ||
514 | + | ||
515 | + int f=1,n=0; | ||
516 | + | ||
517 | + for(short k= 0; k<is.length(i)-1;k++) { | ||
518 | + | ||
519 | + short[] chld = children(parse.heads,k); | ||
520 | + | ||
521 | + f=2; | ||
522 | + | ||
523 | + int fm = is.forms[i][k]; | ||
524 | + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; | ||
525 | + int h = is.pposs[i][k]; | ||
526 | + int hrel = parse.labels[k];//is.labels[i][k]; | ||
527 | + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; | ||
528 | + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; | ||
529 | + | ||
530 | + int r1 = chld.length>0?parse.labels[chld[0]]:s_relend; | ||
531 | + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; | ||
532 | + | ||
533 | + int [] rels = new int[chld.length]; | ||
534 | + int [] pss = new int[chld.length]; | ||
535 | + | ||
536 | + | ||
537 | + | ||
538 | + for(int j=0;j<chld.length;j++) { | ||
539 | + rels[j] = parse.labels[chld[j]]; | ||
540 | + pss[j] = is.pposs[i][chld[j]]; | ||
541 | + } | ||
542 | + | ||
543 | + StringBuilder rl = new StringBuilder(chld.length); | ||
544 | + StringBuilder psl = new StringBuilder(chld.length); | ||
545 | + for(int j=0;j<chld.length;j++) { | ||
546 | + rl.append((char)rels[j]); | ||
547 | + psl.append((char)pss[j]); | ||
548 | + } | ||
549 | + | ||
550 | + int rli = mf.register("rli", rl.toString()); | ||
551 | + int pli = mf.register("pli", psl.toString()); | ||
552 | + | ||
553 | + | ||
554 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
555 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
556 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
557 | + | ||
558 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); | ||
559 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); | ||
560 | + | ||
561 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
562 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
563 | + | ||
564 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
565 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
566 | + | ||
567 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); | ||
568 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
569 | + | ||
570 | + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | ||
571 | + | ||
572 | + | ||
573 | + Arrays.sort(rels); | ||
574 | + Arrays.sort(pss); | ||
575 | + | ||
576 | + rl = new StringBuilder(chld.length); | ||
577 | + psl = new StringBuilder(chld.length); | ||
578 | + for(int j=0;j<chld.length;j++) { | ||
579 | + rl.append((char)rels[j]); | ||
580 | + psl.append((char)pss[j]); | ||
581 | + } | ||
582 | + rli = mf.register("rli", rl.toString()); | ||
583 | + pli = mf.register("pli", psl.toString()); | ||
584 | + | ||
585 | + | ||
586 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
587 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
588 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | ||
589 | + | ||
590 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | ||
591 | + | ||
592 | + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); | ||
593 | + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); | ||
594 | + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | ||
595 | + | ||
596 | + } | ||
597 | + | ||
598 | + v[n]=Integer.MIN_VALUE; | ||
599 | + } | ||
600 | + | ||
601 | + | ||
602 | + | ||
603 | + /** | ||
604 | + * @param parse | ||
605 | + * @param k | ||
606 | + * @return | ||
607 | + */ | ||
608 | + private short[] children(short[] heads, short h) { | ||
609 | + | ||
610 | + int c=0; | ||
611 | + for(int k=0;k<heads.length;k++) if (heads[k] ==h ) c++; | ||
612 | + | ||
613 | + short[] clds = new short[c]; | ||
614 | + c=0; | ||
615 | + for(int k=0;k<heads.length;k++) if (heads[k] ==h ) clds[c++]=(short)k; | ||
616 | + return clds; | ||
617 | + } | ||
618 | + | ||
619 | + | ||
620 | + | ||
621 | +} |
dependencyParser/basic/mate-tools/src/extractors/ParallelExtract.java
0 → 100755
1 | +package extractors; | ||
2 | + | ||
3 | +import is2.data.Cluster; | ||
4 | +import is2.data.DataF; | ||
5 | +import is2.data.Edges; | ||
6 | +import is2.data.F2SF; | ||
7 | +import is2.data.FV; | ||
8 | +import is2.data.Instances; | ||
9 | +import is2.data.Long2IntInterface; | ||
10 | + | ||
11 | +import java.util.ArrayList; | ||
12 | +import java.util.concurrent.Callable; | ||
13 | + | ||
14 | + | ||
15 | +/** | ||
16 | + * @author Bernd Bohnet, 30.08.2009 | ||
17 | + * | ||
18 | + * This class implements a parallel feature extractor. | ||
19 | + */ | ||
20 | +final public class ParallelExtract implements Callable<Object> | ||
21 | +{ | ||
22 | + // the data space of the weights for a dependency tree | ||
23 | + final DataF d; | ||
24 | + | ||
25 | + // the data extractor does the actual work | ||
26 | + final Extractor extractor; | ||
27 | + | ||
28 | + private Instances is; | ||
29 | + private int i; | ||
30 | + | ||
31 | + private F2SF para; | ||
32 | + | ||
33 | + private Cluster cluster; | ||
34 | + | ||
35 | + private Long2IntInterface li; | ||
36 | + | ||
37 | + public ParallelExtract(Extractor e, Instances is, int i, DataF d, F2SF para,Cluster cluster, Long2IntInterface li) { | ||
38 | + | ||
39 | + this.is =is; | ||
40 | + extractor=e; | ||
41 | + this.d =d; | ||
42 | + this.i=i; | ||
43 | + this.para=para; | ||
44 | + this.cluster = cluster; | ||
45 | + this.li=li; | ||
46 | + } | ||
47 | + | ||
48 | + | ||
49 | + public static class DSet { | ||
50 | + int w1,w2; | ||
51 | + } | ||
52 | + | ||
53 | + public Object call() { | ||
54 | + | ||
55 | + try { | ||
56 | + | ||
57 | + F2SF f= para; | ||
58 | + | ||
59 | + | ||
60 | + short[] pos=is.pposs[i]; | ||
61 | + int[] forms=is.forms[i]; | ||
62 | + int[] lemmas=is.plemmas[i]; | ||
63 | + short[][] feats=is.feats[i]; | ||
64 | + int length = pos.length; | ||
65 | + | ||
66 | + long[] svs = new long[250]; | ||
67 | + | ||
68 | + int type=extractor.getType(); | ||
69 | + | ||
70 | + while (true) { | ||
71 | + | ||
72 | + DSet set = get(); | ||
73 | + if (set ==null) break; | ||
74 | + | ||
75 | + int w1=set.w1; | ||
76 | + int w2=set.w2; | ||
77 | + | ||
78 | + f.clear(); | ||
79 | + int n =extractor.basic(pos, forms, w1, w2,cluster, f); | ||
80 | + d.pl[w1][w2]=f.getScoreF(); | ||
81 | + | ||
82 | + short[] labels = Edges.get(pos[w1], pos[w2],false); | ||
83 | + float[][] lab = d.lab[w1][w2]; | ||
84 | + | ||
85 | + extractor.firstm(is, i, w1, w2, 0, cluster, svs); | ||
86 | + | ||
87 | + if (labels!=null) { | ||
88 | + | ||
89 | + | ||
90 | + for (int l = labels.length - 1; l >= 0; l--) { | ||
91 | + | ||
92 | + short label = labels[l]; | ||
93 | + | ||
94 | + f.clear(); | ||
95 | + for(int k=svs.length-1;k>=0;k--) if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); | ||
96 | + lab[label][0]=f.getScoreF(); | ||
97 | + } | ||
98 | + } | ||
99 | + | ||
100 | + labels = Edges.get(pos[w1], pos[w2],true); | ||
101 | + | ||
102 | + if (labels!=null) { | ||
103 | + | ||
104 | + for (int l = labels.length - 1; l >= 0; l--) { | ||
105 | + | ||
106 | + int label = labels[l]; | ||
107 | + f.clear(); | ||
108 | + for(int k=svs.length-1;k>=0;k--) if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); | ||
109 | + lab[label][1]=f.getScoreF(); | ||
110 | + } | ||
111 | + } | ||
112 | + | ||
113 | + int s = w1<w2 ? w1 : w2; | ||
114 | + int e = w1<w2 ? w2 : w1; | ||
115 | + | ||
116 | + int sg = w1<w2 ? w1 : 0; | ||
117 | + int eg = w1<w2 ? length : w1+1; | ||
118 | + | ||
119 | + | ||
120 | + for(int m=s;m<e;m++) { | ||
121 | + for(int dir=0;dir<2;dir++) { | ||
122 | + labels = Edges.get(pos[w1], pos[w2],dir==1); | ||
123 | + float lab2[]= new float[labels.length]; | ||
124 | + | ||
125 | + int g = (m==s||e==m) ? -1 : m; | ||
126 | + | ||
127 | + | ||
128 | + extractor.siblingm(is,i,pos,forms,lemmas,feats, w1, w2, g, 0, cluster, svs,n); | ||
129 | + | ||
130 | + for (int l = labels.length - 1; l >= 0; l--) { | ||
131 | + | ||
132 | + int label = labels[l]; | ||
133 | + f.clear(); | ||
134 | + | ||
135 | + for(int k=svs.length-1;k>=0;k--) { | ||
136 | + if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); | ||
137 | + } | ||
138 | + lab2[l] = (float)f.score;//f.getScoreF(); | ||
139 | + } | ||
140 | + d.sib[w1][w2][m][dir]=lab2; | ||
141 | + } | ||
142 | + } | ||
143 | + | ||
144 | + for(int m=sg;m<eg;m++) { | ||
145 | + for(int dir=0;dir<2;dir++) { | ||
146 | + labels = Edges.get(pos[w1], pos[w2],dir==1); | ||
147 | + float[] lab2 = new float[labels.length]; | ||
148 | + | ||
149 | + int g = (m==s||e==m) ? -1 : m; | ||
150 | + | ||
151 | + extractor.gcm(is, i, w1,w2,g, 0, cluster, svs); | ||
152 | + | ||
153 | + for (int l = labels.length - 1; l >= 0; l--) { | ||
154 | + | ||
155 | + int label = labels[l]; | ||
156 | + | ||
157 | + f.clear(); | ||
158 | + for(int k=svs.length-1;k>=0;k--) { | ||
159 | + if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); | ||
160 | + } | ||
161 | + lab2[l] = f.getScoreF(); | ||
162 | + } | ||
163 | + d.gra[w1][w2][m][dir] =lab2; | ||
164 | + } | ||
165 | + } | ||
166 | + | ||
167 | + } | ||
168 | + } catch(Exception e ) { | ||
169 | + e.printStackTrace(); | ||
170 | + } | ||
171 | + return null; | ||
172 | + } | ||
173 | + | ||
174 | + | ||
175 | + static ArrayList<DSet> sets = new ArrayList<DSet>(); | ||
176 | + | ||
177 | + private DSet get() { | ||
178 | + | ||
179 | + synchronized (sets) { | ||
180 | + if (sets.size()==0) return null; | ||
181 | + return sets.remove(sets.size()-1); | ||
182 | + } | ||
183 | + } | ||
184 | + static public void add(int w1, int w2){ | ||
185 | + DSet ds =new DSet(); | ||
186 | + ds.w1=w1; | ||
187 | + ds.w2=w2; | ||
188 | + sets.add(ds); | ||
189 | + } | ||
190 | + | ||
191 | + | ||
192 | + | ||
193 | + | ||
194 | +} |
dependencyParser/basic/mate-tools/src/is2/data/Closed.java
0 → 100755
1 | +package is2.data; | ||
2 | + | ||
3 | + | ||
4 | + | ||
5 | +final public class Closed { | ||
6 | + | ||
7 | + public double p; | ||
8 | + short b,e,m; | ||
9 | + byte dir; | ||
10 | + | ||
11 | + Closed d; | ||
12 | + Open u; | ||
13 | + | ||
14 | + public Closed(short s, short t, int m, int dir,Open u, Closed d, float score) { | ||
15 | + this.b = s; | ||
16 | + this.e = t; | ||
17 | + this.m = (short)m; | ||
18 | + this.dir = (byte)dir; | ||
19 | + this.u=u; | ||
20 | + this.d =d; | ||
21 | + p=score; | ||
22 | + } | ||
23 | + | ||
24 | + | ||
25 | + public void create(Parse parse) { | ||
26 | + if (u != null) u.create(parse); | ||
27 | + if (d != null) d.create(parse); | ||
28 | + } | ||
29 | +} | ||
30 | + | ||
31 | + |
dependencyParser/basic/mate-tools/src/is2/data/Cluster.java
0 → 100644
1 | +/** | ||
2 | + * | ||
3 | + */ | ||
4 | +package is2.data; | ||
5 | + | ||
6 | + | ||
7 | + | ||
8 | +import is2.util.DB; | ||
9 | + | ||
10 | +import java.io.BufferedReader; | ||
11 | +import java.io.DataInputStream; | ||
12 | +import java.io.DataOutputStream; | ||
13 | +import java.io.FileInputStream; | ||
14 | +import java.io.IOException; | ||
15 | +import java.io.InputStreamReader; | ||
16 | + | ||
17 | +/** | ||
18 | + * @author Dr. Bernd Bohnet, 28.10.2010 | ||
19 | + * | ||
20 | + * | ||
21 | + */ | ||
22 | +final public class Cluster { | ||
23 | + | ||
24 | + public static final String LPATH = "LP"; | ||
25 | + public static final String SPATH = "SP"; | ||
26 | + | ||
27 | + // [word][p] p = [0:long-path | 1:short-path] | ||
28 | + final private short[][] word2path; | ||
29 | + | ||
30 | + public Cluster() { | ||
31 | + word2path =new short[0][0]; | ||
32 | + } | ||
33 | + | ||
34 | + /** | ||
35 | + * @param clusterFile | ||
36 | + * @param mf | ||
37 | + * | ||
38 | + */ | ||
39 | + public Cluster(String clusterFile, IEncoderPlus mf, int ls) { | ||
40 | + | ||
41 | + final String REGEX = "\t"; | ||
42 | + | ||
43 | + // register words | ||
44 | + try { | ||
45 | + BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768); | ||
46 | + | ||
47 | + int cnt=0; | ||
48 | + String line; | ||
49 | + while ((line =inputReader.readLine())!=null) { | ||
50 | + | ||
51 | + cnt++; | ||
52 | + try { | ||
53 | + String[] split = line.split(REGEX); | ||
54 | + mf.register(SPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls)); | ||
55 | + mf.register(LPATH, split[0]); | ||
56 | + mf.register(PipeGen.WORD, split[1]); | ||
57 | + } catch(Exception e) { | ||
58 | + System.out.println("Error in cluster line "+cnt+" error: "+e.getMessage()); | ||
59 | + } | ||
60 | + } | ||
61 | + System.out.println("read number of clusters "+cnt); | ||
62 | + inputReader.close(); | ||
63 | + | ||
64 | + } catch (Exception e) { | ||
65 | + e.printStackTrace(); | ||
66 | + } | ||
67 | + | ||
68 | + word2path = new short[mf.getFeatureCounter().get(PipeGen.WORD)][2]; | ||
69 | + | ||
70 | + | ||
71 | + // insert words | ||
72 | + try { | ||
73 | + String line; | ||
74 | + BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768); | ||
75 | + | ||
76 | + while ((line =inputReader.readLine())!=null) { | ||
77 | + | ||
78 | + String[] split = line.split(REGEX); | ||
79 | + int wd = mf.getValue(PipeGen.WORD, split[1]); | ||
80 | + word2path[wd][0] = (short)mf.getValue(SPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls)); | ||
81 | + word2path[wd][1] = (short)mf.getValue(LPATH, split[0]); | ||
82 | + } | ||
83 | + inputReader.close(); | ||
84 | + int fill=0; | ||
85 | + for(int l = 0; l<word2path.length; l++ ){ | ||
86 | + if (word2path[l][0]!=0) fill++; | ||
87 | + } | ||
88 | + /* | ||
89 | + for(int l = 0; l<word2path.length; l++ ){ | ||
90 | + if (word2path[l][1]!=0) fillL++; | ||
91 | + if (word2path[l][1]<-1) System.out.println("lower "+word2path[l][1]); | ||
92 | + } | ||
93 | + */ | ||
94 | + System.out.println("filled "+fill+" of "+word2path.length); | ||
95 | + | ||
96 | + } catch (Exception e) { | ||
97 | + e.printStackTrace(); | ||
98 | + } | ||
99 | + } | ||
100 | + | ||
101 | + /** | ||
102 | + * Read the cluster | ||
103 | + * @param dos | ||
104 | + * @throws IOException | ||
105 | + */ | ||
106 | + public Cluster(DataInputStream dis) throws IOException { | ||
107 | + | ||
108 | + word2path = new short[dis.readInt()][2]; | ||
109 | + for(int i =0;i<word2path.length;i++) { | ||
110 | + word2path[i][0]=dis.readShort(); | ||
111 | + word2path[i][1]=dis.readShort(); | ||
112 | + } | ||
113 | + DB.println("Read cluster with "+word2path.length+" words "); | ||
114 | + } | ||
115 | + | ||
116 | + /** | ||
117 | + * Write the cluster | ||
118 | + * @param dos | ||
119 | + * @throws IOException | ||
120 | + */ | ||
121 | + public void write(DataOutputStream dos) throws IOException { | ||
122 | + | ||
123 | + dos.writeInt(word2path.length); | ||
124 | + for(short[] i : word2path) { | ||
125 | + dos.writeShort(i[0]); | ||
126 | + dos.writeShort(i[1]); | ||
127 | + } | ||
128 | + | ||
129 | + } | ||
130 | + | ||
131 | + /** | ||
132 | + * @param form the id of a word form | ||
133 | + * @return the short path to the word form in the cluster | ||
134 | + | ||
135 | + final public int getSP(int form) { | ||
136 | + if (word2path.length<form) return -1; | ||
137 | + return word2path[form][0]; | ||
138 | + } | ||
139 | + */ | ||
140 | + /** | ||
141 | + * get the long path to a word form in the cluster | ||
142 | + * @param form the id of a word form | ||
143 | + * @return the long path to the word | ||
144 | + */ | ||
145 | + final public int getLP(int form) { | ||
146 | + if (word2path.length<=form || word2path[form].length<=0) return -1; | ||
147 | + return word2path[form][0]==0?-1:word2path[form][0]; | ||
148 | + } | ||
149 | + | ||
150 | + final public int getLP(int form, int l) { | ||
151 | + if (word2path.length<form) return -1; | ||
152 | + return word2path[form][l]==0?-1:word2path[form][l]; | ||
153 | + } | ||
154 | + | ||
155 | + final public int size() { | ||
156 | + return word2path.length; | ||
157 | + } | ||
158 | +} |
dependencyParser/basic/mate-tools/src/is2/data/D4.java
0 → 100644
1 | +/** | ||
2 | + * | ||
3 | + */ | ||
4 | +package is2.data; | ||
5 | + | ||
6 | +import is2.util.DB; | ||
7 | + | ||
8 | +/** | ||
9 | + * @author Dr. Bernd Bohnet, 30.10.2010 | ||
10 | + * | ||
11 | + * This class computes the mapping of features to the weight vector. | ||
12 | + */ | ||
13 | +final public class D4 extends DX { | ||
14 | + private long shift; | ||
15 | + private long h; | ||
16 | + | ||
17 | + | ||
18 | + private final Long2IntInterface _li; | ||
19 | + public D4(Long2IntInterface li) { | ||
20 | + _li=li; | ||
21 | + } | ||
22 | + | ||
23 | + | ||
24 | + final public void clean() { | ||
25 | + v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0; | ||
26 | + shift=0;h=0; | ||
27 | + } | ||
28 | + | ||
29 | + final public void cz3(){ | ||
30 | + if (v0<0||v1<0||v2<0) { h=-1;return;} | ||
31 | + | ||
32 | + h= v0+v1*(shift =a0)+(long)v2*(shift *=a1); | ||
33 | + shift *=a2; | ||
34 | + } | ||
35 | + | ||
36 | + final public long c3(){ | ||
37 | + if (v0<0||v1<0||v2<0) { h=-1;return h;} | ||
38 | + | ||
39 | + h= v0+v1*(shift =a0)+(long)v2*(shift *=a1); | ||
40 | + shift *=a2; | ||
41 | + return h; | ||
42 | + } | ||
43 | + | ||
44 | + final public void cz4(){ | ||
45 | + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;} | ||
46 | + | ||
47 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | ||
48 | + shift *=a3; | ||
49 | + } | ||
50 | + | ||
51 | + final public long c4(){ | ||
52 | + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return h;} | ||
53 | + | ||
54 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | ||
55 | + shift *=a3; | ||
56 | + return h; | ||
57 | + } | ||
58 | + | ||
59 | + | ||
60 | + final public void cz5(){ | ||
61 | + | ||
62 | + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return;} | ||
63 | + | ||
64 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift*=a2)+v4*(shift*=a3); | ||
65 | + shift*=a4; | ||
66 | + | ||
67 | + } | ||
68 | + | ||
69 | + final public long c5(){ | ||
70 | + | ||
71 | + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return h;} | ||
72 | + | ||
73 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2)+v4*(shift*=a3); | ||
74 | + shift*=a4; | ||
75 | + return h; | ||
76 | + } | ||
77 | + | ||
78 | + | ||
79 | + final public void cz6(){ | ||
80 | + | ||
81 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;} | ||
82 | + | ||
83 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | ||
84 | + h +=v4*(shift*=a3)+v5*(shift*=a4); | ||
85 | + shift*=a5; | ||
86 | + } | ||
87 | + | ||
88 | + final public long c6(){ | ||
89 | + | ||
90 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return h;} | ||
91 | + | ||
92 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | ||
93 | + h +=v4*(shift*=a3)+v5*(shift*=a4); | ||
94 | + shift*=a5; | ||
95 | + return h; | ||
96 | + } | ||
97 | + | ||
98 | + | ||
99 | + final public long cs(int b, int v) { | ||
100 | + if (h<0) {h=-1; return h;} | ||
101 | + | ||
102 | + h += v*shift; | ||
103 | + shift *=b; | ||
104 | + return h; | ||
105 | + | ||
106 | + } | ||
107 | + | ||
108 | + final public void csa(int b, int v, IFV f) { | ||
109 | + if (h<0) {h=-1; return;} | ||
110 | + | ||
111 | + h += v*shift; | ||
112 | + shift *=b; | ||
113 | + f.add(_li.l2i(h)); | ||
114 | + } | ||
115 | + | ||
116 | + final public long csa(int b, int v) { | ||
117 | + if (h<0) {h=-1; return-1; } | ||
118 | + | ||
119 | + h += v*shift; | ||
120 | + shift *=b; | ||
121 | + return h; | ||
122 | + } | ||
123 | + | ||
124 | + public final long getVal(){ | ||
125 | + return h; | ||
126 | + } | ||
127 | + | ||
128 | + public final void map(IFV f, long l){ | ||
129 | + if (l>0) f.add(this._li.l2i(l)); | ||
130 | + } | ||
131 | + | ||
132 | + /** | ||
133 | + * @param f | ||
134 | + */ | ||
135 | + final public void add(IFV f) { | ||
136 | + f.add(_li.l2i(h)); | ||
137 | + } | ||
138 | + | ||
139 | + final public void cz7() { | ||
140 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;} | ||
141 | + | ||
142 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | ||
143 | + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5); | ||
144 | + shift*=a6; | ||
145 | + | ||
146 | + } | ||
147 | + | ||
148 | + final public long c7() { | ||
149 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;} | ||
150 | + | ||
151 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | ||
152 | + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5); | ||
153 | + shift*=a6; | ||
154 | + return h; | ||
155 | + } | ||
156 | + | ||
157 | + /** | ||
158 | + * | ||
159 | + */ | ||
160 | + final public void cz8() { | ||
161 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;} | ||
162 | + | ||
163 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | ||
164 | + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5)+v7*(shift*=a6); | ||
165 | + shift*=a7; | ||
166 | + } | ||
167 | + | ||
168 | + final public void cz9() { | ||
169 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0||v8<0) {h=-1; return;} | ||
170 | + | ||
171 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | ||
172 | + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5)+v7*(shift*=a6)+v8*(shift*=a7); | ||
173 | + shift*=a8; | ||
174 | + } | ||
175 | + | ||
176 | + | ||
177 | + /* (non-Javadoc) | ||
178 | + * @see is2.data.DX#computeLabeValue(short, short) | ||
179 | + */ | ||
180 | + @Override | ||
181 | + public int computeLabeValue(int label, int shift) { | ||
182 | + return label*shift; | ||
183 | + } | ||
184 | + | ||
185 | + | ||
186 | + public void fix() { | ||
187 | + | ||
188 | + } | ||
189 | + | ||
190 | + | ||
191 | +} | ||
0 | \ No newline at end of file | 192 | \ No newline at end of file |
dependencyParser/basic/mate-tools/src/is2/data/D6.java
0 → 100644
1 | +/** | ||
2 | + * | ||
3 | + */ | ||
4 | +package is2.data; | ||
5 | + | ||
6 | +import is2.util.DB; | ||
7 | + | ||
8 | +/** | ||
9 | + * @author Dr. Bernd Bohnet, 30.10.2010 | ||
10 | + * | ||
11 | + * This class computes the mapping of features to the weight vector. | ||
12 | + */ | ||
13 | +final public class D6 extends DX { | ||
14 | + private long shift; | ||
15 | + private long h; | ||
16 | + | ||
17 | + | ||
18 | + private final Long2IntInterface _li; | ||
19 | + public D6(Long2IntInterface li) { | ||
20 | + _li=li; | ||
21 | + } | ||
22 | + | ||
23 | + boolean fixed =false; | ||
24 | + | ||
25 | + public void fix() { | ||
26 | + | ||
27 | + if (fixed) { | ||
28 | + DB.println("warning: already fixed"); | ||
29 | + // return; | ||
30 | + } | ||
31 | + | ||
32 | + long t0= 1, t1=a0, t2=t1*a1, t3=t2*a2,t4=t3*a3, t5=t4*a4,t6=t5*a5, t7=t6*a6, t8=t7*a7, t9=t8*a8; | ||
33 | + | ||
34 | + | ||
35 | + | ||
36 | + | ||
37 | + a0=t0;a1=t1;a2=t2;a3=t3;a4=t4;a5=t5;a6=t6;a7=t7;a8=t8; a9=t9; | ||
38 | + | ||
39 | + fixed=true; | ||
40 | + } | ||
41 | + | ||
42 | + | ||
43 | + | ||
44 | + final public void clean() { | ||
45 | + v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0; | ||
46 | + shift=0;h=0; | ||
47 | + } | ||
48 | + | ||
49 | + final public void cz3(){ | ||
50 | + if (v0<0||v1<0||v2<0) { h=-1;return;} | ||
51 | + | ||
52 | + h= v0+v1*a1+v2*a2; | ||
53 | + shift =a3; | ||
54 | + } | ||
55 | + | ||
56 | + final public long c3(){ | ||
57 | + if (v0<0||v1<0||v2<0) { h=-1;return h;} | ||
58 | + | ||
59 | + h= v0+v1*a1+v2*a2; | ||
60 | + shift =a3; | ||
61 | + return h; | ||
62 | + } | ||
63 | + | ||
64 | + final public void cz4(){ | ||
65 | + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;} | ||
66 | + | ||
67 | + h =v0+v1*a1+v2*a2+v3*a3; | ||
68 | + shift =a4; | ||
69 | + } | ||
70 | + | ||
71 | + final public long c4(){ | ||
72 | + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return h;} | ||
73 | + | ||
74 | + h =v0+v1*a1+v2*a2+v3*a3; | ||
75 | + shift =a4; | ||
76 | + return h; | ||
77 | + } | ||
78 | + | ||
79 | + | ||
80 | + final public void cz5(){ | ||
81 | + | ||
82 | + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return;} | ||
83 | + | ||
84 | + h =v0+v1*a1+v2*a2+v3*a3+v4*a4; | ||
85 | + shift=a5; | ||
86 | + | ||
87 | + } | ||
88 | + | ||
89 | + final public long c5(){ | ||
90 | + | ||
91 | + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return h;} | ||
92 | + | ||
93 | + h =v0+v1*a1+v2*a2+v3*a3+v4*a4; | ||
94 | + shift=a5; | ||
95 | + return h; | ||
96 | + } | ||
97 | + | ||
98 | + | ||
99 | + final public void cz6(){ | ||
100 | + | ||
101 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;} | ||
102 | + | ||
103 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; | ||
104 | + shift=a6; | ||
105 | + } | ||
106 | + | ||
107 | + final public long c6(){ | ||
108 | + | ||
109 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return h;} | ||
110 | + | ||
111 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; | ||
112 | + shift=a6; | ||
113 | + return h; | ||
114 | + } | ||
115 | + | ||
116 | + | ||
117 | + final public long cs(int b, int v) { | ||
118 | + if (h<0) {h=-1; return h;} | ||
119 | + | ||
120 | + h += v*shift; | ||
121 | + shift *=b; | ||
122 | + return h; | ||
123 | + | ||
124 | + } | ||
125 | + | ||
126 | + final public void csa(int b, int v, IFV f) { | ||
127 | + if (h<0) {h=-1; return;} | ||
128 | + | ||
129 | + h += v*shift; | ||
130 | + shift *=b; | ||
131 | + f.add(_li.l2i(h)); | ||
132 | + } | ||
133 | + | ||
134 | + final public long csa(int b, int v) { | ||
135 | + if (h<0) {h=-1; return-1; } | ||
136 | + | ||
137 | + h += v*shift; | ||
138 | + shift *=b; | ||
139 | + return h; | ||
140 | + } | ||
141 | + | ||
142 | + public final long getVal(){ | ||
143 | + return h; | ||
144 | + } | ||
145 | + | ||
146 | + public final void map(IFV f, long l){ | ||
147 | + if (l>0) f.add(this._li.l2i(l)); | ||
148 | + } | ||
149 | + | ||
150 | + /** | ||
151 | + * @param f | ||
152 | + */ | ||
153 | + final public void add(IFV f) { | ||
154 | + f.add(_li.l2i(h)); | ||
155 | + } | ||
156 | + | ||
157 | + final public void cz7() { | ||
158 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;} | ||
159 | + | ||
160 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; | ||
161 | + shift=a7; | ||
162 | + | ||
163 | + } | ||
164 | + | ||
165 | + final public long c7() { | ||
166 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;} | ||
167 | + | ||
168 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; | ||
169 | + shift=a7; | ||
170 | + return h; | ||
171 | + } | ||
172 | + | ||
173 | + /** | ||
174 | + * | ||
175 | + */ | ||
176 | + final public void cz8() { | ||
177 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;} | ||
178 | + | ||
179 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7; | ||
180 | + shift=a8; | ||
181 | + } | ||
182 | + | ||
183 | + | ||
184 | + | ||
185 | + /* (non-Javadoc) | ||
186 | + * @see is2.data.DX#computeLabeValue(short, short) | ||
187 | + */ | ||
188 | + @Override | ||
189 | + public int computeLabeValue(int label, int shift) { | ||
190 | + return label*shift; | ||
191 | + } | ||
192 | + | ||
193 | + | ||
194 | + | ||
195 | + | ||
196 | + | ||
197 | +} | ||
0 | \ No newline at end of file | 198 | \ No newline at end of file |
dependencyParser/basic/mate-tools/src/is2/data/D7.java
0 → 100644
1 | +/** | ||
2 | + * | ||
3 | + */ | ||
4 | +package is2.data; | ||
5 | + | ||
6 | + | ||
7 | +/** | ||
8 | + * @author Dr. Bernd Bohnet, 30.10.2010 | ||
9 | + * | ||
10 | + * This class computes the mapping of features to the weight vector. | ||
11 | + */ | ||
12 | +final public class D7 extends DX { | ||
13 | + | ||
14 | + private long shift; | ||
15 | + private long h; | ||
16 | + private final Long2IntInterface _li; | ||
17 | + | ||
18 | + public D7(Long2IntInterface li) { | ||
19 | + _li=li; | ||
20 | + } | ||
21 | + | ||
22 | + boolean fixed =false; | ||
23 | + | ||
24 | + public void fix() { | ||
25 | + | ||
26 | + long t0= 1, t1=a0, t2=t1*a1, t3=t2*a2,t4=t3*a3, t5=t4*a4,t6=t5*a5, t7=t6*a6, t8=t7*a7, t9=t8*a8; | ||
27 | + | ||
28 | + a0=t0;a1=t1;a2=t2;a3=t3;a4=t4;a5=t5;a6=t6;a7=t7;a8=t8; a9=t9; | ||
29 | + | ||
30 | + } | ||
31 | + | ||
32 | + | ||
33 | + | ||
34 | + final public void clean() { | ||
35 | + v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0; | ||
36 | + shift=0;h=0; | ||
37 | + } | ||
38 | + | ||
39 | + final public void cz3(){ | ||
40 | + if (v2<0) { h=-1;return;} | ||
41 | + | ||
42 | + h= v0+v1*a1+v2*a2; | ||
43 | + shift =a3; | ||
44 | + } | ||
45 | + | ||
46 | + final public long c3(){ | ||
47 | + if (v2<0) { h=-1;return h;} | ||
48 | + | ||
49 | + h= v0+v1*a1+v2*a2; | ||
50 | + shift =a3; | ||
51 | + return h; | ||
52 | + } | ||
53 | + | ||
54 | + final public long d3(){ | ||
55 | + if (v2<0)return -1; | ||
56 | + return v0+v2*a2; | ||
57 | + } | ||
58 | + | ||
59 | + final public void cz4(){ | ||
60 | + // if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;} | ||
61 | + if (v2<0||v3<0) {h=-1;return;} | ||
62 | + | ||
63 | + h =v0+v1*a1+v2*a2+v3*a3; | ||
64 | + shift =a4; | ||
65 | + } | ||
66 | + | ||
67 | + final public long c4(){ | ||
68 | + if (v2<0||v3<0) {h=-1;return h;} | ||
69 | + | ||
70 | + h =v0+v1*a1+v2*a2+v3*a3; | ||
71 | + shift =a4; | ||
72 | + return h; | ||
73 | + } | ||
74 | + | ||
75 | + | ||
76 | + final public long d4(){ | ||
77 | + if (v2<0||v3<0) return -1; | ||
78 | + return v0+v2*a2+v3*a3; | ||
79 | + } | ||
80 | + | ||
81 | + | ||
82 | + final public void cz5(){ | ||
83 | + | ||
84 | + if (v2<0||v3<0||v4<0) {h=-1;return;} | ||
85 | + | ||
86 | + h =v0+v1*a1+v2*a2+v3*a3+v4*a4; | ||
87 | + shift=a5; | ||
88 | + | ||
89 | + } | ||
90 | + | ||
91 | + final public long c5(){ | ||
92 | + | ||
93 | + if (v2<0||v3<0||v4<0) {h=-1;return h;} | ||
94 | + | ||
95 | + h =v0+v1*a1+v2*a2+v3*a3+v4*a4; | ||
96 | + shift=a5; | ||
97 | + return h; | ||
98 | + } | ||
99 | + | ||
100 | + final public long d5(){ | ||
101 | + if (v2<0||v3<0||v4<0) return -1; | ||
102 | + return v0+v2*a2+v3*a3+v4*a4; | ||
103 | + } | ||
104 | + | ||
105 | + | ||
106 | + final public void cz6(){ | ||
107 | + | ||
108 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;} | ||
109 | + | ||
110 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; | ||
111 | + shift=a6; | ||
112 | + } | ||
113 | + | ||
114 | + final public long c6(){ | ||
115 | + | ||
116 | + if (v2<0||v3<0||v4<0||v5<0) {h=-1; return h;} | ||
117 | + | ||
118 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; | ||
119 | + shift=a6; | ||
120 | + return h; | ||
121 | + } | ||
122 | + | ||
123 | + final public long d6(){ | ||
124 | + if (v2<0||v3<0||v4<0||v5<0) return -1; | ||
125 | + return v0+v2*a2+v3*a3 +v4*a4+v5*a5; | ||
126 | + } | ||
127 | + | ||
128 | + | ||
129 | + final public long cs(int b, int v) { | ||
130 | + if (h<0) {h=-1; return h;} | ||
131 | + | ||
132 | + h += v*shift; | ||
133 | + shift *=b; | ||
134 | + return h; | ||
135 | + | ||
136 | + } | ||
137 | + | ||
138 | + final public void csa(int b, int v, IFV f) { | ||
139 | + if (h<0) {h=-1; return;} | ||
140 | + | ||
141 | + h += v*shift; | ||
142 | + shift *=b; | ||
143 | + f.add(_li.l2i(h)); | ||
144 | + } | ||
145 | + | ||
146 | + final public long csa(int b, int v) { | ||
147 | + if (h<0) {h=-1; return-1; } | ||
148 | + | ||
149 | + h += v*shift; | ||
150 | + shift *=b; | ||
151 | + return h; | ||
152 | + } | ||
153 | + | ||
154 | + public final long getVal(){ | ||
155 | + return h; | ||
156 | + } | ||
157 | + | ||
158 | + public final void map(IFV f, long l){ | ||
159 | + if (l>0) f.add(this._li.l2i(l)); | ||
160 | + } | ||
161 | + | ||
162 | + /** | ||
163 | + * @param f | ||
164 | + */ | ||
165 | + final public void add(IFV f) { | ||
166 | + f.add(_li.l2i(h)); | ||
167 | + } | ||
168 | + | ||
169 | + final public void cz7() { | ||
170 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;} | ||
171 | + | ||
172 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; | ||
173 | + shift=a7; | ||
174 | + | ||
175 | + } | ||
176 | + | ||
177 | + | ||
178 | + final public long c7() { | ||
179 | + if (v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;} | ||
180 | + | ||
181 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; | ||
182 | + shift=a7; | ||
183 | + return h; | ||
184 | + } | ||
185 | + | ||
186 | + final public long d7() { | ||
187 | + if (v2<0||v3<0||v4<0||v5<0||v6<0) return -1; | ||
188 | + return v0+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; | ||
189 | + } | ||
190 | + | ||
191 | + /** | ||
192 | + * | ||
193 | + */ | ||
194 | + final public void cz8() { | ||
195 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;} | ||
196 | + | ||
197 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7; | ||
198 | + shift=a8; | ||
199 | + } | ||
200 | + | ||
201 | + final public long d8() { | ||
202 | + if (v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {return-1;} | ||
203 | + return v0+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7; | ||
204 | + } | ||
205 | + | ||
206 | + | ||
207 | + | ||
208 | + /* (non-Javadoc) | ||
209 | + * @see is2.data.DX#computeLabeValue(short, short) | ||
210 | + */ | ||
211 | + @Override | ||
212 | + public int computeLabeValue(int label, int shift) { | ||
213 | + return label*shift; | ||
214 | + } | ||
215 | + | ||
216 | + | ||
217 | + | ||
218 | + | ||
219 | + | ||
220 | +} | ||
0 | \ No newline at end of file | 221 | \ No newline at end of file |
dependencyParser/basic/mate-tools/src/is2/data/DPSTree.java
0 → 100644
1 | +/** | ||
2 | + * | ||
3 | + */ | ||
4 | +package is2.data; | ||
5 | + | ||
6 | +import is2.util.DB; | ||
7 | + | ||
8 | +import java.util.ArrayList; | ||
9 | +import java.util.Collections; | ||
10 | +import java.util.Stack; | ||
11 | + | ||
/**
 * @author Dr. Bernd Bohnet, 17.01.2011
 *
 * Dynamic phrase structure tree.
 *
 * The nodes are the indices 0..size()-1; for each node the arrays
 * {@link #heads} and {@link #labels} hold the head and the label. The arrays
 * may be longer than size() and are replaced by larger ones when needed.
 */
public class DPSTree {


	// the number of nodes
	private int size=0;

	public int[] heads;
	public int[] labels;

	/** Creates an empty tree with room for 30 nodes. */
	public DPSTree() {
		this(30);
	}

	/**
	 * @param initialCapacity the initial length of the head and label arrays
	 */
	public DPSTree(int initialCapacity) {
		heads = new int[initialCapacity];
		labels = new int[initialCapacity];
	}


	/**
	 * Increases the capacity of this <tt>Graph</tt> instance, if
	 * necessary, to ensure that it can hold at least the number of nodes
	 * specified by the minimum capacity argument.
	 *
	 * @param   minCapacity   the desired minimum capacity.
	 */
	private void ensureCapacity(int minCapacity) {

		int oldCapacity = heads.length;
		if (minCapacity <= oldCapacity) return;

		// grow by half of the current capacity so that repeated calls of create
		// do not copy the arrays each time; never less than minCapacity+1
		int newCapacity = Math.max(minCapacity + 1, oldCapacity + (oldCapacity >> 1));
		// guard against int overflow
		if (newCapacity < minCapacity) newCapacity = minCapacity;

		heads = enlarge(heads, newCapacity);
		labels = enlarge(labels, newCapacity);
	}

	/**
	 * @return a new array of the given length that starts with the content of <code>old</code>
	 */
	private static int[] enlarge(int[] old, int newCapacity) {
		int[] larger = new int[newCapacity];
		System.arraycopy(old, 0, larger, 0, Math.min(old.length, newCapacity));
		return larger;
	}


	/** @return the number of nodes */
	final public int size() {
		return size;
	}


	final public boolean isEmpty() {
		return size == 0;
	}

	/** Removes all nodes; the arrays are kept as they are. */
	final public void clear() {
		size = 0;
	}

	/**
	 * Sets the number of nodes to <code>terminals+1</code>.
	 */
	final public void createTerminals(int terminals) {
		ensureCapacity(terminals+1);
		size= terminals+1;
	}

	/**
	 * Appends a new node.
	 *
	 * @param phrase the label of the node
	 * @return the index of the new node
	 */
	final public int create(int phrase) {

		ensureCapacity(size+1);
		labels[size] =phrase;
		size++;
		return size-1;
	}

	/**
	 * Creates or relabels the node with the given index.
	 *
	 * @param phrase the label of the node
	 * @param nodeId the index of the node; a negative value appends a new node
	 * @return the index of the node
	 */
	public int create(int phrase, int nodeId) {

		if (nodeId<0) return this.create(phrase);
		ensureCapacity(nodeId+1);
		labels[nodeId] =phrase;
		// nodeId == size is a new node too; without counting it the next
		// appended node would overwrite this one
		if (size<=nodeId) size=nodeId+1;
		return nodeId;
	}

	/**
	 * Sets the head of node i to j. The capacity is not changed; i has to be
	 * a valid index of {@link #heads}.
	 */
	public void createEdge(int i, int j) {
		heads[i] =j;
	}

	/**
	 * @return a copy of this tree that shares no array with it
	 */
	@Override
	public DPSTree clone() {
		DPSTree ps = new DPSTree(this.size+1);

		System.arraycopy(heads, 0, ps.heads, 0, size);
		System.arraycopy(labels, 0, ps.labels, 0, size);
		ps.size=size;
		return ps;

	}

}
0 | \ No newline at end of file | 116 | \ No newline at end of file |
dependencyParser/basic/mate-tools/src/is2/data/DX.java
0 → 100644
1 | +/** | ||
2 | + * | ||
3 | + */ | ||
4 | +package is2.data; | ||
5 | + | ||
6 | +import is2.data.IFV; | ||
7 | + | ||
8 | +/** | ||
9 | + * @author Dr. Bernd Bohnet, 30.08.2011 | ||
10 | + * | ||
11 | + * | ||
12 | + */ | ||
13 | +public abstract class DX { | ||
14 | + | ||
15 | + public long a0,a1,a2,a3,a4,a5,a6,a7,a8,a9; | ||
16 | + public long v0,v1,v2,v3,v4,v5,v6,v7,v8,v9; | ||
17 | + | ||
18 | + public abstract void cz3(); | ||
19 | + | ||
20 | + public abstract void cz4(); | ||
21 | + | ||
22 | + public abstract void cz5(); | ||
23 | + | ||
24 | + public abstract void cz6(); | ||
25 | + | ||
26 | + public abstract void cz7(); | ||
27 | + | ||
28 | + public abstract void cz8(); | ||
29 | + | ||
30 | + public abstract void clean(); | ||
31 | + | ||
32 | + public abstract long cs(int b, int v); | ||
33 | + | ||
34 | + public abstract long csa(int b, int v); | ||
35 | + | ||
36 | + public abstract void csa(int b, int v, IFV f); | ||
37 | + | ||
38 | + /** | ||
39 | + * @return | ||
40 | + */ | ||
41 | + public abstract long getVal(); | ||
42 | + | ||
43 | + /** | ||
44 | + * @param f | ||
45 | + * @param l | ||
46 | + */ | ||
47 | + public abstract void map(IFV f, long l); | ||
48 | + | ||
49 | + /** | ||
50 | + * @param label | ||
51 | + * @param s_type | ||
52 | + * @return | ||
53 | + */ | ||
54 | + public abstract int computeLabeValue(int label,int s_type) ; | ||
55 | + | ||
56 | + public abstract void fix(); | ||
57 | + | ||
58 | +} | ||
0 | \ No newline at end of file | 59 | \ No newline at end of file |
dependencyParser/basic/mate-tools/src/is2/data/DataF.java
0 → 100755
1 | +package is2.data; | ||
2 | + | ||
3 | + | ||
4 | + | ||
5 | +final public class DataF { | ||
6 | + | ||
7 | + final public short typesLen; | ||
8 | + final public int len; | ||
9 | + | ||
10 | + // first order features | ||
11 | + final public float[][] pl; | ||
12 | + | ||
13 | + // remove !!!! | ||
14 | +// final public float[][] highestLab; | ||
15 | + | ||
16 | + //final public FV[][][] label; | ||
17 | + final public float[][][][] lab; | ||
18 | + | ||
19 | + | ||
20 | + public FV fv; | ||
21 | + | ||
22 | + final public float[][][][][] sib; | ||
23 | + | ||
24 | + final public float[][][][][] gra; | ||
25 | + | ||
26 | + | ||
27 | + public DataF(int length, short types) { | ||
28 | + typesLen=types; | ||
29 | + len =length; | ||
30 | + | ||
31 | + pl = new float[length][length]; | ||
32 | + lab = new float[length][length][types][2]; | ||
33 | + // highestLab = new float[length][length]; | ||
34 | + | ||
35 | + sib = new float[length][length][length][2][]; | ||
36 | + gra = new float[length][length][length][2][]; | ||
37 | + | ||
38 | + } | ||
39 | +} |
dependencyParser/basic/mate-tools/src/is2/data/DataFES.java
0 → 100644
1 | +package is2.data; | ||
2 | + | ||
3 | + | ||
4 | + | ||
5 | +final public class DataFES { | ||
6 | + | ||
7 | + final public short typesLen; | ||
8 | + final public int len; | ||
9 | + | ||
10 | + // first order features | ||
11 | + final public float[][] pl; | ||
12 | + | ||
13 | + // remove !!!! | ||
14 | +// final public float[][] highestLab; | ||
15 | + | ||
16 | + //final public FV[][][] label; | ||
17 | + final public float[][][] lab; | ||
18 | + | ||
19 | + | ||
20 | + public FV fv; | ||
21 | + | ||
22 | + final public float[][][][] sib; | ||
23 | + | ||
24 | + final public float[][][][] gra; | ||
25 | + | ||
26 | + | ||
27 | + public DataFES(int length, short types) { | ||
28 | + typesLen=types; | ||
29 | + len =length; | ||
30 | + | ||
31 | + pl = new float[length][length]; | ||
32 | + lab = new float[length][length][types]; | ||
33 | + | ||
34 | + sib = new float[length][length][length][]; | ||
35 | + gra = new float[length][length][length][]; | ||
36 | + | ||
37 | + } | ||
38 | +} |
dependencyParser/basic/mate-tools/src/is2/data/DataT.java
0 → 100644
1 | +package is2.data; | ||
2 | + | ||
3 | + | ||
4 | + | ||
/**
 * Holder for a single four-dimensional float table allocated for
 * {@code len} positions and {@code typesLen} label types.
 */
public final class DataT {

    /** Number of label types; size of the third dimension of {@link #lab}. */
    public final short typesLen;

    /** Number of positions; size of the first two dimensions of {@link #lab}. */
    public final int len;

    /**
     * Table indexed {@code [a][b][lab][op]} with sizes
     * {@code [len][len][typesLen][4]}.
     */
    public final float[][][][] lab;

    /**
     * Allocates the table for the given dimensions.
     *
     * @param length number of positions
     * @param types  number of label types
     */
    public DataT(int length, short types) {
        this.typesLen = types;
        this.len = length;
        this.lab = new float[length][length][types][4];
    }
}