Commit d104f60d12d6c6a6335718b56cac59ffe40dd9c3
1 parent
d56a8b56
Dependency Parser split into two versions: basic and experimental
Showing
495 changed files
with
34859 additions
and
5714 deletions
Too many changes to show.
To preserve performance only 32 of 495 files are displayed.
dependencyParser/basic/mate-tools/.classpath
0 → 100644
1 | +<?xml version="1.0" encoding="UTF-8"?> | |
2 | +<classpath> | |
3 | + <classpathentry kind="src" path="src"/> | |
4 | + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> | |
5 | + <classpathentry kind="lib" path="/mtt/lib/trove.jar"/> | |
6 | + <classpathentry kind="lib" path="lib/commons-math-2.2.jar"/> | |
7 | + <classpathentry kind="output" path="classes"/> | |
8 | +</classpath> | |
... | ... |
dependencyParser/mate-tools/.externalToolBuilders/New_Builder.launch renamed to dependencyParser/basic/mate-tools/.externalToolBuilders/New_Builder.launch
dependencyParser/mate-tools/.externalToolBuilders/ana.launch renamed to dependencyParser/basic/mate-tools/.externalToolBuilders/ana.launch
dependencyParser/mate-tools/.project renamed to dependencyParser/basic/mate-tools/.project
dependencyParser/mate-tools/build.xml renamed to dependencyParser/basic/mate-tools/build.xml
dependencyParser/mate-tools/lib/commons-math-2.2.jar renamed to dependencyParser/basic/mate-tools/lib/commons-math-2.2.jar
No preview for this file type
dependencyParser/mate-tools/lib/trove-2.0.4.jar renamed to dependencyParser/basic/mate-tools/lib/trove-2.0.4.jar
No preview for this file type
dependencyParser/basic/mate-tools/src/decoder/ParallelDecoder.java
0 → 100755
1 | +package decoder; | |
2 | + | |
3 | +import is2.data.Closed; | |
4 | +import is2.data.DataF; | |
5 | +import is2.data.Edges; | |
6 | +import is2.data.Open; | |
7 | + | |
8 | +import java.util.ArrayList; | |
9 | +import java.util.concurrent.Callable; | |
10 | + | |
11 | +/** | |
12 | + * @author Bernd Bohnet, 30.08.2009 | |
13 | + * | |
14 | + * This class implements a parallel decoder: each worker takes word pairs from a shared list and fills their open and closed spans. | |
15 | + */ | |
16 | +final public class ParallelDecoder implements Callable<Object> | |
17 | +{ | |
18 | + // some constants | |
19 | + private static final float INIT_BEST = (-1.0F / 0.0F); | |
20 | + private static final boolean[] DIR ={false,true}; | |
21 | + | |
22 | + // the data space of the weights for a dependency tree | |
23 | + final private DataF x; | |
24 | + | |
25 | + private short[] pos; | |
26 | + | |
27 | + private Open O[][][][]; | |
28 | + private Closed C[][][][] ; | |
29 | + | |
30 | + private int n; | |
31 | + | |
32 | + boolean done=false; | |
33 | + public boolean waiting =false; | |
34 | + | |
35 | + /** | |
36 | + * Initialize the parallel decoder. | |
37 | + * | |
38 | + * @param pos part-of-speech | |
39 | + * @param d data | |
40 | + * @param edges part-of-speech edge mapping | |
41 | + * @param o open spans | |
42 | + * @param c closed spans | |
43 | + * @param n number of words | |
44 | + */ | |
45 | + public ParallelDecoder(short[] pos, DataF d, Open o[][][][], Closed c[][][][], int n) { | |
46 | + | |
47 | + this.pos =pos; | |
48 | + this.x =d; | |
49 | + | |
50 | + this.O=o; | |
51 | + this.C=c; | |
52 | + this.n=n; | |
53 | + } | |
54 | + | |
55 | + | |
56 | + private static class DSet { short w1,w2;} | |
57 | + | |
58 | + @Override | |
59 | + public Object call() { | |
60 | + | |
61 | + while (true){ | |
62 | + | |
63 | + DSet set = get(); | |
64 | + if (done && set==null) break; | |
65 | + | |
66 | + if (set ==null) return null; | |
67 | + | |
68 | + short s=set.w1, t=set.w2; | |
69 | + | |
70 | + for(short dir =1;dir>=0;dir--) { | |
71 | + | |
72 | + short[] labs = (dir==1) ? Edges.get(pos[s],pos[t], false):Edges.get(pos[t],pos[s], true); | |
73 | + | |
74 | + O[s][t][dir] = new Open[labs.length]; | |
75 | + for (int l = O[s][t][dir].length - 1; l >= 0; l--) { | |
76 | + | |
77 | + double tRP = INIT_BEST; | |
78 | + | |
79 | + Closed tL = null, tR = null; | |
80 | + | |
81 | + for (int r = s; r < t; r++) { | |
82 | + | |
83 | + if (s == 0 && r != 0) continue; | |
84 | + | |
85 | + double tLPr = INIT_BEST,tRPr = INIT_BEST; | |
86 | + Closed tLCld = null, tRCld = null; | |
87 | + | |
88 | + if (r == s) tLPr = dir==1 ? x.sib[s][t][s][0][l] : x.gra[t][s][s][1 ][l]; | |
89 | + else | |
90 | + for (int i = s + 1; i <= r; i++) | |
91 | + if (((dir==1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + C[s][r][1][i].p) > tLPr) { | |
92 | + tLPr = ((dir==1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + C[s][r][1][i].p);tLCld = C[s][r][1][i];} | |
93 | + | |
94 | + if (r == t-1) tRPr = dir==1 ? x.gra[s][t][s][0][l] : x.sib[t][s][s][1][l]; | |
95 | + else | |
96 | + for (int i = r + 1; i < t; i++) | |
97 | + if (((dir == 1 ? x.gra[s][t][i][0][l] : x.sib[t][s][i][1][l]) + C[r+1][t][0][i].p) > tRPr) { | |
98 | + tRPr = ((dir==1?x.gra[s][t][i][0][l]:x.sib[t][s][i][1][l]) + C[r+1][t][0][i].p); tRCld=C[r + 1][t][0][i];} | |
99 | + | |
100 | + if (tLPr + tRPr > tRP) {tRP = tLPr + tRPr; tL = tLCld;tR = tRCld;} | |
101 | + } | |
102 | + O[s][t][dir][l] = new Open(s, t, dir, labs[l],tL, tR, | |
103 | + (float) ( tRP+((dir==1)?x.pl[s][t]: x.pl[t][s]) + ((dir==1)? x.lab[s][t][labs[l]][0]:x.lab[t][s][labs[l]][1]))); | |
104 | + } | |
105 | + } | |
106 | + C[s][t][1] = new Closed[n]; C[s][t][0] = new Closed[n]; | |
107 | + | |
108 | + for (int m = s ; m <= t; m++) { | |
109 | + for(boolean d : DIR) { | |
110 | + if ((d && m!=s)||!d && (m!=t && s!=0)) { | |
111 | + | |
112 | + // create closed structure | |
113 | + | |
114 | + double top = INIT_BEST; | |
115 | + | |
116 | + Open tU = null; Closed tL = null; | |
117 | + int numLabels =O[(d ? s : m)][(d ? m : t)][d?1:0].length; | |
118 | + | |
119 | + //for (int l = numLabels-1; l >=0; l--) { | |
120 | + for (int l = 0; l < numLabels; l++) { | |
121 | + | |
122 | + Open hi = O[(d ? s : m)][(d ? m : t)][d?1:0][l]; | |
123 | + for (int amb = m + (d?1:-1); amb != (d?t:s) + (d?1:-1); amb += (d?1:-1)) { | |
124 | + | |
125 | + if ((hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][d?0:1][l]) > top) { | |
126 | + top = (hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][(d?0:1)][l]); tU = hi; tL=C[d?m:s][d?t:m][d?1:0][amb];} | |
127 | + } | |
128 | + | |
129 | + if ((m == (d ? t : s)) && (hi.p + x.gra[d?s:t][m][d?s:t][(d ? 0 :1)][l]) > top) { | |
130 | + top = (hi.p + x.gra[(d ? s : t)][m][d?s:t][d?0:1][l]); tU = hi; tL = null;} | |
131 | + } | |
132 | + C[s][t][d?1:0][m] = new Closed(s, t, m, d?1:0,tU,tL,(float) top); | |
133 | + } | |
134 | + } | |
135 | + } | |
136 | + } | |
137 | + return null; | |
138 | + } | |
139 | + | |
140 | + public static ArrayList<DSet> sets = new ArrayList<DSet>(); | |
141 | + | |
142 | + static synchronized private DSet get() { | |
143 | + synchronized (sets) { | |
144 | + if (sets.size()==0) return null; | |
145 | + return sets.remove(sets.size()-1); | |
146 | + } | |
147 | + } | |
148 | + | |
149 | + public static void add(short w1, short w2){ | |
150 | + DSet ds =new DSet(); | |
151 | + ds.w1=w1; ds.w2=w2; | |
152 | + // lock on the same monitor get() uses, so the shared list is never modified by two threads at once | |
153 | + synchronized (sets) { sets.add(ds); } | |
154 | + } | |
155 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/decoder/ParallelRearrangeNBest.java
0 → 100755
1 | +package decoder; | |
2 | + | |
3 | +import is2.data.DataF; | |
4 | +import is2.data.Edges; | |
5 | +import is2.data.Parse; | |
6 | +import is2.data.ParseNBest; | |
7 | + | |
8 | +import java.util.ArrayList; | |
9 | +import java.util.concurrent.Callable; | |
10 | + | |
11 | +import extractors.Extractor; | |
12 | + | |
13 | +/** | |
14 | + * @author Dr. Bernd Bohnet, 30.08.2009 | |
15 | + * | |
16 | + * This class implements a parallel edge rearrangement for non-projective parsing; | |
17 | + * The linear method was first suggested by Ryan McDonald et al. (2005). | |
18 | + */ | |
19 | +final public class ParallelRearrangeNBest implements Callable<Object> { | |
20 | + | |
21 | + // new parent child combination to explore | |
22 | + final static class PA { | |
23 | + final float p; | |
24 | + final short ch, pa; | |
25 | + | |
26 | + float best; | |
27 | + | |
28 | + | |
29 | + | |
30 | + public PA(float p2, short ch2, short pa2) { p=p2; ch=ch2;pa=pa2;} | |
31 | + } | |
32 | + | |
33 | + // list of parent child combinations | |
34 | + private static ArrayList<PA> parents = new ArrayList<PA>(); | |
35 | + | |
36 | + // some data from the dependency tree | |
37 | + private short[] pos; | |
38 | + private DataF x; | |
39 | + private boolean[][] isChild ; | |
40 | + public short[] heads,types; | |
41 | + private float lastNBest; | |
42 | + private float best; // best so far | |
43 | + private float threshold; | |
44 | + private Extractor extractor; | |
45 | + | |
46 | + | |
47 | + /** | |
48 | + * Initialize the parallel rearrange thread | |
49 | + * | |
50 | + * @param pos the part-of-speech | |
51 | + * @param x the data | |
52 | + * @param p the parse whose heads and labels are rearranged | |
53 | + * @param lastNBest candidates with a value below this are skipped | |
54 | + * @param extractor computes the value of a candidate (encode3) | |
55 | + * @param best best so far | |
56 | + * @param threshold candidates with a value below best + threshold are skipped | |
57 | + */ | |
58 | + public ParallelRearrangeNBest(short[] pos , DataF x, Parse p, float lastNBest, Extractor extractor, float best, float threshold) { | |
59 | + | |
60 | + | |
61 | + heads=p.heads; | |
62 | + | |
63 | + types= p.labels; | |
64 | + | |
65 | + isChild = new boolean[heads.length][heads.length]; | |
66 | + | |
67 | + for(int i = 1, l1=1; i < heads.length; i++,l1=i) | |
68 | + while((l1= heads[l1]) != -1) isChild[l1][i] = true; | |
69 | + | |
70 | + | |
71 | + this.lastNBest =lastNBest; | |
72 | + this.pos =pos; | |
73 | + this.x=x; | |
74 | + | |
75 | + this.extractor = extractor; | |
76 | + this.best=best; | |
77 | + this.threshold = threshold; | |
78 | + } | |
79 | + | |
80 | + public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>(); | |
81 | + | |
82 | + @Override | |
83 | + public Object call() { | |
84 | + | |
85 | + // check the list of new possible parents and children for a better combination | |
86 | + for(int ch = 1; ch < heads.length; ch++) { | |
87 | + for(short pa = 0; pa < heads.length; pa++) { | |
88 | + if(ch == pa || pa == heads[ch] || isChild[ch][pa]) continue; | |
89 | + | |
90 | + short oldP = heads[ch], oldT = types[ch]; | |
91 | + heads[ch]=pa; | |
92 | + | |
93 | + short[] labels = Edges.get(pos[pa], pos[ch],ch<pa); | |
94 | + | |
95 | + for(int l=0;l<labels.length;l++) { | |
96 | + | |
97 | + types[ch]=labels[l]; | |
98 | + float p_new = extractor.encode3(pos, heads, types, x); | |
99 | + | |
100 | + if (p_new<lastNBest || ((best+this.threshold)>p_new)) continue; | |
101 | + | |
102 | + ParseNBest p = new ParseNBest(); | |
103 | + p.signature(heads, types); | |
104 | + p.f1=p_new; | |
105 | + parses.add(p); | |
106 | + } | |
107 | + | |
108 | + // change back | |
109 | + heads[ch]= oldP; types[ch]=oldT; | |
110 | + | |
111 | + // consider changes to labels only | |
112 | + labels = Edges.get(pos[oldP], pos[ch],ch<oldP); | |
113 | + | |
114 | + for(int l=0;l<labels.length;l++) { | |
115 | + | |
116 | + types[ch]=labels[l]; | |
117 | + float p_new = (float) extractor.encode3(pos, heads, types, x); | |
118 | + | |
119 | + // optimization: add only if larger than smallest of n-best | |
120 | + if (p_new<lastNBest || ((best+this.threshold)>p_new)) continue; | |
121 | + | |
122 | + ParseNBest p = new ParseNBest(); | |
123 | + p.signature(heads, types); | |
124 | + p.f1=p_new; | |
125 | + parses.add(p); | |
126 | + } | |
127 | + | |
128 | + heads[ch]= oldP; types[ch]=oldT; | |
129 | + } | |
130 | + } | |
131 | + return parses; | |
132 | + } | |
133 | + | |
134 | + | |
135 | + | |
136 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/decoder/ParallelRearrangeNBest2.java
0 → 100644
1 | +package decoder; | |
2 | + | |
3 | +import is2.data.DataF; | |
4 | +import is2.data.Edges; | |
5 | +import is2.data.Parse; | |
6 | +import is2.data.ParseNBest; | |
7 | + | |
8 | +import java.util.ArrayList; | |
9 | +import java.util.concurrent.Callable; | |
10 | + | |
11 | +import decoder.ParallelRearrangeNBest.PA; | |
12 | + | |
13 | +import extractors.Extractor; | |
14 | + | |
15 | +/** | |
16 | + * @author Dr. Bernd Bohnet, 30.08.2009 | |
17 | + * | |
18 | + * This class implements a parallel edge rearrangement for non-projective parsing; | |
19 | + * The linear method was first suggested by Ryan McDonald et al. (2005). | |
20 | + */ | |
21 | +final public class ParallelRearrangeNBest2 implements Callable<Object> { | |
22 | + | |
23 | + // new parent child combination to explore | |
24 | + final static class PA { | |
25 | + final float p; | |
26 | + final short ch, pa; | |
27 | + | |
28 | + | |
29 | + public short[] heads,types; | |
30 | + | |
31 | + public PA(Parse p, short ch2, short pa2) { | |
32 | + this.p =(float)p.f1; | |
33 | + heads =p.heads; | |
34 | + types=p.labels; | |
35 | + ch=ch2;pa=pa2; | |
36 | + | |
37 | + } | |
38 | + } | |
39 | + | |
40 | + // list of parent child combinations | |
41 | + private static ArrayList<PA> parents = new ArrayList<PA>(); | |
42 | + | |
43 | + // some data from the dependency tree | |
44 | + private short[] pos; | |
45 | + private DataF x; | |
46 | + private float lastNBest; | |
47 | + private float threshold; | |
48 | + private Extractor extractor; | |
49 | + | |
50 | + | |
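51 | + /** | |
52 | + * Initialize the parallel rearrange thread; the child-parent combinations | |
53 | + * to explore are taken from the shared list filled via add(). | |
54 | + * | |
55 | + * @param pos the part-of-speech | |
56 | + * @param x the data | |
57 | + * @param lastNBest candidates with a value below this are skipped | |
58 | + * @param extractor computes the value of a candidate (encode3) | |
59 | + * @param threshold candidates with a value below the parse's value + threshold are skipped | |
60 | + */ | |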
61 | + public ParallelRearrangeNBest2(short[] pos , DataF x, float lastNBest, Extractor extractor, float threshold) { | |
62 | + | |
63 | + | |
64 | + | |
65 | + this.lastNBest =lastNBest; | |
66 | + this.pos =pos; | |
67 | + this.x=x; | |
68 | + | |
69 | + this.extractor = extractor; | |
70 | + this.threshold = threshold; | |
71 | + } | |
72 | + | |
73 | + public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>(); | |
74 | + | |
75 | + @Override | |
76 | + public Object call() { | |
77 | + | |
78 | + try { | |
79 | + | |
80 | + while(true) { | |
81 | + PA p = getPA(); | |
82 | + | |
83 | + if (p==null) return parses; | |
84 | + | |
85 | + short oldP = p.heads[p.ch], oldT = p.types[p.ch]; | |
86 | + p.heads[p.ch]=p.pa; | |
87 | + | |
88 | + short[] labels = Edges.get(pos[p.pa], pos[p.ch],p.ch<p.pa); | |
89 | + | |
90 | + for(int l=0;l<labels.length;l++) { | |
91 | + | |
92 | + p.types[p.ch]=labels[l]; | |
93 | + float p_new = extractor.encode3(pos, p.heads, p.types, x); | |
94 | + | |
95 | + if (p_new<lastNBest || ((p.p+this.threshold)>p_new)) continue; | |
96 | + | |
97 | + ParseNBest x = new ParseNBest(); | |
98 | + x.signature(p.heads, p.types); | |
99 | + x.f1=p_new; | |
100 | + parses.add(x); | |
101 | + } | |
102 | + | |
103 | + // change back | |
104 | + p.heads[p.ch]= oldP; p.types[p.ch]=oldT; | |
105 | + | |
106 | + // consider changes to labels only | |
107 | + labels = Edges.get(pos[oldP], pos[p.ch],p.ch<oldP); | |
108 | + | |
109 | + for(int l=0;l<labels.length;l++) { | |
110 | + | |
111 | + p.types[p.ch]=labels[l]; | |
112 | + float p_new = (float) extractor.encode3(pos, p.heads, p.types, x); | |
113 | + | |
114 | + // optimization: add only if larger than smallest of n-best | |
115 | + if (p_new<lastNBest || ((p.p+this.threshold)>p_new)) continue; | |
116 | + | |
117 | + ParseNBest x = new ParseNBest(); | |
118 | + x.signature(p.heads, p.types); | |
119 | + x.f1=p_new; | |
120 | + parses.add(x); | |
121 | + } | |
122 | + | |
123 | + p.heads[p.ch]= oldP; p.types[p.ch]=oldT; | |
124 | + } | |
125 | + } catch(Exception e) { | |
126 | + e.printStackTrace(); | |
127 | + } | |
128 | + return parses; | |
129 | + } | |
130 | + | |
131 | + /** | |
132 | + * Add a child-parent combination that is later explored for rearrangement. | |
133 | + * Locks on the shared list, like getPA(), so it is never modified by two threads at once. | |
134 | + * @param p the parse whose heads and labels are rearranged | |
135 | + * @param ch2 index of the child | |
136 | + * @param pa index of the candidate parent | |
137 | + */ | |
138 | + public static void add(Parse p, short ch2, short pa) { | |
139 | + synchronized(parents) { parents.add(new PA(p,ch2,pa)); } | |
140 | + } | |
141 | + | |
142 | + public static PA getPA() { | |
143 | + synchronized(parents) { | |
144 | + if (parents.size()==0) return null; | |
145 | + return parents.remove(parents.size()-1); | |
146 | + } | |
147 | + } | |
148 | + | |
149 | + | |
150 | + | |
151 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/examples/DependencyParser.java
0 → 100644
1 | +package examples; | |
2 | + | |
3 | + | |
4 | +import is2.data.InstancesTagger; | |
5 | +import is2.data.SentenceData09; | |
6 | +import is2.io.CONLLReader09; | |
7 | +import is2.lemmatizer.Lemmatizer; | |
8 | +import is2.lemmatizer.MFO; | |
9 | +import is2.parser.Parser; | |
10 | +import is2.tag.Tagger; | |
11 | +//import org.apache.log4j.Logger; | |
12 | + | |
13 | +import java.io.File; | |
14 | +import java.util.Arrays; | |
15 | + | |
16 | +/** | |
17 | + * Dependency parsing | |
18 | + * | |
19 | + * @author B. Piwowarski <benjamin@bpiwowar.net> | |
20 | + * @date 10/10/12 | |
21 | + */ | |
22 | +//@TaskDescription(name = "dependency-parser", project = "mate-tools") | |
23 | +public class DependencyParser { | |
24 | + // final static private Logger LOGGER = Logger.getLogger(DependencyParser.class); | |
25 | + //@Argument(name = "lemmatizer", required = true, checkers = IOChecker.Readable.class) | |
26 | + File lemmatizerFile; | |
27 | + | |
28 | + //@Argument(name = "tagger", required = true) | |
29 | + File taggerFile; | |
30 | + | |
31 | + //@Argument(name = "parser", required = true) | |
32 | + File parserFile; | |
33 | + | |
34 | + //@Override | |
35 | + public int execute() throws Throwable { | |
36 | + | |
37 | + // Load lemmatizer | |
38 | + //LOGGER.info("Loading lemmatizer"); | |
39 | + // true = do uppercase lemmatization | |
40 | + Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath()); | |
41 | + | |
42 | + // Load tagger | |
43 | + //LOGGER.info("Loading tagger"); | |
44 | + Tagger tagger = new Tagger(taggerFile.getAbsolutePath()); | |
45 | + | |
46 | + // Load parser | |
47 | + //LOGGER.info("Loading parser"); | |
48 | + Parser parser = new Parser(parserFile.getAbsolutePath()); | |
49 | + | |
50 | + | |
51 | + // Sentences to parse | |
52 | + String sentences[] = new String[]{ | |
53 | + "Airfields have been constructed on a number of the islands .", | |
54 | + "Private investment has even made an increasingly modern ferry fleet possible .", | |
55 | + "Politically , the 1990s have been relatively quite times for the islands ." | |
56 | + }; | |
57 | + | |
58 | + CONLLReader09 reader = new CONLLReader09(CONLLReader09.NO_NORMALIZE); | |
59 | + | |
60 | + for (String sentence : sentences) { | |
61 | + // Prepare the sentence | |
62 | + InstancesTagger instanceTagger = new InstancesTagger(); | |
63 | + instanceTagger.init(1, new MFO()); | |
64 | + | |
65 | + String[] split = sentence.split("\\s+"); | |
66 | + String[] splitRoot = new String[split.length+1]; | |
67 | + System.arraycopy(split, 0, splitRoot, 1, split.length); | |
68 | + splitRoot[0] = CONLLReader09.ROOT; | |
69 | + | |
70 | + SentenceData09 instance = new SentenceData09(); | |
71 | + instance.init(splitRoot); | |
72 | + | |
73 | + reader.insert(instanceTagger, instance); | |
74 | + | |
75 | + SentenceData09 result = lemmatizer.apply(instance); | |
76 | + tagger.apply(result); | |
77 | + result = parser.parse(result, parser.params, false, parser.options); | |
78 | + | |
79 | + | |
80 | + // Output | |
81 | + System.out.println(Arrays.toString(result.forms)); | |
82 | + System.out.println(Arrays.toString(result.plemmas)); | |
83 | + System.out.println(Arrays.toString(result.ppos)); | |
84 | + System.out.println(Arrays.toString(result.pheads)); | |
85 | + System.out.println(Arrays.toString(result.plabels)); | |
86 | + System.out.println(); | |
87 | + | |
88 | + } | |
89 | + | |
90 | + return 0; | |
91 | + } | |
92 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/examples/FullPipelineSpanish.java
0 → 100644
1 | +package examples; | |
2 | + | |
3 | +import is2.data.SentenceData09; | |
4 | +import is2.io.CONLLWriter09; | |
5 | +import is2.lemmatizer.Lemmatizer; | |
6 | + | |
7 | +import is2.parser.Parser; | |
8 | +import is2.tag.Tagger; | |
9 | +import is2.tools.Tool; | |
10 | + | |
11 | +import java.io.IOException; | |
12 | +import java.util.ArrayList; | |
13 | +import java.util.StringTokenizer; | |
14 | + | |
15 | +/** | |
16 | + * @author Bernd Bohnet, 13.09.2010 | |
17 | + * | |
18 | + * Illustrates the application of the full pipeline: lemmatizer, morphologic tagger, part-of-speech tagger, and parser | |
19 | + */ | |
20 | +public class FullPipelineSpanish { | |
21 | + | |
22 | + | |
23 | + // shows how to parse a sentence and call the tools | |
24 | + public static void main(String[] args) throws IOException { | |
25 | + | |
26 | + // Create a data container for a sentence | |
27 | + SentenceData09 i = new SentenceData09(); | |
28 | + | |
29 | + if (args.length==1) { // input might be a sentence: "This is another test ." | |
30 | + StringTokenizer st = new StringTokenizer(args[0]); | |
31 | + ArrayList<String> forms = new ArrayList<String>(); | |
32 | + | |
33 | + forms.add("<root>"); | |
34 | + while(st.hasMoreTokens()) forms.add(st.nextToken()); | |
35 | + | |
36 | + i.init(forms.toArray(new String[0])); | |
37 | + | |
38 | + } else { | |
39 | + // provide a default Spanish sentence | |
40 | + i.init(new String[] {"<root>","También","estuve","emocionado","pero","no","pude","imaginar","mi","vida","sin","la", | |
41 | + "gente","tan","intima","a","mí","."}); | |
42 | + | |
43 | + } | |
44 | + | |
45 | + // lemmatizing | |
46 | + | |
47 | + System.out.println("\nReading the model of the lemmatizer"); | |
48 | + Tool lemmatizer = new Lemmatizer("models/lemma-spa.model"); // create a lemmatizer | |
49 | + | |
50 | + System.out.println("Applying the lemmatizer"); | |
51 | + lemmatizer.apply(i); | |
52 | + | |
53 | + System.out.print(i.toString()); | |
54 | + System.out.print("Lemmata: "); for (String l : i.plemmas) System.out.print(l+" "); System.out.println(); | |
55 | + | |
56 | + // morphologic tagging | |
57 | + | |
58 | + System.out.println("\nReading the model of the morphologic tagger"); | |
59 | + is2.mtag.Tagger morphTagger = new is2.mtag.Tagger("models/mtag-spa.model"); | |
60 | + | |
61 | + System.out.println("\nApplying the morpholoigc tagger"); | |
62 | + morphTagger.apply(i); | |
63 | + | |
64 | + System.out.print(i.toString()); | |
65 | + System.out.print("Morph: "); for (String f : i.pfeats) System.out.print(f+" "); System.out.println(); | |
66 | + | |
67 | + // part-of-speech tagging | |
68 | + | |
69 | + System.out.println("\nReading the model of the part-of-speech tagger"); | |
70 | + Tool tagger = new Tagger("models/tag-spa.model"); | |
71 | + | |
72 | + System.out.println("\nApplying the part-of-speech tagger"); | |
73 | + tagger.apply(i); | |
74 | + | |
75 | + System.out.print(i.toString()); | |
76 | + System.out.print("Part-of-Speech tags: "); for (String p : i.ppos) System.out.print(p+" "); System.out.println(); | |
77 | + | |
78 | + // parsing | |
79 | + | |
80 | + System.out.println("\nReading the model of the dependency parser"); | |
81 | + Tool parser = new Parser("models/prs-spa.model"); | |
82 | + | |
83 | + System.out.println("\nApplying the parser"); | |
84 | + parser.apply(i); | |
85 | + | |
86 | + System.out.println(i.toString()); | |
87 | + | |
88 | + // write the result to a file | |
89 | + | |
90 | + CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt"); | |
91 | + | |
92 | + writer.write(i, CONLLWriter09.NO_ROOT); | |
93 | + writer.finishWriting(); | |
94 | + | |
95 | + } | |
96 | + | |
97 | + | |
98 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/examples/FullPipelineTest.java
0 → 100644
1 | +package examples; | |
2 | + | |
3 | + | |
4 | +import is2.data.InstancesTagger; | |
5 | +import is2.data.SentenceData09; | |
6 | +import is2.io.CONLLReader09; | |
7 | +import is2.io.CONLLWriter09; | |
8 | +import is2.lemmatizer.Lemmatizer; | |
9 | +import is2.lemmatizer.MFO; | |
10 | +import is2.parser.Parser; | |
11 | +import is2.tag.Tagger; | |
12 | +//import org.apache.log4j.Logger; | |
13 | + | |
14 | +import java.io.File; | |
15 | +import java.util.Arrays; | |
16 | + | |
17 | +/** | |
18 | + * Dependency parsing | |
19 | + * | |
20 | + * @author B. Piwowarski <benjamin@bpiwowar.net> | |
21 | + * @date 10/10/12 | |
22 | + */ | |
23 | +//@TaskDescription(name = "dependency-parser", project = "mate-tools") | |
24 | +public class FullPipelineTest { | |
25 | + // final static private Logger LOGGER = Logger.getLogger(DependencyParser.class); | |
26 | + //@Argument(name = "lemmatizer", required = true, checkers = IOChecker.Readable.class) | |
27 | + public File lemmatizerFile; | |
28 | + | |
29 | + //@Argument(name = "tagger", required = true) | |
30 | + public File taggerFile; | |
31 | + | |
32 | + public File mtaggerFile; | |
33 | + | |
34 | + //@Argument(name = "parser", required = true) | |
35 | + public File parserFile; | |
36 | + | |
37 | + //@Override | |
38 | + public int execute(String source, String target) throws Throwable { | |
39 | + | |
40 | + // Load lemmatizer | |
41 | + //LOGGER.info("Loading lemmatizer"); | |
42 | + // true = do uppercase lemmatization | |
43 | + Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath()); | |
44 | + | |
45 | + // Load tagger | |
46 | + //LOGGER.info("Loading tagger"); | |
47 | + Tagger tagger = new Tagger(taggerFile.getAbsolutePath()); | |
48 | + | |
49 | + is2.mtag.Tagger mtagger = new is2.mtag.Tagger(mtaggerFile.getAbsolutePath()); | |
50 | + | |
51 | + // Load parser | |
52 | + //LOGGER.info("Loading parser"); | |
53 | + Parser parser = new Parser(parserFile.getAbsolutePath()); | |
54 | + | |
55 | + | |
56 | + CONLLReader09 reader = new CONLLReader09(source); | |
57 | + CONLLWriter09 writer = new CONLLWriter09(target); | |
58 | + | |
59 | + int count=0; | |
60 | + while (true) { | |
61 | + // Prepare the sentence | |
62 | + InstancesTagger is = new InstancesTagger(); | |
63 | + is.init(1, new MFO()); | |
64 | + | |
65 | + SentenceData09 instance= reader.getNext(is); | |
66 | + if (instance ==null) break; | |
67 | + SentenceData09 result = null; | |
68 | +try { | |
69 | + | |
70 | + System.out.print("\b\b\b\b"+count); | |
71 | + result= lemmatizer.apply(instance); | |
72 | + | |
73 | + result = tagger.apply(result); | |
74 | + result= mtagger.apply(result); | |
75 | + result = parser.apply(result); | |
76 | + | |
77 | + count++; | |
78 | +} catch(Exception e) { | |
79 | + | |
80 | + System.out.println("error"+result); | |
81 | + System.out.println("error"+instance); | |
82 | + e.printStackTrace(); | |
83 | + break; | |
84 | +} | |
85 | + | |
86 | + // Output | |
87 | + writer.write(result); | |
88 | + | |
89 | + } | |
90 | + writer.finishWriting(); | |
91 | + return 0; | |
92 | + } | |
93 | + | |
94 | + public static void main(String args[]) throws Throwable { | |
95 | + // six arguments are read below (args[0]..args[5]); print the usage and stop if any is missing | |
96 | + if (args.length<6) { | |
97 | + System.out.println("lemmatizer-model tagger-model mtagger-model parser-model source target"); | |
98 | + System.exit(0); | |
99 | + } | |
100 | + FullPipelineTest p = new FullPipelineTest(); | |
101 | + p.lemmatizerFile = new File(args[0]); | |
102 | + p.taggerFile = new File(args[1]); | |
103 | + p.mtaggerFile = new File(args[2]); | |
104 | + p.parserFile = new File(args[3]); | |
105 | + // args[4] is the source file to read, args[5] the target file to write | |
106 | + p.execute(args[4], args[5]); | |
107 | + | |
108 | + } | |
109 | + | |
110 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/examples/MorphTagger.java
0 → 100644
1 | +package examples; | |
2 | + | |
3 | +import is2.data.SentenceData09; | |
4 | +import is2.lemmatizer.Lemmatizer; | |
5 | +import is2.lemmatizer.Options; | |
6 | + | |
7 | +import java.io.IOException; | |
8 | +import java.util.ArrayList; | |
9 | +import java.util.StringTokenizer; | |
10 | + | |
11 | +/** | |
12 | + * @author Bernd Bohnet, 13.09.2010 | |
13 | + * | |
14 | + * Illustrates the application of some components: lemmatizer and morphologic tagger | |
15 | + */ | |
16 | +public class MorphTagger { | |
17 | + | |
18 | + | |
19 | + /** | |
20 | + * How to lemmatize and morphologically tag a sentence. | |
21 | + */ | |
22 | + public static void main(String[] args) throws IOException { | |
23 | + | |
24 | + | |
25 | + // Create a data container for a sentence | |
26 | + SentenceData09 i = new SentenceData09(); | |
27 | + | |
28 | + if (args.length==1) { // input might be a sentence: "This is another test ." | |
29 | + StringTokenizer st = new StringTokenizer(args[0]); | |
30 | + ArrayList<String> forms = new ArrayList<String>(); | |
31 | + | |
32 | + forms.add("<root>"); | |
33 | + while(st.hasMoreTokens()) forms.add(st.nextToken()); | |
34 | + | |
35 | + i.init(forms.toArray(new String[0])); | |
36 | + | |
37 | + } else { | |
38 | + // provide a default sentence | |
39 | + i.init(new String[] {"<root>","Häuser","hat","ein","Umlaut","."}); | |
40 | + } | |
41 | + | |
42 | + //print the forms | |
43 | + for (String l : i.forms) System.out.println("forms : "+l); | |
44 | + | |
45 | + // tell the lemmatizer the location of the model | |
46 | + is2.lemmatizer.Options optsLemmatizer = new Options(new String[] {"-model","models/lemma-ger.model"}); | |
47 | + | |
48 | + // create a lemmatizer | |
49 | + Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName); | |
50 | + | |
51 | + // lemmatize a sentence; the result is stored in the SentenceData09 i | |
52 | + lemmatizer.apply(i); | |
53 | + | |
54 | + | |
55 | + // output the lemmata | |
56 | + for (String l : i.plemmas) System.out.println("lemma : "+l); | |
57 | + | |
58 | + | |
59 | + is2.mtag.Options morphologicTaggerOptions = new is2.mtag.Options(new String[] {"-model","models/mtag-ger.model"}); | |
60 | + | |
61 | + is2.mtag.Tagger mt = new is2.mtag.Tagger(morphologicTaggerOptions); | |
62 | + | |
63 | + try { | |
64 | + | |
65 | + | |
66 | + // SentenceData09 snt = is2.mtag.Main.out(i.forms, lemmata); | |
67 | + | |
68 | + SentenceData09 snt = mt.apply(i); | |
69 | + for(String f : snt.pfeats) System.out.println("feats "+f); | |
70 | + | |
71 | + } catch(Exception e){ | |
72 | + e.printStackTrace(); | |
73 | + } | |
74 | + | |
75 | + | |
76 | + } | |
77 | + | |
78 | + | |
79 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/examples/ParseOnly.java
0 → 100755
1 | +package examples; | |
2 | + | |
3 | +import is2.data.SentenceData09; | |
4 | +import is2.parser.Options; | |
5 | +import is2.parser.Parser; | |
6 | + | |
7 | + | |
8 | +public class ParseOnly { | |
9 | + | |
10 | + public static void main(String[] args) { | |
11 | + | |
12 | + if (args.length ==0) { | |
13 | + plain(); | |
14 | + } | |
15 | + | |
16 | + } | |
17 | + | |
18 | + /** | |
19 | + * This example shows how to parse a sentence. | |
20 | + */ | |
21 | + public static void plain() { | |
22 | + | |
23 | + // initialize the options | |
24 | + String[] opts ={"-model","models/prs-eng-x.model"}; | |
25 | + Options options = new Options(opts); | |
26 | + | |
27 | + // create a parser | |
28 | + Parser parser = new Parser(options); | |
29 | + | |
30 | + // Create a data container for a sentence | |
31 | + SentenceData09 i = new SentenceData09(); | |
32 | + | |
33 | + // Provide the sentence | |
34 | + i.init(new String[] {"<root>","This","is","a","test","."}); | |
35 | + i.setPPos(new String[]{"<root-POS>","DT","VBZ","DT","NN","."}); | |
36 | + | |
37 | + // parse the sentence | |
38 | + SentenceData09 out = parser.apply(i); | |
39 | + | |
40 | + // output the sentence and dependency tree | |
41 | + System.out.println(out.toString()); | |
42 | + | |
43 | + // Get the parsing results | |
44 | + out.getLabels(); | |
45 | + out.getParents(); | |
46 | + | |
47 | + } | |
48 | + | |
49 | + | |
50 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/examples/Pipeline.java
0 → 100644
1 | +package examples; | |
2 | + | |
3 | +import java.io.File; | |
4 | +import java.io.IOException; | |
5 | +import java.util.ArrayList; | |
6 | +import java.util.StringTokenizer; | |
7 | + | |
8 | +import is2.data.SentenceData09; | |
9 | +import is2.lemmatizer.Lemmatizer; | |
10 | +import is2.parser.Options; | |
11 | +import is2.parser.Parser; | |
12 | +import is2.tag.Tagger; | |
13 | + | |
14 | +/** | |
15 | + * @author Bernd Bohnet, 13.09.2010 | |
16 | + * | |
17 | + * Illustrates the application of some components: lemmatizer, tagger, and parser | |
18 | + */ | |
19 | +public class Pipeline { | |
20 | + | |
21 | + | |
22 | + // how to parse a sentences and call the tools | |
23 | + public static void main(String[] args) throws IOException { | |
24 | + | |
25 | + | |
26 | + // Create a data container for a sentence | |
27 | + SentenceData09 i = new SentenceData09(); | |
28 | + | |
29 | + if (args.length==1) { // input might be a sentence: "This is another test ." | |
30 | + StringTokenizer st = new StringTokenizer(args[0]); | |
31 | + ArrayList<String> forms = new ArrayList<String>(); | |
32 | + | |
33 | + forms.add("<root>"); | |
34 | + while(st.hasMoreTokens()) forms.add(st.nextToken()); | |
35 | + | |
36 | + i.init(forms.toArray(new String[0])); | |
37 | + | |
38 | + } else { | |
39 | + // provide a default sentence | |
40 | + i.init(new String[] {"<root>","This","is","a","test","."}); | |
41 | + } | |
42 | + | |
43 | + //print the forms | |
44 | + for (String l : i.forms) System.out.println("form : "+l); | |
45 | + | |
46 | + // tell the lemmatizer the location of the model | |
47 | + is2.lemmatizer.Options optsLemmatizer = new is2.lemmatizer.Options(new String[] {"-model","models/lemma-eng.model"}); | |
48 | + | |
49 | + // create a lemmatizer | |
50 | + Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName); | |
51 | + | |
52 | + // lemmatize a sentence; the result is stored in the stenenceData09 i | |
53 | + i = lemmatizer.apply(i); | |
54 | + | |
55 | + | |
56 | + // output the lemmata | |
57 | + for (String l : i.plemmas) System.out.println("lemma : "+l); | |
58 | + | |
59 | + // tell the tagger the location of the model | |
60 | + is2.tag.Options optsTagger = new is2.tag.Options(new String[]{"-model","models/tag-eng.model"}); | |
61 | + Tagger tagger = new Tagger(optsTagger); | |
62 | + | |
63 | + | |
64 | + | |
65 | +// String pos[] =tagger.tag(i.forms, i.lemmas); | |
66 | +// i.setPPos(pos); | |
67 | + | |
68 | + | |
69 | + SentenceData09 tagged = tagger.tag(i); | |
70 | + for (String p : tagged.ppos) System.out.println("pos "+p); | |
71 | + | |
72 | + | |
73 | + | |
74 | + // initialize the options | |
75 | + Options optsParser = new Options(new String[]{"-model","models/prs-eng-x.model"}); | |
76 | + | |
77 | + // create a parser | |
78 | + Parser parser = new Parser(optsParser); | |
79 | + | |
80 | + // parse the sentence (you get a copy of the input i) | |
81 | + SentenceData09 parse = parser.apply(tagged); | |
82 | + | |
83 | + System.out.println(parse.toString()); | |
84 | + | |
85 | + // create some trash on the hard drive :-) | |
86 | + is2.io.CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt"); | |
87 | + | |
88 | + writer.write(i); | |
89 | + writer.finishWriting(); | |
90 | + } | |
91 | + | |
92 | + | |
93 | + | |
94 | + | |
95 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/extractors/Extractor.java
0 → 100644
1 | +/** | |
2 | + * | |
3 | + */ | |
4 | +package extractors; | |
5 | + | |
6 | +import is2.data.Cluster; | |
7 | +import is2.data.DataF; | |
8 | +import is2.data.FV; | |
9 | +import is2.data.IFV; | |
10 | +import is2.data.Instances; | |
11 | + | |
12 | +/** | |
13 | + * @author Dr. Bernd Bohnet, 29.04.2011 | |
14 | + * | |
15 | + * | |
16 | + */ | |
17 | +public interface Extractor { | |
18 | + | |
19 | + | |
20 | + /** | |
21 | + * Initializes the Extractor general parts | |
22 | + */ | |
23 | + public void initStat(); | |
24 | + | |
25 | + /** | |
26 | + * Initializes the Extractor specific parts | |
27 | + */ | |
28 | + public void init(); | |
29 | + | |
30 | + public int basic(short[] pos, int[] forms, int w1, int w2, Cluster cluster, IFV f); | |
31 | + | |
32 | + public void firstm(Instances is, int i, int w1, int w2, int j, Cluster cluster, long[] svs); | |
33 | + | |
34 | + public void siblingm(Instances is, int i, short[] pos, int[] forms, | |
35 | + int[] lemmas, short[][] feats, int w1, int w2, int g, int j, | |
36 | + Cluster cluster, long[] svs, int n); | |
37 | + | |
38 | + public void gcm(Instances is, int i, int w1, int w2, int g, int j, Cluster cluster, long[] svs); | |
39 | + | |
40 | + public int getType(); | |
41 | + | |
42 | + public FV encodeCat(Instances is, int n, short[] pos, int[] is2, | |
43 | + int[] is3, short[] heads, short[] labels, short[][] s, Cluster cl, | |
44 | + FV pred); | |
45 | + | |
46 | + public void setMaxForm(int integer); | |
47 | + | |
48 | + /** | |
49 | + * @return | |
50 | + */ | |
51 | + public int getMaxForm(); | |
52 | + | |
53 | + | |
54 | + public float encode3(short[] pos, short[] heads, short[] labs, DataF x); | |
55 | + | |
56 | + | |
57 | + | |
58 | + | |
59 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/extractors/ExtractorClusterStacked.java
0 → 100755
1 | +package extractors; | |
2 | + | |
3 | + | |
4 | +import is2.data.Cluster; | |
5 | +import is2.data.D4; | |
6 | +import is2.data.DataF; | |
7 | +import is2.data.Edges; | |
8 | +import is2.data.FV; | |
9 | +import is2.data.IFV; | |
10 | +import is2.data.Instances; | |
11 | +import is2.data.Long2IntInterface; | |
12 | +import is2.data.MFB; | |
13 | +import is2.util.DB; | |
14 | + | |
15 | + | |
16 | + | |
17 | +final public class ExtractorClusterStacked implements Extractor { | |
18 | + | |
// Per-feature-type values looked up from the MFB feature counter.
// initStat() assigns all of them except s_child, which the visible code never sets.
public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos;


// One D4 encoder per feature template, all created in the constructor with the shared li.
// init() sets the a0..a7 slots of all of them except dwr, dr and dw.
final D4 d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ;

// Mapping handed to every D4 instance (presumably long feature code -> int index).
public final Long2IntInterface li;
25 | + | |
/**
 * Creates the extractor: runs initFeatures(), remembers li and builds one
 * D4 encoder per feature template, each constructed with li.
 *
 * @param li mapping passed on to every D4 encoder
 */
public ExtractorClusterStacked(Long2IntInterface li) {

    initFeatures();
    this.li = li;

    // plain and label-carrying part-of-speech templates
    d0 = new D4(li);
    dl1 = new D4(li);
    dl2 = new D4(li);

    dwr = new D4(li);
    dr = new D4(li);
    dwwp = new D4(li);

    // word based templates
    dw = new D4(li);
    dwp = new D4(li);

    // morphologic-feature and cluster templates
    dlf = new D4(li);
    d3lp = new D4(li);
    d2lp = new D4(li);
    d2pw = new D4(li);
    d2pp = new D4(li);
}
42 | + | |
43 | + public void initStat() { | |
44 | + | |
45 | + | |
46 | + MFB mf = new MFB(); | |
47 | + s_rel = mf.getFeatureCounter().get(REL).intValue(); | |
48 | + s_pos = mf.getFeatureCounter().get(POS).intValue(); | |
49 | + s_word = mf.getFeatureCounter().get(WORD).intValue(); | |
50 | + s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits(); | |
51 | + s_dir = mf.getFeatureCounter().get(DIR); | |
52 | + la = mf.getValue(DIR, LA); | |
53 | + ra = mf.getValue(DIR, RA); | |
54 | + s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST); | |
55 | + s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT); | |
56 | + s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH); | |
57 | + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH); | |
58 | + } | |
59 | + | |
60 | + public void init(){ | |
61 | + // DB.println("init"); | |
62 | + d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos; | |
63 | + dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; | |
64 | + dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos; | |
65 | + dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word; | |
66 | + dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word; | |
67 | + dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos; | |
68 | + d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath; | |
69 | + d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | |
70 | + d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | |
71 | + d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | |
72 | + } | |
73 | + | |
74 | + | |
75 | + public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f) | |
76 | + { | |
77 | + | |
78 | + d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean(); | |
79 | + | |
80 | + d3lp.clean(); d2lp.clean();d2pw.clean(); d2pp.clean(); | |
81 | + | |
82 | + int n=1; | |
83 | + int dir= (p < d)? ra:la; | |
84 | + d0.v0= n++; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4; | |
85 | + int end= (p >= d ? p : d); | |
86 | + int start = (p >= d ? d : p) + 1; | |
87 | + | |
88 | + for(int i = start ; i <end ; i++) { | |
89 | + d0.v3=pposs[i]; | |
90 | + d0.cz4(); | |
91 | + d0.csa(s_dir,dir,f); | |
92 | + } | |
93 | + return n; | |
94 | + } | |
95 | + | |
96 | + | |
/**
 * Computes the first-order feature values for the edge prnt -> dpnt with
 * the given label in sentence i of is and stores them in f.
 *
 * f is zeroed first. Each feature line sets the template number v0 and the
 * value slots of a D4 template, calls one of its czN() methods and stores
 * the result of csa()/getVal(). The D4 methods are not visible here;
 * presumably czN() fixes how many slots are used and csa() folds in the
 * direction before returning the encoded value.
 *
 * If the sentence has no morphologic features (is.feats[i] is null) the
 * method returns after the cluster features; otherwise extractFeat() is
 * called with f and the current index c.
 *
 * @param is      instances holding forms, lemmas, tags and features
 * @param i       index of the sentence within is
 * @param prnt    index of the head word
 * @param dpnt    index of the dependent word
 * @param label   edge label, stored in slot v1 of the templates
 * @param cluster word clusters, queried through getLP()
 * @param f       output array for the feature values
 */
public void firstm(Instances is, int i,
        int prnt, int dpnt, int label, Cluster cluster, long[] f)
{


    //short[] pposs, int[] form, int[] lemmas, short[][] feats
    for(int k=0;k<f.length;k++) f[k]=0;

    short[] pposs = is.pposs[i];
    int[] form =is.forms[i];
    short[][] feats = is.feats[i];


    int pF = form[prnt],dF = form[dpnt];
    int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt];
    int pP = pposs[prnt],dP = pposs[dpnt];

    // cluster ids are looked up before the forms are clamped by maxForm below
    int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF);

    final int dir= (prnt < dpnt)? ra:la;

    // forms and lemmas above maxForm are replaced by -1
    if (pF>maxForm) pF=-1;
    if (pL>maxForm) pL=-1;

    if (dF>maxForm) dF=-1;
    if (dL>maxForm) dL=-1;


    // n: running template number (written to v0), c: next free index in f
    int n=3,c=0;

    dl2.v1=label;
    dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
    dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
    dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
    dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);


    dwwp.v1=label;
    dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir);

    dl1.v1=label;
    dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.csa(s_dir,dir);

    // tags one and two positions to the left/right of head and dependent;
    // s_str / s_end stand in at the sentence boundaries
    int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str;
    int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end;

    int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str;
    int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end;

    // neighbouring forms; s_stwrd is used at both sentence boundaries
    int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd;
    int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd;



    // NOTE(review): from here to the "lemmas" section the results are stored at
    // f[n++] instead of f[c++], so n advances twice per line and the values land
    // at f[12], f[14], ... The c-indexed writes further down continue at f[8] and
    // therefore overwrite some of these slots. This looks unintentional, but the
    // template numbers feed the feature encoding - confirm against the trained
    // models before changing anything.
    dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v5=dPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v3=pPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);


    dl1.v0= n++; dl1.v3=pPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v3=dPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v3=dPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v3=pPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);

    dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v5=dPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v3=pPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);

    dl1.v0= n++; dl1.v3=pPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v3=dPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v3=dPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v3=pPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);



    // NOTE(review): in each of the next eight lines v3 is assigned twice, so the
    // neighbouring form (dFm1, dFp1, pFm1, pFp1) is overwritten before it is
    // used; possibly v2 was meant for the first assignment - confirm.
    dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal();
    dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.cz5(); f[n++]=dl2.getVal();
    dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal();
    dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.cz5(); f[n++]=dl2.getVal();


    dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal();
    dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.cz5(); f[n++]=dl2.getVal();
    dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal();
    dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.cz5(); f[n++]=dl2.getVal();


    dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir);
    dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir);
    // NOTE(review): v4 is assigned twice and v5 is not set before cz6() - confirm
    dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[n++]=dwwp.csa(s_dir,dir);



    // lemmas

    dl2.v1=label;
    dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
    dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
    dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
    dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);


    dwwp.v1=label;
    dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir);

    dwp.v1= label;
    dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; dwp.cz6(); f[c++]=dwp.csa(s_dir,dir);
    dwp.v0=n++;dwp.cz5(); f[c++]=dwp.csa(s_dir,dir);

    dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.csa(s_dir,dir);
    dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir);
    dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir);


    // cluster

    d2pw.v1=label;
    d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.csa(s_dir,dir);
    d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir);
    d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir);
    d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.csa(s_dir,dir);


    d2pp.v1=label;
    d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.csa(s_dir,dir);
    d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
    d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
    d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.csa(s_dir,dir);


    // predicted labels and heads; only referenced by the disabled stacking code below
    short[] prel = is.plabels[i];
    short[] phead = is.pheads[i];


    //take those in for stacking
    // dl2.v1=label;
    // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir);
    // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.csa(s_dir,dir);



    // no morphologic features for this sentence: done
    if (feats==null) return;

    // prepare the dlf template and let extractFeat() continue at index c
    short[] featsP =feats[prnt], featsD =feats[dpnt];
    dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP;
    extractFeat(f, c, dir, featsP, featsD);

    return;
}
250 | + | |
251 | + | |
252 | + | |
/**
 * Computes the feature values for head p, dependent d and a third word gc
 * (presumably the grandchild; -1 when there is none) under the given label
 * in sentence i of is, and stores them in f starting at index 0.
 *
 * f is zeroed first. Each feature line sets the template number v0 and the
 * value slots of a D4 template, calls one of its czN() methods and stores
 * the result of csa(). The D4 methods are not visible here; presumably
 * cs() folds a value into the code and csa() does the same and returns it.
 *
 * If the sentence has no morphologic features (is.feats[i] is null) the
 * method returns before the feature part; otherwise extractFeat() is
 * called with f and the current index c.
 *
 * @param is      instances holding forms, lemmas, tags and features
 * @param i       index of the sentence within is
 * @param p       index of the head word
 * @param d       index of the dependent word
 * @param gc      index of the third word, or -1
 * @param label   edge label, stored in slot v1 of the templates
 * @param cluster word clusters, queried through getLP()
 * @param f       output array for the feature values
 */
public void gcm(Instances is , int i, int p, int d, int gc, int label,Cluster cluster, long[] f) {

    for(int k=0;k<f.length;k++) f[k]=0;

    short[] pos= is.pposs[i];
    int[] forms=is.forms[i];
    int[] lemmas=is.plemmas[i];
    short[][] feats=is.feats[i];

    int pP = pos[p], dP = pos[d];
    int prntF = forms[p], chldF = forms[d];
    int prntL = lemmas[p], chldL = lemmas[d];
    // cluster ids are looked up before the forms are clamped by maxForm below
    int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF);

    // values for gc; boundary constants are used when gc is -1
    int gP = gc != -1 ? pos[gc] : s_str;
    int gcF = gc != -1 ? forms[gc] : s_stwrd;
    int gcL = gc != -1 ? lemmas[gc] : s_stwrd;
    int gcLS = (gc != -1) && (gcF!=-1) ? cluster.getLP(gcF) : s_stwrd;

    // forms and lemmas above maxForm are replaced by -1
    if (prntF>maxForm) prntF=-1;
    if (prntL>maxForm) prntL=-1;

    if (chldF>maxForm) chldF=-1;
    if (chldL>maxForm) chldL=-1;

    if (gcF>maxForm) gcF=-1;
    if (gcL>maxForm) gcL=-1;


    // direction of p -> d and of d -> gc (with gc == -1 the second one is always la)
    int dir= (p < d)? ra:la, dir_gra =(d < gc)? ra:la;

    // n: running template number (written to v0), c: next free index in f
    int n=84,c=0;

    //dl1.v023();
    dl1.v1=label;
    dl1.v0= n++; dl1.v2=pP; dl1.v3=dP;dl1.v4=gP; dl1.cz5(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
    dl1.v0= n++; dl1.v2=pP; dl1.v3=gP; dl1.cz4();dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
    dl1.v0= n++; dl1.v2=dP; dl1.cz4(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);

    dwwp.v1=label;
    dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=gcF;
    dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);

    dwwp.v0= n++; dwwp.v2=chldF; dwwp.v3=gcF;
    dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);

    dwp.v1=label;
    dwp.v0= n++; dwp.v2=gcF; dwp.v3=pP;
    dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);

    dwp.v0= n++; dwp.v2=gcF; dwp.v3=dP;
    dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);

    dwp.v0= n++; dwp.v2=prntF; dwp.v3=gP;
    dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);

    dwp.v0= n++; dwp.v2=chldF; dwp.v3=gP;
    dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra);


    // lemma

    dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=gcL;
    dwwp.cz4();dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);

    dwwp.v0= n++; dwwp.v2=chldL; dwwp.v3=gcL;
    dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);

    dwp.v0= n++; dwp.v2=gcL; dwp.v3=pP;
    dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);

    dwp.v0= n++; dwp.v2=gcL; dwp.v3=dP;
    dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);

    dwp.v0= n++; dwp.v2=prntL; dwp.v3=gP;
    dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);

    dwp.v0= n++; dwp.v2=chldL; dwp.v3=gP;
    dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra);


    // clusters

    d2lp.v1= label;
    d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);// f.add(li.l2i(l));
    d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);
    d3lp.v0= n++; d3lp.v1= label; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=gcLS; d3lp.cz5(); d3lp.cs(s_dir,dir);f[c++]=d3lp.csa(s_dir,dir_gra);

    //_f83;
    // from here on only the p -> d direction is added
    d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=gcF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
    d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.v4=chldF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
    d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.v4=prntF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);

    d2pp.v1= label;
    d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=gP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
    d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=gcLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
    d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=gcLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);



    // linear features

    int prntPm1 = p != 0 ? pos[p - 1] : s_str; // parent-pos-minus1
    int chldPm1 = d - 1 >=0 ? pos[d - 1] : s_str; // child-pos-minus1
    int prntPp1 = p != pos.length - 1 ? pos[p + 1] : s_end;
    int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end;

    int gcPm1 = gc > 0 ? pos[gc - 1] : s_str;
    // NOTE(review): with gc == -1 the condition holds for any non-empty
    // sentence, so this reads pos[0] instead of s_end - confirm
    int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end;

    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP;dl1.v5=chldPp1; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
    // NOTE(review): v5 is not set on the next line, so cz6() sees the value left by the previous line - confirm
    dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=prntPp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);


    // cluster ids of the neighbouring words
    // NOTE(review): the "+1" variants look up forms while the "-1" variants look
    // up lemmas, and gcLSp1 falls back to s_end where the others use _cend - confirm
    int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend;
    int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend;
    int gcLSp1 = gc < pos.length -1 ? forms[gc + 1] ==-1?-1:cluster.getLP(forms[gc + 1]) : s_end;

    int pLSm1 = p != 0 ? lemmas[p - 1]==-1?-1:cluster.getLP(lemmas[p - 1]): _cstr;
    int cLSm1 = d - 1 >=0 ? lemmas[d - 1] ==-1?-1:cluster.getLP(lemmas[d - 1]):_cstr;
    int gcLSm1 = gc > 0 ? lemmas[gc - 1] ==-1?-1:cluster.getLP(lemmas[gc - 1]) : _cstr;


    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=dP; dl1.cz5();f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
    // NOTE(review): v5 is not set on the next line, so cz6() sees the value left by the previous line - confirm
    dl1.v0= n++; dl1.v2=cLSm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
    dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=pLSp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);



    // predicted labels and heads; g and gr below are only referenced by the
    // disabled stacking code
    short[] prel = is.plabels[i],phead=is.pheads[i];

    int g = p==phead[d]?1:2 ;
    if (gc>=0) g += d==phead[gc]?4:8;

    int gr = gc==-1?s_relend:prel[gc];

    // take those in for stacking
    /*
    dl2.v1=label;
    dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
    dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
    dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);

    dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
    dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
    dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);

*/
    // no morphologic features for this sentence: done
    if (feats==null) return;

    // despite the names, featsP holds the features of d and featsD those of gc
    // (null when gc is -1)
    short[] featsP =feats[d];
    short[] featsD =gc!=-1?feats[gc]:null;

    // prepare the dlf template and let extractFeat() continue at index c
    dlf.v0= n++; dlf.v1=label; dlf.v2=gP; dlf.v3=dP;
    extractFeat(f, c, dir, featsP, featsD);
    return;
}
437 | + | |
438 | + | |
439 | + public void siblingm(Instances is , int i,short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, int sblng, int label, Cluster cluster, long[] f, int v) | |
440 | + { | |
441 | + | |
442 | + for(int k=0;k<f.length;k++) f[k]=0; | |
443 | + | |
444 | + int pP = pos[prnt], dP = pos[d]; | |
445 | + int prntF = forms[prnt],chldF = forms[d]; | |
446 | + int prntL = lemmas[prnt], chldL = lemmas[d]; | |
447 | + int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); | |
448 | + | |
449 | + int sP = sblng!=-1 ? pos[sblng] : s_str, sblF = sblng!=-1 ? forms[sblng] : s_stwrd, sblL = sblng!=-1 ? lemmas[sblng] : s_stwrd; | |
450 | + | |
451 | + int sblLS = (sblng != -1)&&(sblF!=-1) ? cluster.getLP(sblF) : s_stwrd; | |
452 | + | |
453 | + | |
454 | + int dir= (prnt < d)? ra:la; | |
455 | + | |
456 | + int abs = Math.abs(prnt-d); | |
457 | + | |
458 | + final int dist; | |
459 | + if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2; | |
460 | + else if (abs==2)dist=d1; else dist=di0; | |
461 | + | |
462 | + int n=147; | |
463 | + | |
464 | + if (prntF>maxForm) prntF=-1; | |
465 | + if (prntL>maxForm) prntL=-1; | |
466 | + | |
467 | + if (chldF>maxForm) chldF=-1; | |
468 | + if (chldL>maxForm) chldL=-1; | |
469 | + | |
470 | + if (sblF>maxForm) sblF=-1; | |
471 | + if (sblL>maxForm) sblL=-1; | |
472 | + | |
473 | + | |
474 | + dl1.v0= n++; dl1.v1=label;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[0]=dl1.csa(s_dir,dir);f[1]=dl1.csa(s_dist,dist); | |
475 | + dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[2]=dl1.csa(s_dir,dir); f[3]=dl1.csa(s_dist,dist); | |
476 | + dl1.v0= n++; dl1.v2=dP;dl1.cz4(); f[4]=dl1.csa(s_dir,dir); f[5]=dl1.csa(s_dist,dist); | |
477 | + | |
478 | + // sibling only could be tried | |
479 | + dwwp.v1=label; | |
480 | + dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=sblF; dwwp.cz4(); f[6]=dwwp.csa(s_dir,dir); f[7]=dwwp.csa(s_dist,dist); | |
481 | + dwwp.v0= n++; dwwp.v2=chldF; dwwp.cz4(); f[8]=dwwp.csa(s_dir,dir); f[9]=dwwp.csa(s_dist,dist); | |
482 | + dwp.v0= n++; dwp.v1=label; dwp.v2=sblF; dwp.v3=pP; dwp.cz4(); f[10]=dwp.csa(s_dir,dir); f[11]=dwp.csa(s_dist,dist); | |
483 | + dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[12]=dwp.csa(s_dir,dir); f[13]=dwp.csa(s_dist,dist); | |
484 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntF; dwp.v3=sP; dwp.cz4(); f[14]=dwp.csa(s_dir,dir); f[15]=dwp.csa(s_dist,dist); | |
485 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldF; dwp.cz4(); f[16]=dwp.csa(s_dir,dir); f[17]=dwp.csa(s_dist,dist); | |
486 | + | |
487 | + //lemmas | |
488 | + dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=sblL; dwwp.cz4(); f[18]=dwwp.csa(s_dir,dir); | |
489 | + dwwp.v0= n++; dwwp.v2=chldL; dwwp.cz4(); f[19]=dwwp.csa(s_dir,dir); f[20]=dwwp.csa(s_dist,dist); | |
490 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=sblL; dwp.v3=pP; dwp.cz4(); f[21]=dwp.csa(s_dir,dir); f[22]=dwp.csa(s_dist,dist); | |
491 | + dwp.v0= n++; /*dwp.v1=label; */ dwp.v3=dP; dwp.cz4(); f[23]=dwp.csa(s_dir,dir);f[24]=dwp.csa(s_dist,dist); | |
492 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntL; dwp.v3=sP; dwp.cz4(); f[25]=dwp.csa(s_dir,dir); f[26]=dwp.csa(s_dist,dist); | |
493 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldL; dwp.cz4(); f[27]=dwp.csa(s_dir,dir);f[28]=dwp.csa(s_dist,dist); | |
494 | + | |
495 | + | |
496 | + // clusters | |
497 | + | |
498 | + d2lp.v1=label; | |
499 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[29]=d2lp.csa(s_dir,dir); | |
500 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[30]=d2lp.csa(s_dir,dir); f[31]=d2lp.csa(s_dist,dist); | |
501 | + | |
502 | + d3lp.v1= label; | |
503 | + d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[32]=d3lp.csa(s_dir,dir); | |
504 | + | |
505 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sblF; d2lp.cz5(); f[33]=d2lp.csa(s_dir,dir); f[34]=d2lp.csa(s_dist,dist); | |
506 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=chldF; d2lp.cz5(); f[35]=d2lp.csa(s_dir,dir); f[36]=d2lp.csa(s_dist,dist); | |
507 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=prntF; d2lp.cz5(); f[37]=d2lp.csa(s_dir,dir); f[38]=d2lp.csa(s_dist,dist); | |
508 | + | |
509 | + d2pp.v1=label; | |
510 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[39]=d2pp.csa(s_dir,dir); f[40]=d2pp.csa(s_dist,dist); | |
511 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[41]=d2pp.csa(s_dir,dir); f[42]=d2pp.csa(s_dist,dist); | |
512 | + d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[43]=d2pp.csa(s_dir,dir); f[44]=d2pp.csa(s_dist,dist); | |
513 | + | |
514 | + | |
515 | + int prntPm1 = prnt!=0 ? pos[prnt-1] : s_str; | |
516 | + int chldPm1 = d-1>=0 ? pos[d-1] : s_str; | |
517 | + int prntPp1 = prnt!=pos.length-1 ? pos[prnt+1] : s_end; | |
518 | + int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end; | |
519 | + | |
520 | + // sibling part of speech minus and plus 1 | |
521 | + int sblPm1 = sblng>0 ? pos[sblng-1]:s_str; | |
522 | + int sblPp1 = sblng<pos.length-1 ? pos[sblng + 1]:s_end; | |
523 | + | |
524 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP; dl1.cz5(); f[45]=dl1.csa(s_dir,dir); | |
525 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=pP; dl1.cz5(); f[46]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
526 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.cz5(); f[47]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
527 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[48]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
528 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[49]=dl1.csa(s_dir,dir); | |
529 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=prntPm1;dl1.v5=pP;dl1.cz6(); f[50]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
530 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[51]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
531 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[52]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
532 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP; dl1.cz5(); f[53]=dl1.csa(s_dir,dir); | |
533 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=dP; dl1.cz5(); f[54]=dl1.csa(s_dir,dir); | |
534 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.cz5(); f[55]=dl1.csa(s_dir,dir); | |
535 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[56]=dl1.csa(s_dir,dir); | |
536 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6(); f[57]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
537 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[58]=dl1.csa(s_dir,dir); | |
538 | + dl1.v0= n++;dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6();f[59]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
539 | + dl1.v0= n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6(); f[60]=dl1.csa(s_dir,dir); | |
540 | + | |
541 | + int c=61; | |
542 | + | |
543 | + int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1]==-1?-1:cluster.getLP(forms[prnt + 1]): _cend; | |
544 | + int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; | |
545 | + int sLSp1 = sblng < pos.length -1 ? forms[sblng + 1] ==-1?-1:cluster.getLP(forms[sblng + 1]) : _cend; | |
546 | + | |
547 | + int pLSm1 = prnt!=0 ? forms[prnt - 1]==-1?-1:cluster.getLP(forms[prnt - 1]): _cstr; | |
548 | + int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr; | |
549 | + int sLSm1 = sblng>0 ? forms[sblng - 1] ==-1?-1:cluster.getLP(forms[sblng - 1]):_cstr; | |
550 | + | |
551 | + //int c=61; | |
552 | + | |
553 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
554 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
555 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
556 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
557 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
558 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
559 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
560 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
561 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
562 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
563 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
564 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
565 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
566 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
567 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
568 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
569 | + | |
570 | + | |
571 | + | |
572 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
573 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
574 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
575 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
576 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
577 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
578 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
579 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
580 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
581 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
582 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
583 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
584 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
585 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
586 | + dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
587 | + dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
588 | + | |
589 | + // take those in for stacking | |
590 | + | |
591 | + /* | |
592 | + short[] prel = is.plabels[i],phead=is.pheads[i]; | |
593 | + | |
594 | + int g = prnt==phead[d]?1:2 ; | |
595 | + if (sblng>=0) g += prnt==phead[sblng]?4:8; | |
596 | + | |
597 | + int gr = sblng==-1?s_relend:prel[sblng]; | |
598 | + | |
599 | + | |
600 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
601 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
602 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | |
603 | + | |
604 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
605 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
606 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | |
607 | +*/ | |
608 | + | |
609 | + if (feats==null) return; | |
610 | + | |
611 | + int cnt=c; | |
612 | + | |
613 | + short[] featsP =feats[d]; | |
614 | + short[] featsSbl =sblng!=-1?feats[sblng]:null; | |
615 | + | |
616 | + dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=dP; | |
617 | + | |
618 | + | |
619 | + cnt = extractFeat(f, cnt ,dir, featsP, featsSbl); | |
620 | + | |
621 | + featsP =feats[prnt]; | |
622 | + featsSbl =sblng!=-1?feats[sblng]:null; | |
623 | + | |
624 | + dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=sP; | |
625 | + if (featsP!=null && featsSbl!=null) { | |
626 | + for(short i1=0;i1<featsP.length;i1++) { | |
627 | + for(short i2=0;i2<featsSbl.length;i2++) { | |
628 | + dlf.v4=featsP[i1]; dlf.v5=featsSbl[i2]; | |
629 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,prnt<sblng?1:2); | |
630 | + } | |
631 | + } | |
632 | + } else if (featsP==null && featsSbl!=null) { | |
633 | + | |
634 | + for(short i2=0;i2<featsSbl.length;i2++) { | |
635 | + dlf.v4=nofeat; dlf.v5=featsSbl[i2]; | |
636 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
637 | + } | |
638 | + | |
639 | + } else if (featsP!=null && featsSbl==null) { | |
640 | + | |
641 | + for(short i1=0;i1<featsP.length;i1++) { | |
642 | + dlf.v4=featsP[i1]; dlf.v5=nofeat; | |
643 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
644 | + } | |
645 | + } | |
646 | + | |
647 | + return; | |
648 | + } | |
649 | + | |
650 | + private int extractFeat(long[] f, int cnt, int dir, short[] featsP, short[] featsD) { | |
651 | + if (featsP!=null && featsD!=null) { | |
652 | + for(short i1=0;i1<featsP.length;i1++) { | |
653 | + for(short i2=0;i2<featsD.length;i2++) { | |
654 | + dlf.v4=featsP[i1]; dlf.v5=featsD[i2]; | |
655 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
656 | + } | |
657 | + } | |
658 | + } else if (featsP==null && featsD!=null) { | |
659 | + | |
660 | + for(short i2=0;i2<featsD.length;i2++) { | |
661 | + dlf.v4=nofeat; dlf.v5=featsD[i2]; | |
662 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
663 | + | |
664 | + } | |
665 | + } else if (featsP!=null && featsD==null) { | |
666 | + | |
667 | + for(short i1=0;i1<featsP.length;i1++) { | |
668 | + dlf.v4=featsP[i1]; dlf.v5=nofeat; | |
669 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
670 | + | |
671 | + } | |
672 | + } | |
673 | + return cnt; | |
674 | + } | |
675 | + | |
676 | + public IFV encodeCat2(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], | |
677 | + Cluster cluster, IFV f, Long2IntInterface li) { | |
678 | + | |
679 | + | |
680 | + long[] svs = new long[250]; | |
681 | + | |
682 | + for (int i = 1; i < heads.length; i++) { | |
683 | + | |
684 | + | |
685 | + int n =basic(pposs, forms, heads[i], i, cluster, f); | |
686 | + firstm(is, ic, heads[i], i, types[i], cluster,svs); | |
687 | + for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k])); | |
688 | + | |
689 | + int ch,cmi,cmo; | |
690 | + if (heads[i] < i) { | |
691 | + ch = rightmostRight(heads, heads[i], i); | |
692 | + cmi = leftmostLeft(heads, i, heads[i]); | |
693 | + cmo = rightmostRight(heads, i, heads.length); | |
694 | + | |
695 | + } else { | |
696 | + ch = leftmostLeft(heads, heads[i], i); | |
697 | + cmi = rightmostRight(heads, i, heads[i]); | |
698 | + cmo = leftmostLeft(heads, i, 0); | |
699 | + } | |
700 | + | |
701 | + siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n); | |
702 | + for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k])); | |
703 | + | |
704 | + | |
705 | + gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs); | |
706 | + for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k])); | |
707 | + | |
708 | + gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs); | |
709 | + for(int k=0;k<svs.length;k++)f.add(li.l2i(svs[k])); | |
710 | + } | |
711 | + | |
712 | + return f; | |
713 | + } | |
714 | + | |
715 | + public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) { | |
716 | + | |
717 | + | |
718 | + long[] svs = new long[250]; | |
719 | + | |
720 | + for (int i = 1; i < heads.length; i++) { | |
721 | + | |
722 | + | |
723 | + int n =basic(pposs, forms, heads[i], i, cluster, f); | |
724 | + firstm(is, ic, heads[i], i, types[i], cluster,svs); | |
725 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | |
726 | + | |
727 | + int ch,cmi,cmo; | |
728 | + if (heads[i] < i) { | |
729 | + ch = rightmostRight(heads, heads[i], i); | |
730 | + cmi = leftmostLeft(heads, i, heads[i]); | |
731 | + cmo = rightmostRight(heads, i, heads.length); | |
732 | + | |
733 | + } else { | |
734 | + ch = leftmostLeft(heads, heads[i], i); | |
735 | + cmi = rightmostRight(heads, i, heads[i]); | |
736 | + cmo = leftmostLeft(heads, i, 0); | |
737 | + } | |
738 | + | |
739 | + siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n); | |
740 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | |
741 | + | |
742 | + | |
743 | + gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs); | |
744 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | |
745 | + | |
746 | + gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs); | |
747 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | |
748 | + } | |
749 | + | |
750 | + return f; | |
751 | + } | |
752 | + | |
753 | + | |
754 | + public float encode3(short[] pos, short heads[] , short[] types, DataF d2) { | |
755 | + | |
756 | + double v = 0; | |
757 | + for (int i = 1; i < heads.length; i++) { | |
758 | + | |
759 | + int dir= (heads[i] < i)? 0:1; | |
760 | + | |
761 | + v += d2.pl[heads[i]][i]; | |
762 | + v += d2.lab[heads[i]][i][types[i]][dir]; | |
763 | + | |
764 | + boolean left = i<heads[i]; | |
765 | + short[] labels = Edges.get(pos[heads[i]], pos[i], left); | |
766 | + int lid=-1; | |
767 | + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} | |
768 | + | |
769 | + int ch,cmi,cmo; | |
770 | + if (heads[i] < i) { | |
771 | + ch = rightmostRight(heads, heads[i], i); | |
772 | + cmi = leftmostLeft(heads, i, heads[i]); | |
773 | + cmo = rightmostRight(heads, i, heads.length); | |
774 | + | |
775 | + if (ch==-1) ch=heads[i]; | |
776 | + if (cmi==-1) cmi=heads[i]; | |
777 | + if (cmo==-1) cmo=heads[i]; | |
778 | + | |
779 | + } else { | |
780 | + ch = leftmostLeft(heads, heads[i], i); | |
781 | + cmi = rightmostRight(heads, i, heads[i]); | |
782 | + cmo = leftmostLeft(heads, i, 0); | |
783 | + | |
784 | + if (ch==-1) ch=i; | |
785 | + if (cmi==-1) cmi=i; | |
786 | + if (cmo==-1) cmo=i; | |
787 | + } | |
788 | + v += d2.sib[heads[i]][i][ch][dir][lid]; | |
789 | + v += d2.gra[heads[i]][i][cmi][dir][lid]; | |
790 | + v += d2.gra[heads[i]][i][cmo][dir][lid]; | |
791 | + } | |
792 | + return (float)v; | |
793 | + } | |
794 | + | |
795 | + /** | |
796 | + * Provide the scores of the edges | |
797 | + * @param pos | |
798 | + * @param heads | |
799 | + * @param types | |
800 | + * @param edgesScores | |
801 | + * @param d2 | |
802 | + * @return | |
803 | + */ | |
804 | + public static float encode3(short[] pos, short heads[] , short[] types, float[] edgesScores, DataF d2) { | |
805 | + | |
806 | + double v = 0; | |
807 | + for (int i = 1; i < heads.length; i++) { | |
808 | + | |
809 | + int dir= (heads[i] < i)? 0:1; | |
810 | + | |
811 | + edgesScores[i] = d2.pl[heads[i]][i]; | |
812 | + edgesScores[i] += d2.lab[heads[i]][i][types[i]][dir]; | |
813 | + | |
814 | + boolean left = i<heads[i]; | |
815 | + short[] labels = Edges.get(pos[heads[i]], pos[i], left); | |
816 | + int lid=-1; | |
817 | + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} | |
818 | + | |
819 | + int ch,cmi,cmo; | |
820 | + if (heads[i] < i) { | |
821 | + ch = rightmostRight(heads, heads[i], i); | |
822 | + cmi = leftmostLeft(heads, i, heads[i]); | |
823 | + cmo = rightmostRight(heads, i, heads.length); | |
824 | + | |
825 | + if (ch==-1) ch=heads[i]; | |
826 | + if (cmi==-1) cmi=heads[i]; | |
827 | + if (cmo==-1) cmo=heads[i]; | |
828 | + | |
829 | + } else { | |
830 | + ch = leftmostLeft(heads, heads[i], i); | |
831 | + cmi = rightmostRight(heads, i, heads[i]); | |
832 | + cmo = leftmostLeft(heads, i, 0); | |
833 | + | |
834 | + if (ch==-1) ch=i; | |
835 | + if (cmi==-1) cmi=i; | |
836 | + if (cmo==-1) cmo=i; | |
837 | + } | |
838 | + edgesScores[i] += d2.sib[heads[i]][i][ch][dir][lid]; | |
839 | + edgesScores[i] += d2.gra[heads[i]][i][cmi][dir][lid]; | |
840 | + edgesScores[i] += d2.gra[heads[i]][i][cmo][dir][lid]; | |
841 | + v+=edgesScores[i]; | |
842 | + } | |
843 | + return (float)v; | |
844 | + } | |
845 | + | |
846 | + | |
847 | + private static int rightmostRight(short[] heads, int head, int max) { | |
848 | + int rightmost = -1; | |
849 | + for (int i = head + 1; i < max; i++) if (heads[i] == head) rightmost = i; | |
850 | + | |
851 | + return rightmost; | |
852 | + } | |
853 | + | |
854 | + private static int leftmostLeft(short[] heads, int head, int min) { | |
855 | + int leftmost = -1; | |
856 | + for (int i = head - 1; i > min; i--) if (heads[i] == head) leftmost = i; | |
857 | + return leftmost; | |
858 | + } | |
859 | + | |
860 | + public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA"; | |
861 | + | |
862 | + private static int ra,la; | |
863 | + private static int s_str; | |
864 | + private static int s_end, _cend,_cstr, s_stwrd,s_relend; | |
865 | + | |
866 | + protected static final String TYPE = "TYPE",DIR = "D"; | |
867 | + public static final String POS = "POS"; | |
868 | + protected static final String DIST = "DIST",MID = "MID", FEAT="F"; | |
869 | + | |
870 | + private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10"; | |
871 | + | |
872 | + private static int di0, d4,d3,d2,d1,d5,d10; | |
873 | + | |
874 | + | |
875 | + private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS"; | |
876 | + | |
877 | + | |
878 | + | |
879 | + private static int nofeat; | |
880 | + | |
881 | + | |
882 | + public static int maxForm; | |
883 | + | |
884 | + | |
885 | + /** | |
886 | + * Initialize the features. | |
887 | + * @param maxFeatures | |
888 | + */ | |
889 | + static public void initFeatures() { | |
890 | + | |
891 | + | |
892 | + MFB mf = new MFB(); | |
893 | + mf.register(POS, MID); | |
894 | + s_str = mf.register(POS, STR); | |
895 | + s_end = mf.register(POS, END); | |
896 | + | |
897 | + s_relend = mf.register(REL, END); | |
898 | + | |
899 | + _cstr= mf.register(Cluster.SPATH,STR); | |
900 | + _cend=mf.register(Cluster.SPATH,END); | |
901 | + | |
902 | + | |
903 | + mf.register(TYPE, POS); | |
904 | + | |
905 | + s_stwrd=mf.register(WORD,STWRD); | |
906 | + mf.register(POS,STPOS); | |
907 | + | |
908 | + la = mf.register(DIR, LA); | |
909 | + ra = mf.register(DIR, RA); | |
910 | + | |
911 | + // mf.register(TYPE, CHAR); | |
912 | + | |
913 | + mf.register(TYPE, FEAT); | |
914 | + nofeat=mf.register(FEAT, "NOFEAT"); | |
915 | + | |
916 | + for(int k=0;k<215;k++) mf.register(TYPE, "F"+k); | |
917 | + | |
918 | + | |
919 | + di0=mf.register(DIST, _0); | |
920 | + d1=mf.register(DIST, _1); | |
921 | + d2=mf.register(DIST, _2); | |
922 | + d3=mf.register(DIST, _3); | |
923 | + d4=mf.register(DIST, _4); | |
924 | + d5=mf.register(DIST, _5); | |
925 | + // d5l=mf.register(DIST, _5l); | |
926 | + d10=mf.register(DIST, _10); | |
927 | + | |
928 | + | |
929 | + } | |
930 | + | |
931 | + /* (non-Javadoc) | |
932 | + * @see extractors.Extractor#getType() | |
933 | + */ | |
934 | + @Override | |
935 | + public int getType() { | |
936 | + | |
937 | + return s_type; | |
938 | + } | |
939 | + | |
940 | + /* (non-Javadoc) | |
941 | + * @see extractors.Extractor#setMaxForm(int) | |
942 | + */ | |
943 | + @Override | |
944 | + public void setMaxForm(int max) { | |
945 | + maxForm = max; | |
946 | + } | |
947 | + | |
948 | + /* (non-Javadoc) | |
949 | + * @see extractors.Extractor#getMaxForm() | |
950 | + */ | |
951 | + @Override | |
952 | + public int getMaxForm() { | |
953 | + return maxForm; | |
954 | + } | |
955 | + | |
956 | + | |
957 | + | |
958 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/extractors/ExtractorClusterStackedR2.java
0 → 100644
1 | +package extractors; | |
2 | + | |
3 | + | |
4 | +import java.util.Arrays; | |
5 | + | |
6 | +import is2.data.Cluster; | |
7 | +import is2.data.D4; | |
8 | +import is2.data.DataF; | |
9 | +import is2.data.Edges; | |
10 | +import is2.data.FV; | |
11 | +import is2.data.IFV; | |
12 | +import is2.data.Instances; | |
13 | +import is2.data.Long2IntInterface; | |
14 | +import is2.data.MFB; | |
15 | +import is2.util.DB; | |
16 | + | |
17 | + | |
18 | + | |
19 | +final public class ExtractorClusterStackedR2 implements Extractor { | |
20 | + | |
21 | + public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos; | |
22 | + | |
23 | + MFB mf; | |
24 | + | |
25 | + final D4 d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ; | |
26 | + | |
27 | + public final Long2IntInterface li; | |
28 | + | |
/**
 * Creates the extractor: runs {@code initFeatures()}, stores the mapping and creates one
 * {@link D4} descriptor per feature template, each bound to {@code li}.
 *
 * @param li mapping handed to every descriptor
 */
public ExtractorClusterStackedR2(Long2IntInterface li) {

	initFeatures();
	this.li = li;

	this.d0 = new D4(li);
	this.dl1 = new D4(li);
	this.dl2 = new D4(li);
	this.dwr = new D4(li);
	this.dr = new D4(li);
	this.dwwp = new D4(li);

	this.dw = new D4(li);
	this.dwp = new D4(li);

	this.dlf = new D4(li);
	this.d3lp = new D4(li);
	this.d2lp = new D4(li);
	this.d2pw = new D4(li);
	this.d2pp = new D4(li);
}
45 | + | |
46 | + public void initStat() { | |
47 | + | |
48 | + | |
49 | + mf = new MFB(); | |
50 | + s_rel = mf.getFeatureCounter().get(REL).intValue(); | |
51 | + s_pos = mf.getFeatureCounter().get(POS).intValue(); | |
52 | + s_word = mf.getFeatureCounter().get(WORD).intValue(); | |
53 | + s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits(); | |
54 | + s_dir = mf.getFeatureCounter().get(DIR); | |
55 | + la = mf.getValue(DIR, LA); | |
56 | + ra = mf.getValue(DIR, RA); | |
57 | + s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST); | |
58 | + s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT); | |
59 | + s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH); | |
60 | + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH); | |
61 | + } | |
62 | + | |
63 | + public void init(){ | |
64 | + // DB.println("init"); | |
65 | + d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos; | |
66 | + dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; | |
67 | + dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos; | |
68 | + dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word; | |
69 | + dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word; | |
70 | + dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos; | |
71 | + d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath; | |
72 | + d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | |
73 | + d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | |
74 | + d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; | |
75 | + } | |
76 | + | |
77 | + | |
78 | + public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f) | |
79 | + { | |
80 | + | |
81 | + d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean(); | |
82 | + | |
83 | + d3lp.clean(); d2lp.clean();d2pw.clean(); d2pp.clean(); | |
84 | + | |
85 | + int n=1; | |
86 | + int dir= (p < d)? ra:la; | |
87 | + // d0.v0= n; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4; | |
88 | + int end= (p >= d ? p : d); | |
89 | + int start = (p >= d ? d : p) + 1; | |
90 | + | |
91 | + StringBuilder s = new StringBuilder(end-start); | |
92 | + int[] x = new int[end-start]; | |
93 | + int c=0; | |
94 | + for(int i = start ; i <end ; i++) { | |
95 | + //d0.v3=pposs[i]; | |
96 | + //d0.cz4(); | |
97 | + //d0.csa(s_dir,dir,f); | |
98 | +// s.append((char)pposs[i]); | |
99 | + x[c++] =pposs[i]; | |
100 | + } | |
101 | + | |
102 | + Arrays.sort(x); | |
103 | + for(int i = 0;i<x.length ; i++) { | |
104 | + if (i==0 || x[i]!=x[i-1] ) s.append(x[i]); | |
105 | + } | |
106 | + int v = mf.register("px", s.toString()); | |
107 | + | |
108 | + dwp.v0 = n++; dwp.v1 = 1;dwp.v2 = v; dwp.v3 = pposs[p]; dwp.v4 = pposs[d]; dwp.cz5(); dwp.csa(s_dir,dir,f); | |
109 | + | |
110 | + return n; | |
111 | + } | |
112 | + | |
113 | + | |
/**
 * Fills {@code f} with the feature codes of the edge from {@code prnt} to {@code dpnt} that
 * carries {@code label}, using the data of sentence {@code i} in {@code is}.
 *
 * The buffer is zeroed first. Forms and lemmas larger than {@code maxForm} are replaced by -1;
 * the cluster ids are looked up before that replacement. When {@code is.feats[i]} is null the
 * method returns before the morphologic feature combinations are added.
 *
 * NOTE(review): the templates between the first group and the "lemmas" group store their
 * result with {@code f[n++]} while all other groups use {@code f[c++]}. As written, {@code n}
 * advances twice per line there and the later {@code f[c++]} writes can land on slots that were
 * already filled via {@code f[n++]}. This looks unintended, but changing it would change the
 * emitted features - confirm before touching.
 *
 * @param is      instance store; pposs, forms, feats, plemmas, plabels and pheads are read
 * @param i       index of the sentence inside {@code is}
 * @param prnt    index of the head token
 * @param dpnt    index of the dependent token
 * @param label   edge label, stored in slot v1 of the descriptors
 * @param cluster word cluster lookup
 * @param f       output buffer for the feature codes
 */
114 | + public void firstm(Instances is, int i, | |
115 | + int prnt, int dpnt, int label, Cluster cluster, long[] f) | |
116 | + { | |
117 | + | |
118 | + | |
119 | + //short[] pposs, int[] form, int[] lemmas, short[][] feats | |
	// reset the whole output buffer
120 | + for(int k=0;k<f.length;k++) f[k]=0; | |
121 | + | |
122 | + short[] pposs = is.pposs[i]; | |
123 | + int[] form =is.forms[i]; | |
124 | + short[][] feats = is.feats[i]; | |
125 | + | |
126 | + | |
127 | + int pF = form[prnt],dF = form[dpnt]; | |
128 | + int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt]; | |
129 | + int pP = pposs[prnt],dP = pposs[dpnt]; | |
130 | + | |
	// cluster ids are taken from the unclamped forms; -1 stands for "no form"
131 | + int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF); | |
132 | + | |
133 | + final int dir= (prnt < dpnt)? ra:la; | |
134 | + | |
	// forms and lemmas above maxForm are replaced by -1
135 | + if (pF>maxForm) pF=-1; | |
136 | + if (pL>maxForm) pL=-1; | |
137 | + | |
138 | + if (dF>maxForm) dF=-1; | |
139 | + if (dL>maxForm) dL=-1; | |
140 | + | |
141 | + | |
	// n numbers the templates (stored in slot v0), c is the write position in f
142 | + int n=3,c=0; | |
143 | + | |
	// Descriptor slots keep their value from line to line; a line only sets the slots that change.
144 | + dl2.v1=label; | |
145 | + dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | |
146 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | |
147 | + dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | |
148 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | |
149 | + | |
150 | + | |
151 | + dwwp.v1=label; | |
152 | + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir); | |
153 | + | |
154 | + dl1.v1=label; | |
155 | + dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir); | |
156 | + dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir); | |
157 | + dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.csa(s_dir,dir); | |
158 | + | |
	// part-of-speech context one and two positions around head and dependent;
	// s_str / s_end are used beyond the sentence boundaries
159 | + int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str; | |
160 | + int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end; | |
161 | + | |
162 | + int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str; | |
163 | + int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end; | |
164 | + | |
	// neighbouring forms; s_stwrd is used on both sides of the sentence
165 | + int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd; | |
166 | + int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd; | |
167 | + | |
168 | + | |
169 | + | |
	// NOTE(review): from here to the "lemmas" group the results are stored at f[n++], not f[c++]
170 | + dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | |
171 | + dl1.v0= n++; dl1.v5=dPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | |
172 | + dl1.v0= n++; dl1.v3=pPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | |
173 | + dl1.v0= n++; dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | |
174 | + | |
175 | + | |
176 | + dl1.v0= n++; dl1.v3=pPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | |
177 | + dl1.v0= n++; dl1.v3=dPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | |
178 | + dl1.v0= n++; dl1.v3=dPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | |
179 | + dl1.v0= n++; dl1.v3=pPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | |
180 | + | |
181 | + dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | |
182 | + dl1.v0= n++; dl1.v5=dPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | |
183 | + dl1.v0= n++; dl1.v3=pPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | |
184 | + dl1.v0= n++; dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); | |
185 | + | |
186 | + dl1.v0= n++; dl1.v3=pPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | |
187 | + dl1.v0= n++; dl1.v3=dPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | |
188 | + dl1.v0= n++; dl1.v3=dPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | |
189 | + dl1.v0= n++; dl1.v3=pPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); | |
190 | + | |
191 | + | |
192 | + | |
	// NOTE(review): in the next eight lines dl2.v3 is assigned twice, so the neighbouring form
	// (dFm1, dFp1, pFm1, pFp1) is overwritten before use and dl2.v2 keeps its earlier value;
	// presumably the first assignment was meant for dl2.v2 - confirm. These lines also use
	// getVal() where the other groups use csa(s_dir,dir).
193 | + dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal(); | |
194 | + dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.cz5(); f[n++]=dl2.getVal(); | |
195 | + dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal(); | |
196 | + dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.cz5(); f[n++]=dl2.getVal(); | |
197 | + | |
198 | + | |
199 | + dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal(); | |
200 | + dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.cz5(); f[n++]=dl2.getVal(); | |
201 | + dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal(); | |
202 | + dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.cz5(); f[n++]=dl2.getVal(); | |
203 | + | |
204 | + | |
205 | + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir); | |
206 | + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir); | |
	// NOTE(review): dwwp.v4 is assigned twice below (pP, then dP) and v5 is not set before cz6()
207 | + dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[n++]=dwwp.csa(s_dir,dir); | |
208 | + | |
209 | + | |
210 | + | |
211 | + // lemmas | |
212 | + | |
	// from here on the results are stored at f[c++] again
213 | + dl2.v1=label; | |
214 | + dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | |
215 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | |
216 | + dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); | |
217 | + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); | |
218 | + | |
219 | + | |
220 | + dwwp.v1=label; | |
221 | + dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir); | |
222 | + | |
223 | + dwp.v1= label; | |
224 | + dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; dwp.cz6(); f[c++]=dwp.csa(s_dir,dir); | |
225 | + dwp.v0=n++;dwp.cz5(); f[c++]=dwp.csa(s_dir,dir); | |
226 | + | |
227 | + dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.csa(s_dir,dir); | |
228 | + dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir); | |
229 | + dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir); | |
230 | + | |
231 | + | |
232 | + // cluster | |
233 | + | |
234 | + d2pw.v1=label; | |
235 | + d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.csa(s_dir,dir); | |
236 | + d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir); | |
237 | + d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir); | |
238 | + d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.csa(s_dir,dir); | |
239 | + | |
240 | + | |
241 | + d2pp.v1=label; | |
242 | + d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.csa(s_dir,dir); | |
243 | + d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | |
244 | + d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | |
245 | + d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.csa(s_dir,dir); | |
246 | + | |
247 | + | |
	// prel and phead are only referenced by the commented-out stacking templates below
248 | + short[] prel = is.plabels[i]; | |
249 | + short[] phead = is.pheads[i]; | |
250 | + | |
251 | + | |
252 | + //take those in for stacking | |
253 | + // dl2.v1=label; | |
254 | + // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir); | |
255 | + // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.csa(s_dir,dir); | |
256 | + | |
257 | + | |
258 | + | |
	// without morphologic features there is nothing more to add
259 | + if (feats==null) return; | |
260 | + | |
261 | + short[] featsP =feats[prnt], featsD =feats[dpnt]; | |
262 | + dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP; | |
	// the write position returned by extractFeat is not needed afterwards
263 | + extractFeat(f, c, dir, featsP, featsD); | |
264 | + | |
265 | + return; | |
266 | + } | |
267 | + | |
268 | + /** Fills f with the feature codes for the triple (p, d, gc) of sentence i: the edge p->d plus the token gc (-1 when absent); as called from encodeCat, gc is a dependent of d. */ | |
269 | + /* label is stored in v1 of every template object; cluster supplies the getLP() ids used by the cluster templates. */ | |
270 | + public void gcm(Instances is , int i, int p, int d, int gc, int label,Cluster cluster, long[] f) { | |
271 | + /* Clear the output buffer (encodeCat reuses one array across calls). */ | |
272 | + for(int k=0;k<f.length;k++) f[k]=0; | |
273 | + /* Token-level arrays of sentence i. */ | |
274 | + short[] pos= is.pposs[i]; | |
275 | + int[] forms=is.forms[i]; | |
276 | + int[] lemmas=is.plemmas[i]; | |
277 | + short[][] feats=is.feats[i]; | |
278 | + | |
279 | + int pP = pos[p], dP = pos[d]; | |
280 | + int prntF = forms[p], chldF = forms[d]; | |
281 | + int prntL = lemmas[p], chldL = lemmas[d]; | |
282 | + int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); | |
283 | + /* Values for gc fall back to the start markers (s_str / s_stwrd) when gc == -1; gcLS also falls back to s_stwrd when gcF == -1. */ | |
284 | + int gP = gc != -1 ? pos[gc] : s_str; | |
285 | + int gcF = gc != -1 ? forms[gc] : s_stwrd; | |
286 | + int gcL = gc != -1 ? lemmas[gc] : s_stwrd; | |
287 | + int gcLS = (gc != -1) && (gcF!=-1) ? cluster.getLP(gcF) : s_stwrd; | |
288 | + /* Form and lemma ids above maxForm are replaced by -1; the cluster ids above were already looked up from the unclamped form ids. */ | |
289 | + if (prntF>maxForm) prntF=-1; | |
290 | + if (prntL>maxForm) prntL=-1; | |
291 | + | |
292 | + if (chldF>maxForm) chldF=-1; | |
293 | + if (chldL>maxForm) chldL=-1; | |
294 | + | |
295 | + if (gcF>maxForm) gcF=-1; | |
296 | + if (gcL>maxForm) gcL=-1; | |
297 | + | |
298 | + /* dir: direction of the edge p->d; dir_gra: direction between d and gc. */ | |
299 | + int dir= (p < d)? ra:la, dir_gra =(d < gc)? ra:la; | |
300 | + /* n: id stored in v0 of each template (starts at 84 here); c: next free slot in f. */ | |
301 | + int n=84,c=0; | |
302 | + /* POS templates. cs(s_dir,dir) is called before csa(s_dir,dir_gra), so both directions take part in these entries (NOTE(review): the exact packing is defined by the template class, which is not visible here). */ | |
303 | + //dl1.v023(); | |
304 | + dl1.v1=label; | |
305 | + dl1.v0= n++; dl1.v2=pP; dl1.v3=dP;dl1.v4=gP; dl1.cz5(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); | |
306 | + dl1.v0= n++; dl1.v2=pP; dl1.v3=gP; dl1.cz4();dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); | |
307 | + dl1.v0= n++; dl1.v2=dP; dl1.cz4(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); | |
308 | + /* Word-form templates. */ | |
309 | + dwwp.v1=label; | |
310 | + dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=gcF; | |
311 | + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | |
312 | + | |
313 | + dwwp.v0= n++; dwwp.v2=chldF; dwwp.v3=gcF; | |
314 | + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | |
315 | + | |
316 | + dwp.v1=label; | |
317 | + dwp.v0= n++; dwp.v2=gcF; dwp.v3=pP; | |
318 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
319 | + | |
320 | + dwp.v0= n++; dwp.v2=gcF; dwp.v3=dP; | |
321 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
322 | + | |
323 | + dwp.v0= n++; dwp.v2=prntF; dwp.v3=gP; | |
324 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
325 | + | |
326 | + dwp.v0= n++; dwp.v2=chldF; dwp.v3=gP; | |
327 | + dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra); | |
328 | + | |
329 | + | |
330 | + // lemma | |
331 | + | |
332 | + dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=gcL; | |
333 | + dwwp.cz4();dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | |
334 | + | |
335 | + dwwp.v0= n++; dwwp.v2=chldL; dwwp.v3=gcL; | |
336 | + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); | |
337 | + | |
338 | + dwp.v0= n++; dwp.v2=gcL; dwp.v3=pP; | |
339 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
340 | + | |
341 | + dwp.v0= n++; dwp.v2=gcL; dwp.v3=dP; | |
342 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
343 | + | |
344 | + dwp.v0= n++; dwp.v2=prntL; dwp.v3=gP; | |
345 | + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); | |
346 | + | |
347 | + dwp.v0= n++; dwp.v2=chldL; dwp.v3=gP; | |
348 | + dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra); | |
349 | + | |
350 | + | |
351 | + // clusters | |
352 | + | |
353 | + d2lp.v1= label; | |
354 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);// f.add(li.l2i(l)); | |
355 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra); | |
356 | + d3lp.v0= n++; d3lp.v1= label; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=gcLS; d3lp.cz5(); d3lp.cs(s_dir,dir);f[c++]=d3lp.csa(s_dir,dir_gra); | |
357 | + /* From here on the templates pass only dir (not dir_gra) to csa(). */ | |
358 | + //_f83; | |
359 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=gcF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); | |
360 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.v4=chldF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); | |
361 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.v4=prntF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); | |
362 | + | |
363 | + d2pp.v1= label; | |
364 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=gP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | |
365 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=gcLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | |
366 | + d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=gcLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); | |
367 | + | |
368 | + | |
369 | + | |
370 | + // linear features | |
371 | + /* POS values of the tokens next to p, d and gc; s_str / s_end stand in beyond the sentence edges. */ | |
372 | + int prntPm1 = p != 0 ? pos[p - 1] : s_str; // parent-pos-minus1 | |
373 | + int chldPm1 = d - 1 >=0 ? pos[d - 1] : s_str; // child-pos-minus1 | |
374 | + int prntPp1 = p != pos.length - 1 ? pos[p + 1] : s_end; | |
375 | + int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end; | |
376 | + /* NOTE(review): for gc == -1 the test gc < pos.length - 1 holds, so gcPp1 becomes pos[0] while gcPm1 becomes s_str. */ | |
377 | + int gcPm1 = gc > 0 ? pos[gc - 1] : s_str; | |
378 | + int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end; | |
379 | + | |
380 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
381 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
382 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
383 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
384 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
385 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
386 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP;dl1.v5=chldPp1; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
387 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); /* v5 is not assigned on this row; it keeps chldPp1 from the previous row unless cz6()/csa() reset it. */ | |
388 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
389 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
390 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
391 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
392 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
393 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
394 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
395 | + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=prntPp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
396 | + | |
397 | + /* Cluster ids of the tokens next to p, d and gc. */ | |
398 | + int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend; | |
399 | + int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; | |
400 | + int gcLSp1 = gc < pos.length -1 ? forms[gc + 1] ==-1?-1:cluster.getLP(forms[gc + 1]) : s_end; | |
401 | + /* NOTE(review): the ...m1 values below index lemmas while the ...p1 values above index forms, and gcLSp1 falls back to s_end where pLSp1/cLSp1 use _cend. Confirm this is intended. */ | |
402 | + int pLSm1 = p != 0 ? lemmas[p - 1]==-1?-1:cluster.getLP(lemmas[p - 1]): _cstr; | |
403 | + int cLSm1 = d - 1 >=0 ? lemmas[d - 1] ==-1?-1:cluster.getLP(lemmas[d - 1]):_cstr; | |
404 | + int gcLSm1 = gc > 0 ? lemmas[gc - 1] ==-1?-1:cluster.getLP(lemmas[gc - 1]) : _cstr; | |
405 | + | |
406 | + | |
407 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
408 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=dP; dl1.cz5();f[c++]=dl1.csa(s_dir,dir); | |
409 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
410 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
411 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
412 | + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
413 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
414 | + dl1.v0= n++; dl1.v2=cLSm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); /* NOTE(review): v2 is cLSm1 here (the POS counterpart uses gcPm1), and v5 is not assigned, so it keeps cLSp1 from the previous row unless cz6()/csa() reset it. */ | |
415 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
416 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
417 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
418 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
419 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
420 | + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
421 | + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
422 | + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=pLSp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
423 | + | |
424 | + | |
425 | + /* g and gr (and with them prel and phead) are consumed only by the commented-out stacking templates below. */ | |
426 | + short[] prel = is.plabels[i],phead=is.pheads[i]; | |
427 | + | |
428 | + int g = p==phead[d]?1:2 ; | |
429 | + if (gc>=0) g += d==phead[gc]?4:8; | |
430 | + | |
431 | + int gr = gc==-1?s_relend:prel[gc]; | |
432 | + | |
433 | + // take those in for stacking | |
434 | + /* | |
435 | + dl2.v1=label; | |
436 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
437 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
438 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | |
439 | + | |
440 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
441 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
442 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | |
443 | + | |
444 | +*/ | |
445 | + if (feats==null) return; | |
446 | + /* Feature-array pairs: featsP holds the entry of d and featsD the entry of gc (null when gc == -1). */ | |
447 | + short[] featsP =feats[d]; | |
448 | + short[] featsD =gc!=-1?feats[gc]:null; | |
449 | + /* extractFeat() assigns only dlf.v4/v5, so v0..v3 are set here first; its returned index is not used. */ | |
450 | + dlf.v0= n++; dlf.v1=label; dlf.v2=gP; dlf.v3=dP; | |
451 | + extractFeat(f, c, dir, featsP, featsD); | |
452 | + return; | |
453 | + } | |
454 | + /** Fills f with the feature codes for the edge prnt->d combined with the token sblng (-1 when absent); encodeCat passes the dependent of prnt that lies between prnt and d and is nearest to d. */ | |
455 | + /* is, i and v are not read by the active code (is and i appear only inside the commented-out stacking block). */ | |
456 | + public void siblingm(Instances is , int i,short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, int sblng, int label, Cluster cluster, long[] f, int v) | |
457 | + { | |
458 | + /* Clear the output buffer. */ | |
459 | + for(int k=0;k<f.length;k++) f[k]=0; | |
460 | + | |
461 | + int pP = pos[prnt], dP = pos[d]; | |
462 | + int prntF = forms[prnt],chldF = forms[d]; | |
463 | + int prntL = lemmas[prnt], chldL = lemmas[d]; | |
464 | + int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); | |
465 | + /* Values for sblng fall back to the start markers (s_str / s_stwrd) when sblng == -1. */ | |
466 | + int sP = sblng!=-1 ? pos[sblng] : s_str, sblF = sblng!=-1 ? forms[sblng] : s_stwrd, sblL = sblng!=-1 ? lemmas[sblng] : s_stwrd; | |
467 | + | |
468 | + int sblLS = (sblng != -1)&&(sblF!=-1) ? cluster.getLP(sblF) : s_stwrd; | |
469 | + | |
470 | + | |
471 | + int dir= (prnt < d)? ra:la; | |
472 | + | |
473 | + int abs = Math.abs(prnt-d); | |
474 | + /* Bucket |prnt - d|: >10 -> d10, 6..10 -> d5, 5 -> d4, 4 -> d3, 3 -> d2, 2 -> d1, otherwise di0. */ | |
475 | + final int dist; | |
476 | + if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2; | |
477 | + else if (abs==2)dist=d1; else dist=di0; | |
478 | + /* n: id stored in v0 of each template (starts at 147 here). */ | |
479 | + int n=147; | |
480 | + /* Form and lemma ids above maxForm are replaced by -1; the cluster ids above were already looked up from the unclamped form ids. */ | |
481 | + if (prntF>maxForm) prntF=-1; | |
482 | + if (prntL>maxForm) prntL=-1; | |
483 | + | |
484 | + if (chldF>maxForm) chldF=-1; | |
485 | + if (chldL>maxForm) chldL=-1; | |
486 | + | |
487 | + if (sblF>maxForm) sblF=-1; | |
488 | + if (sblL>maxForm) sblL=-1; | |
489 | + | |
490 | + /* f[0..60] are written at fixed positions. */ | |
491 | + dl1.v0= n++; dl1.v1=label;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[0]=dl1.csa(s_dir,dir);f[1]=dl1.csa(s_dist,dist); | |
492 | + dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[2]=dl1.csa(s_dir,dir); f[3]=dl1.csa(s_dist,dist); | |
493 | + dl1.v0= n++; dl1.v2=dP;dl1.cz4(); f[4]=dl1.csa(s_dir,dir); f[5]=dl1.csa(s_dist,dist); | |
494 | + | |
495 | + // sibling only could be tried | |
496 | + dwwp.v1=label; | |
497 | + dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=sblF; dwwp.cz4(); f[6]=dwwp.csa(s_dir,dir); f[7]=dwwp.csa(s_dist,dist); | |
498 | + dwwp.v0= n++; dwwp.v2=chldF; dwwp.cz4(); f[8]=dwwp.csa(s_dir,dir); f[9]=dwwp.csa(s_dist,dist); | |
499 | + dwp.v0= n++; dwp.v1=label; dwp.v2=sblF; dwp.v3=pP; dwp.cz4(); f[10]=dwp.csa(s_dir,dir); f[11]=dwp.csa(s_dist,dist); | |
500 | + dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[12]=dwp.csa(s_dir,dir); f[13]=dwp.csa(s_dist,dist); | |
501 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntF; dwp.v3=sP; dwp.cz4(); f[14]=dwp.csa(s_dir,dir); f[15]=dwp.csa(s_dist,dist); | |
502 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldF; dwp.cz4(); f[16]=dwp.csa(s_dir,dir); f[17]=dwp.csa(s_dist,dist); | |
503 | + | |
504 | + //lemmas | |
505 | + dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=sblL; dwwp.cz4(); f[18]=dwwp.csa(s_dir,dir); | |
506 | + dwwp.v0= n++; dwwp.v2=chldL; dwwp.cz4(); f[19]=dwwp.csa(s_dir,dir); f[20]=dwwp.csa(s_dist,dist); | |
507 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=sblL; dwp.v3=pP; dwp.cz4(); f[21]=dwp.csa(s_dir,dir); f[22]=dwp.csa(s_dist,dist); | |
508 | + dwp.v0= n++; /*dwp.v1=label; */ dwp.v3=dP; dwp.cz4(); f[23]=dwp.csa(s_dir,dir);f[24]=dwp.csa(s_dist,dist); | |
509 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntL; dwp.v3=sP; dwp.cz4(); f[25]=dwp.csa(s_dir,dir); f[26]=dwp.csa(s_dist,dist); | |
510 | + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldL; dwp.cz4(); f[27]=dwp.csa(s_dir,dir);f[28]=dwp.csa(s_dist,dist); | |
511 | + | |
512 | + | |
513 | + // clusters | |
514 | + | |
515 | + d2lp.v1=label; | |
516 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[29]=d2lp.csa(s_dir,dir); | |
517 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[30]=d2lp.csa(s_dir,dir); f[31]=d2lp.csa(s_dist,dist); | |
518 | + | |
519 | + d3lp.v1= label; | |
520 | + d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[32]=d3lp.csa(s_dir,dir); | |
521 | + | |
522 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sblF; d2lp.cz5(); f[33]=d2lp.csa(s_dir,dir); f[34]=d2lp.csa(s_dist,dist); | |
523 | + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=chldF; d2lp.cz5(); f[35]=d2lp.csa(s_dir,dir); f[36]=d2lp.csa(s_dist,dist); | |
524 | + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=prntF; d2lp.cz5(); f[37]=d2lp.csa(s_dir,dir); f[38]=d2lp.csa(s_dist,dist); | |
525 | + | |
526 | + d2pp.v1=label; | |
527 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[39]=d2pp.csa(s_dir,dir); f[40]=d2pp.csa(s_dist,dist); | |
528 | + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[41]=d2pp.csa(s_dir,dir); f[42]=d2pp.csa(s_dist,dist); | |
529 | + d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[43]=d2pp.csa(s_dir,dir); f[44]=d2pp.csa(s_dist,dist); | |
530 | + | |
531 | + /* POS values of the tokens next to prnt and d; s_str / s_end stand in beyond the sentence edges. */ | |
532 | + int prntPm1 = prnt!=0 ? pos[prnt-1] : s_str; | |
533 | + int chldPm1 = d-1>=0 ? pos[d-1] : s_str; | |
534 | + int prntPp1 = prnt!=pos.length-1 ? pos[prnt+1] : s_end; | |
535 | + int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end; | |
536 | + | |
537 | + // sibling part of speech minus and plus 1 | |
538 | + int sblPm1 = sblng>0 ? pos[sblng-1]:s_str; | |
539 | + int sblPp1 = sblng<pos.length-1 ? pos[sblng + 1]:s_end; | |
540 | + | |
541 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP; dl1.cz5(); f[45]=dl1.csa(s_dir,dir); | |
542 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=pP; dl1.cz5(); f[46]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
543 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.cz5(); f[47]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
544 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[48]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
545 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[49]=dl1.csa(s_dir,dir); | |
546 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=prntPm1;dl1.v5=pP;dl1.cz6(); f[50]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
547 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[51]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
548 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[52]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
549 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP; dl1.cz5(); f[53]=dl1.csa(s_dir,dir); | |
550 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=dP; dl1.cz5(); f[54]=dl1.csa(s_dir,dir); | |
551 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.cz5(); f[55]=dl1.csa(s_dir,dir); | |
552 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[56]=dl1.csa(s_dir,dir); | |
553 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6(); f[57]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
554 | + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[58]=dl1.csa(s_dir,dir); | |
555 | + dl1.v0= n++;dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6();f[59]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); | |
556 | + dl1.v0= n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6(); f[60]=dl1.csa(s_dir,dir); | |
557 | + /* From here on entries are appended at the running index c. */ | |
558 | + int c=61; | |
559 | + /* Cluster ids, looked up from forms, of the tokens next to prnt, d and sblng. */ | |
560 | + int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1]==-1?-1:cluster.getLP(forms[prnt + 1]): _cend; | |
561 | + int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; | |
562 | + int sLSp1 = sblng < pos.length -1 ? forms[sblng + 1] ==-1?-1:cluster.getLP(forms[sblng + 1]) : _cend; | |
563 | + | |
564 | + int pLSm1 = prnt!=0 ? forms[prnt - 1]==-1?-1:cluster.getLP(forms[prnt - 1]): _cstr; | |
565 | + int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr; | |
566 | + int sLSm1 = sblng>0 ? forms[sblng - 1] ==-1?-1:cluster.getLP(forms[sblng - 1]):_cstr; | |
567 | + | |
568 | + //int c=61; | |
569 | + | |
570 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
571 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
572 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
573 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
574 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
575 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
576 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
577 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
578 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
579 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
580 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
581 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
582 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); /* NOTE(review): v3 is sLSm1 here, while the POS counterpart (f[57]) uses sblPp1. */ | |
583 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
584 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
585 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
586 | + | |
587 | + /* The 16 rows below assign the same v2..v5 values as the 16 rows above; only v0 differs because n keeps increasing, so each combination is emitted a second time under a new id. */ | |
588 | + | |
589 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
590 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
591 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
592 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
593 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
594 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
595 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
596 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
597 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
598 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
599 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
600 | + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); | |
601 | + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
602 | + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
603 | + dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); | |
604 | + dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); | |
605 | + | |
606 | + // take those in for stacking | |
607 | + | |
608 | + /* | |
609 | + short[] prel = is.plabels[i],phead=is.pheads[i]; | |
610 | + | |
611 | + int g = prnt==phead[d]?1:2 ; | |
612 | + if (sblng>=0) g += prnt==phead[sblng]?4:8; | |
613 | + | |
614 | + int gr = sblng==-1?s_relend:prel[sblng]; | |
615 | + | |
616 | + | |
617 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
618 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
619 | + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | |
620 | + | |
621 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
622 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); | |
623 | + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); | |
624 | +*/ | |
625 | + | |
626 | + if (feats==null) return; | |
627 | + /* NOTE(review): nothing below checks cnt against f.length. */ | |
628 | + int cnt=c; | |
629 | + /* First pair: the feature arrays of d and of sblng (null when sblng == -1). */ | |
630 | + short[] featsP =feats[d]; | |
631 | + short[] featsSbl =sblng!=-1?feats[sblng]:null; | |
632 | + | |
633 | + dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=dP; | |
634 | + | |
635 | + | |
636 | + cnt = extractFeat(f, cnt ,dir, featsP, featsSbl); | |
637 | + /* Second pair: prnt and sblng, expanded inline with the same three cases as extractFeat(). */ | |
638 | + featsP =feats[prnt]; | |
639 | + featsSbl =sblng!=-1?feats[sblng]:null; | |
640 | + | |
641 | + dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=sP; | |
642 | + if (featsP!=null && featsSbl!=null) { | |
643 | + for(short i1=0;i1<featsP.length;i1++) { | |
644 | + for(short i2=0;i2<featsSbl.length;i2++) { | |
645 | + dlf.v4=featsP[i1]; dlf.v5=featsSbl[i2]; | |
646 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,prnt<sblng?1:2); /* NOTE(review): passes the literals 1/2 as direction, whereas the other two branches pass dir (ra/la). */ | |
647 | + } | |
648 | + } | |
649 | + } else if (featsP==null && featsSbl!=null) { | |
650 | + | |
651 | + for(short i2=0;i2<featsSbl.length;i2++) { | |
652 | + dlf.v4=nofeat; dlf.v5=featsSbl[i2]; | |
653 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
654 | + } | |
655 | + | |
656 | + } else if (featsP!=null && featsSbl==null) { | |
657 | + | |
658 | + for(short i1=0;i1<featsP.length;i1++) { | |
659 | + dlf.v4=featsP[i1]; dlf.v5=nofeat; | |
660 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
661 | + } | |
662 | + } | |
663 | + | |
664 | + return; | |
665 | + } | |
666 | + /** Appends one code per (featsP[i1], featsD[i2]) pair to f starting at index cnt and returns the next free index; a null array is stood in for by the single value nofeat, and nothing is appended when both are null. Only dlf.v4 and dlf.v5 are assigned here; the callers in this file set dlf.v0..v3 right before the call. */ | |
667 | + private int extractFeat(long[] f, int cnt, int dir, short[] featsP, short[] featsD) { | |
668 | + if (featsP!=null && featsD!=null) { | |
669 | + for(short i1=0;i1<featsP.length;i1++) { | |
670 | + for(short i2=0;i2<featsD.length;i2++) { | |
671 | + dlf.v4=featsP[i1]; dlf.v5=featsD[i2]; | |
672 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
673 | + } | |
674 | + } | |
675 | + } else if (featsP==null && featsD!=null) { | |
676 | + /* Only featsD is present: pair nofeat with each of its values. */ | |
677 | + for(short i2=0;i2<featsD.length;i2++) { | |
678 | + dlf.v4=nofeat; dlf.v5=featsD[i2]; | |
679 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
680 | + | |
681 | + } | |
682 | + } else if (featsP!=null && featsD==null) { | |
683 | + /* Only featsP is present: pair each of its values with nofeat. */ | |
684 | + for(short i1=0;i1<featsP.length;i1++) { | |
685 | + dlf.v4=featsP[i1]; dlf.v5=nofeat; | |
686 | + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); | |
687 | + | |
688 | + } | |
689 | + } | |
690 | + return cnt; | |
691 | + } | |
692 | + /** Adds the features of every edge heads[i]->i (i starting at 1) of the given tree to f and returns f. */ | |
693 | + /* Per edge: basic(), then firstm(), siblingm() and two gcm() calls; after each of the latter four, every slot of the scratch array is passed to dl1.map(f, code). */ | |
694 | + public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) { | |
695 | + | |
696 | + /* Scratch buffer reused by the extractor calls below (siblingm and gcm zero it before writing). */ | |
697 | + long[] svs = new long[250]; | |
698 | + | |
699 | + for (int i = 1; i < heads.length; i++) { | |
700 | + | |
701 | + | |
702 | + int n =basic(pposs, forms, heads[i], i, cluster, f); | |
703 | + | |
704 | + firstm(is, ic, heads[i], i, types[i], cluster,svs); | |
705 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | |
706 | + /* ch: dependent of the head lying between head and i that is nearest to i. cmi / cmo: dependent of i farthest from i on the side towards the head / away from the head. Each is -1 when absent. */ | |
707 | + int ch,cmi,cmo; | |
708 | + if (heads[i] < i) { | |
709 | + ch = rightmostRight(heads, heads[i], i); | |
710 | + cmi = leftmostLeft(heads, i, heads[i]); | |
711 | + cmo = rightmostRight(heads, i, heads.length); | |
712 | + | |
713 | + } else { | |
714 | + ch = leftmostLeft(heads, heads[i], i); | |
715 | + cmi = rightmostRight(heads, i, heads[i]); | |
716 | + cmo = leftmostLeft(heads, i, 0); | |
717 | + } | |
718 | + /* n (the value returned by basic) is handed to siblingm as its last argument. */ | |
719 | + siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n); | |
720 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | |
721 | + | |
722 | + | |
723 | + gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs); | |
724 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | |
725 | + | |
726 | + gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs); | |
727 | + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); | |
728 | + } | |
729 | + | |
730 | + return f; | |
731 | + } | |
732 | + | |
733 | + /** Sums the entries of the score tables in d2 (pl, lab, sib and two gra lookups) over all edges heads[i]->i with i >= 1 and returns the total, accumulated in a double, as a float. */ | |
734 | + public float encode3(short[] pos, short heads[] , short[] types, DataF d2) { | |
735 | + | |
736 | + double v = 0; | |
737 | + for (int i = 1; i < heads.length; i++) { | |
738 | + /* dir is 0 when the head precedes i and 1 otherwise. */ | |
739 | + int dir= (heads[i] < i)? 0:1; | |
740 | + | |
741 | + v += d2.pl[heads[i]][i]; | |
742 | + v += d2.lab[heads[i]][i][types[i]][dir]; | |
743 | + /* lid: position of types[i] in the array returned by Edges.get(). It stays -1 when the label is not in that array, and the sib/gra lookups below would then throw ArrayIndexOutOfBoundsException. */ | |
744 | + boolean left = i<heads[i]; | |
745 | + short[] labels = Edges.get(pos[heads[i]], pos[i], left); | |
746 | + int lid=-1; | |
747 | + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} | |
748 | + | |
749 | + int ch,cmi,cmo; | |
750 | + if (heads[i] < i) { | |
751 | + ch = rightmostRight(heads, heads[i], i); | |
752 | + cmi = leftmostLeft(heads, i, heads[i]); | |
753 | + cmo = rightmostRight(heads, i, heads.length); | |
754 | + /* Missing positions (-1) are replaced by the head index in this branch and by i in the other one. */ | |
755 | + if (ch==-1) ch=heads[i]; | |
756 | + if (cmi==-1) cmi=heads[i]; | |
757 | + if (cmo==-1) cmo=heads[i]; | |
758 | + | |
759 | + } else { | |
760 | + ch = leftmostLeft(heads, heads[i], i); | |
761 | + cmi = rightmostRight(heads, i, heads[i]); | |
762 | + cmo = leftmostLeft(heads, i, 0); | |
763 | + | |
764 | + if (ch==-1) ch=i; | |
765 | + if (cmi==-1) cmi=i; | |
766 | + if (cmo==-1) cmo=i; | |
767 | + } | |
768 | + v += d2.sib[heads[i]][i][ch][dir][lid]; | |
769 | + v += d2.gra[heads[i]][i][cmi][dir][lid]; | |
770 | + v += d2.gra[heads[i]][i][cmo][dir][lid]; | |
771 | + } | |
772 | + return (float)v; | |
773 | + } | |
774 | + | |
775 | + /** | |
776 | + * Provide the scores of the edges: looks up the same five table entries per edge as the instance method encode3(pos, heads, types, d2), stores their sum in edgesScores[i] and returns the total over all edges. | |
777 | + * @param pos per-token values handed to Edges.get() for the head and the dependent | |
778 | + * @param heads head index of every token; entry 0 is not scored | |
779 | + * @param types label id of every edge | |
780 | + * @param edgesScores output array, overwritten at indices 1 .. heads.length-1 | |
781 | + * @param d2 score tables (pl, lab, sib, gra) | |
782 | + * @return the sum of all values written to edgesScores, cast to float | |
783 | + */ | |
784 | + public static float encode3(short[] pos, short heads[] , short[] types, float[] edgesScores, DataF d2) { | |
785 | + | |
786 | + double v = 0; | |
787 | + for (int i = 1; i < heads.length; i++) { | |
788 | + /* dir is 0 when the head precedes i and 1 otherwise. */ | |
789 | + int dir= (heads[i] < i)? 0:1; | |
790 | + /* Each edge score is accumulated in the float slot edgesScores[i] and added to the double total afterwards. */ | |
791 | + edgesScores[i] = d2.pl[heads[i]][i]; | |
792 | + edgesScores[i] += d2.lab[heads[i]][i][types[i]][dir]; | |
793 | + /* lid stays -1 when types[i] is not in the array returned by Edges.get(); the sib/gra lookups below would then throw ArrayIndexOutOfBoundsException. */ | |
794 | + boolean left = i<heads[i]; | |
795 | + short[] labels = Edges.get(pos[heads[i]], pos[i], left); | |
796 | + int lid=-1; | |
797 | + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} | |
798 | + | |
799 | + int ch,cmi,cmo; | |
800 | + if (heads[i] < i) { | |
801 | + ch = rightmostRight(heads, heads[i], i); | |
802 | + cmi = leftmostLeft(heads, i, heads[i]); | |
803 | + cmo = rightmostRight(heads, i, heads.length); | |
804 | + /* Missing positions (-1) are replaced by the head index in this branch and by i in the other one. */ | |
805 | + if (ch==-1) ch=heads[i]; | |
806 | + if (cmi==-1) cmi=heads[i]; | |
807 | + if (cmo==-1) cmo=heads[i]; | |
808 | + | |
809 | + } else { | |
810 | + ch = leftmostLeft(heads, heads[i], i); | |
811 | + cmi = rightmostRight(heads, i, heads[i]); | |
812 | + cmo = leftmostLeft(heads, i, 0); | |
813 | + | |
814 | + if (ch==-1) ch=i; | |
815 | + if (cmi==-1) cmi=i; | |
816 | + if (cmo==-1) cmo=i; | |
817 | + } | |
818 | + edgesScores[i] += d2.sib[heads[i]][i][ch][dir][lid]; | |
819 | + edgesScores[i] += d2.gra[heads[i]][i][cmi][dir][lid]; | |
820 | + edgesScores[i] += d2.gra[heads[i]][i][cmo][dir][lid]; | |
821 | + v+=edgesScores[i]; | |
822 | + } | |
823 | + return (float)v; | |
824 | + } | |
825 | + | |
826 | + /** Returns the largest index i with head < i < max and heads[i] == head, or -1 if there is none. */ | |
827 | + private static int rightmostRight(short[] heads, int head, int max) { | |
828 | + int rightmost = -1; | |
829 | + for (int i = head + 1; i < max; i++) if (heads[i] == head) rightmost = i; | |
830 | + /* The scan runs upwards, so the last match recorded is the rightmost one. */ | |
831 | + return rightmost; | |
832 | + } | |
833 | + /** Returns the smallest index i with min < i < head and heads[i] == head, or -1 if there is none; index min itself is never examined. */ | |
834 | + private static int leftmostLeft(short[] heads, int head, int min) { | |
835 | + int leftmost = -1; | |
836 | + for (int i = head - 1; i > min; i--) if (heads[i] == head) leftmost = i; | |
837 | + return leftmost; | |
838 | + } | |
839 | + /* String keys handed to MFB.register() in initFeatures(). */ | |
840 | + public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA"; | |
841 | + /* Ids returned by MFB.register(); all of them are assigned in initFeatures(). */ | |
842 | + private static int ra,la; | |
843 | + private static int s_str; | |
844 | + private static int s_end, _cend,_cstr, s_stwrd,s_relend; | |
845 | + | |
846 | + protected static final String TYPE = "TYPE",DIR = "D", FEAT="F"; | |
847 | + public static final String POS = "POS"; | |
848 | + protected static final String DIST = "DIST",MID = "MID"; | |
849 | + | |
850 | + private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10"; | |
851 | + /* Distance-bucket ids; siblingm selects one of them from |prnt - d|. */ | |
852 | + private static int di0, d4,d3,d2,d1,d5,d10; | |
853 | + | |
854 | + | |
855 | + private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS"; | |
856 | + | |
857 | + | |
858 | + /* Placeholder id used by extractFeat/siblingm in place of a missing (null) feature array. */ | |
859 | + private static int nofeat; | |
860 | + | |
861 | + /* Threshold applied in gcm/siblingm: form and lemma ids above it are replaced by -1. The field is static, so setMaxForm() affects every instance. */ | |
862 | + private static int maxForm; | |
863 | + | |
864 | + | |
865 | + /** | |
866 | + * Initialize the features: registers the fixed symbols with a new MFB and stores the returned ids in the static fields of this class. | |
867 | + * Takes no parameters. NOTE(review): if MFB hands out ids in registration order, reordering these calls changes the ids - confirm before editing. | |
868 | + */ | |
869 | + static public void initFeatures() { | |
870 | + | |
871 | + | |
872 | + MFB mf = new MFB(); | |
873 | + mf.register(POS, MID); | |
874 | + s_str = mf.register(POS, STR); | |
875 | + s_end = mf.register(POS, END); | |
876 | + | |
877 | + s_relend = mf.register(REL, END); | |
878 | + /* Start/end markers registered under the Cluster.SPATH key. */ | |
879 | + _cstr= mf.register(Cluster.SPATH,STR); | |
880 | + _cend=mf.register(Cluster.SPATH,END); | |
881 | + | |
882 | + | |
883 | + mf.register(TYPE, POS); | |
884 | + | |
885 | + s_stwrd=mf.register(WORD,STWRD); | |
886 | + mf.register(POS,STPOS); | |
887 | + /* Direction ids used as dir / dir_gra by the extractor methods. */ | |
888 | + la = mf.register(DIR, LA); | |
889 | + ra = mf.register(DIR, RA); | |
890 | + | |
891 | + // mf.register(TYPE, CHAR); | |
892 | + | |
893 | + mf.register(TYPE, FEAT); | |
894 | + nofeat=mf.register(FEAT, "NOFEAT"); | |
895 | + /* Registers the 215 type names F0 .. F214. */ | |
896 | + for(int k=0;k<215;k++) mf.register(TYPE, "F"+k); | |
897 | + | |
898 | + /* Distance buckets. */ | |
899 | + di0=mf.register(DIST, _0); | |
900 | + d1=mf.register(DIST, _1); | |
901 | + d2=mf.register(DIST, _2); | |
902 | + d3=mf.register(DIST, _3); | |
903 | + d4=mf.register(DIST, _4); | |
904 | + d5=mf.register(DIST, _5); | |
905 | + // d5l=mf.register(DIST, _5l); | |
906 | + d10=mf.register(DIST, _10); | |
907 | + | |
908 | + | |
909 | + } | |
910 | + | |
911 | + /* (non-Javadoc) | |
912 | + * @see extractors.Extractor#getType() | |
913 | + */ | |
914 | + @Override | |
915 | + public int getType() { | |
916 | + return s_type; | |
917 | + } | |
918 | + | |
919 | + /* (non-Javadoc) | |
920 | + * @see extractors.Extractor#setMaxForm(java.lang.Integer) | |
921 | + */ | |
922 | + @Override | |
923 | + public void setMaxForm(int max) { | |
924 | + maxForm = max; | |
925 | + } | |
926 | + | |
927 | + /* (non-Javadoc) | |
928 | + * @see extractors.Extractor#getMaxForm() | |
929 | + */ | |
930 | + @Override | |
931 | + public int getMaxForm() { | |
932 | + return maxForm; | |
933 | + } | |
934 | + | |
935 | + | |
936 | + | |
937 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/extractors/ExtractorFactory.java
0 → 100644
1 | +/** | |
2 | + * | |
3 | + */ | |
4 | +package extractors; | |
5 | + | |
6 | +import is2.data.Long2IntInterface; | |
7 | + | |
8 | +/** | |
9 | + * @author Dr. Bernd Bohnet, 29.04.2011 | |
10 | + * | |
11 | + * | |
12 | + */ | |
13 | +public class ExtractorFactory { | |
14 | + | |
15 | + public static final int StackedClustered = 4; | |
16 | + public static final int StackedClusteredR2 = 5; | |
17 | + | |
18 | + | |
19 | + private int type=-1; | |
20 | + | |
21 | + /** | |
22 | + * @param stackedClusteredR22 | |
23 | + */ | |
24 | + public ExtractorFactory(int t) { | |
25 | + type=t; | |
26 | + } | |
27 | + | |
28 | + /** | |
29 | + * @param stackedClusteredR22 | |
30 | + * @param l2i | |
31 | + * @return | |
32 | + */ | |
33 | + public Extractor getExtractor(Long2IntInterface l2i) { | |
34 | + switch(type) | |
35 | + { | |
36 | + case StackedClustered: | |
37 | + return new ExtractorClusterStacked(l2i); | |
38 | + case StackedClusteredR2: | |
39 | + return new ExtractorClusterStackedR2(l2i); | |
40 | + } | |
41 | + return null; | |
42 | + } | |
43 | + | |
44 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/extractors/ExtractorReranker.java
0 → 100644
1 | +package extractors; | |
2 | + | |
3 | + | |
4 | +import is2.data.Cluster; | |
5 | +import is2.data.D4; | |
6 | +import is2.data.Instances; | |
7 | +import is2.data.Long2IntInterface; | |
8 | +import is2.data.MFB; | |
9 | +import is2.data.ParseNBest; | |
10 | +import is2.util.DB; | |
11 | + | |
12 | +import java.util.Arrays; | |
13 | + | |
14 | + | |
15 | + | |
16 | +final public class ExtractorReranker { | |
17 | + | |
18 | + public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos; | |
19 | + public static int d0,d1,d2,d3,d4,d5,d10; | |
20 | + | |
21 | + MFB mf; | |
22 | + | |
23 | + final D4 dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ; | |
24 | + | |
25 | + public final Long2IntInterface li; | |
26 | + | |
27 | + public ExtractorReranker(Long2IntInterface li) { | |
28 | + this.li=li; | |
29 | + dl1 = new D4(li);dl2 = new D4(li); | |
30 | + dwr = new D4(li); | |
31 | + dr = new D4(li); | |
32 | + dwwp = new D4(li); | |
33 | + | |
34 | + dw = new D4(li); | |
35 | + dwp = new D4(li); | |
36 | + | |
37 | + dlf = new D4(li); | |
38 | + d3lp = new D4(li); d2lp = new D4(li); d2pw = new D4(li); d2pp = new D4(li); | |
39 | + | |
40 | + } | |
41 | + | |
42 | + public static void initStat() { | |
43 | + DB.println("init called "); | |
44 | + MFB mf = new MFB(); | |
45 | + s_rel = mf.getFeatureCounter().get(REL).intValue();; | |
46 | + s_pos = mf.getFeatureCounter().get(POS).intValue(); | |
47 | + s_word = mf.getFeatureCounter().get(WORD).intValue(); | |
48 | + s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits(); | |
49 | + s_dir = mf.getFeatureCounter().get(DIR); | |
50 | + la = mf.getValue(DIR, LA); | |
51 | + ra = mf.getValue(DIR, RA); | |
52 | + s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST); | |
53 | + s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT); | |
54 | + s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH); | |
55 | + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH); | |
56 | + } | |
57 | + | |
58 | + public void init(){ | |
59 | + mf = new MFB(); | |
60 | + | |
61 | + dl1.a0 = s_type;dl1.a1 = 3; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; | |
62 | + dl2.a0 = s_type;dl2.a1 = 3;dl2.a2 = s_rel;dl2.a3 = s_rel;dl2.a4 = s_rel;dl2.a5 = s_rel;dl2.a6 = s_rel;dl2.a7 = s_rel;dl2.a8 = s_rel; dl2.a9 = s_rel; | |
63 | + dwp.a0 = s_type; dwp.a1 = 3; dwp.a2 = s_word; dwp.a3 = s_rel; dwp.a4 = s_rel; dwp.a5 = s_rel;dwp.a6 = s_rel;dwp.a7 = s_rel; | |
64 | + dwwp.a0 = s_type; dwwp.a1 = 3; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word;dwwp.a6 = s_pos;dwwp.a7 = s_pos; | |
65 | + } | |
66 | + | |
67 | + | |
68 | + | |
69 | + | |
70 | + | |
71 | + | |
72 | + public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA", FEAT="F"; | |
73 | + | |
74 | + private static int ra,la; | |
75 | + private static int s_str; | |
76 | + private static int s_end, _cend,_cstr, s_stwrd,s_relend; | |
77 | + | |
78 | + protected static final String TYPE = "TYPE",DIR = "D"; | |
79 | + public static final String POS = "POS"; | |
80 | + protected static final String DIST = "DIST",MID = "MID"; | |
81 | + | |
82 | + private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10"; | |
83 | + | |
84 | + | |
85 | + | |
86 | + private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS"; | |
87 | + | |
88 | + | |
89 | + | |
90 | + private static int nofeat; | |
91 | + | |
92 | + | |
93 | + public static int maxForm; | |
94 | + | |
95 | + | |
96 | + final public static int _FC =60; | |
97 | + | |
98 | + | |
99 | + /** | |
100 | + * Initialize the features. | |
101 | + * @param maxFeatures | |
102 | + */ | |
103 | + static public void initFeatures() { | |
104 | + | |
105 | + | |
106 | + MFB mf = new MFB(); | |
107 | + mf.register(POS, MID); | |
108 | + s_str = mf.register(POS, STR); | |
109 | + s_end = mf.register(POS, END); | |
110 | + | |
111 | + s_relend = mf.register(REL, END); | |
112 | + | |
113 | + _cstr= mf.register(Cluster.SPATH,STR); | |
114 | + _cend=mf.register(Cluster.SPATH,END); | |
115 | + | |
116 | + | |
117 | + mf.register(TYPE, POS); | |
118 | + | |
119 | + s_stwrd=mf.register(WORD,STWRD); | |
120 | + mf.register(POS,STPOS); | |
121 | + | |
122 | + la = mf.register(DIR, LA); | |
123 | + ra = mf.register(DIR, RA); | |
124 | + | |
125 | + // mf.register(TYPE, CHAR); | |
126 | + | |
127 | + mf.register(TYPE, FEAT); | |
128 | + nofeat=mf.register(FEAT, "NOFEAT"); | |
129 | + | |
130 | + for(int k=0;k<60;k++) mf.register(TYPE, "F"+k); | |
131 | + | |
132 | + | |
133 | + d0 =mf.register(DIST, _0); | |
134 | + d1= mf.register(DIST, _1); | |
135 | + d2 =mf.register(DIST, _2); | |
136 | + d3= mf.register(DIST, _3); | |
137 | + d4= mf.register(DIST, _4); | |
138 | + d5= mf.register(DIST, _5); | |
139 | + // d5l=mf.register(DIST, _5l); | |
140 | + d10= mf.register(DIST, _10); | |
141 | + | |
142 | + | |
143 | + } | |
144 | + | |
145 | + /** | |
146 | + * @param is | |
147 | + * @param n | |
148 | + * @param parseNBest | |
149 | + * @param vs | |
150 | + */ | |
151 | + public void extractFeatures3(Instances is, int i, ParseNBest parse, int rank, long[] v) { | |
152 | + | |
153 | + int f=1,n=0; | |
154 | + | |
155 | + for(short k= 0; k<is.length(i)-1;k++) { | |
156 | + | |
157 | + short[] chld = children(parse.heads,k); | |
158 | + | |
159 | + f=2; | |
160 | + | |
161 | + int fm = is.forms[i][k]; | |
162 | + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; | |
163 | + int h = is.pposs[i][k]; | |
164 | + int hrel = parse.labels[k]; | |
165 | + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; | |
166 | + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; | |
167 | + | |
168 | + | |
169 | + | |
170 | + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; | |
171 | + | |
172 | + int [] rels = new int[chld.length]; | |
173 | + int [] pss = new int[chld.length]; | |
174 | + for(int j=0;j<chld.length;j++) { | |
175 | + rels[j] = parse.labels[chld[j]]; | |
176 | + pss[j] = is.pposs[i][chld[j]]; | |
177 | + } | |
178 | + | |
179 | + StringBuilder rl = new StringBuilder(chld.length); | |
180 | + StringBuilder psl = new StringBuilder(chld.length); | |
181 | + for(int j=0;j<chld.length;j++) { | |
182 | + rl.append((char)rels[j]); | |
183 | + psl.append((char)pss[j]); | |
184 | + } | |
185 | + | |
186 | + int rli = mf.register("rli", rl.toString()); | |
187 | + int pli = mf.register("pli", psl.toString()); | |
188 | + | |
189 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); | |
190 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); | |
191 | + | |
192 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
193 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
194 | + | |
195 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
196 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
197 | + | |
198 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
199 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
200 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
201 | + | |
202 | + | |
203 | + | |
204 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
205 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
206 | + | |
207 | + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | |
208 | + | |
209 | + Arrays.sort(rels); | |
210 | + Arrays.sort(pss); | |
211 | + | |
212 | + rl = new StringBuilder(chld.length); | |
213 | + psl = new StringBuilder(chld.length); | |
214 | + for(int j=0;j<chld.length;j++) { | |
215 | + rl.append((char)rels[j]); | |
216 | + psl.append((char)pss[j]); | |
217 | + } | |
218 | + rli = mf.register("rli", rl.toString()); | |
219 | + pli = mf.register("pli", psl.toString()); | |
220 | + | |
221 | + | |
222 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
223 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
224 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
225 | + | |
226 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
227 | + | |
228 | + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); | |
229 | + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); | |
230 | + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel; dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | |
231 | + | |
232 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
233 | + dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
234 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hh; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
235 | + | |
236 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hhf; dwwp.v4=h; dwwp.v5=hh; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
237 | + dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hh; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
238 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hh; dwwp.v4=h; dwwp.v5=hrel; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
239 | + | |
240 | + | |
241 | + // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); v[n++]=dl1.getVal(); | |
242 | +// dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); v[n++]=dl1.getVal(); | |
243 | + // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=dir;dl1.v5=hh; dl1.v6=hhh;dl1.v7=rlast; dl1.v8=r1; dl1.cz9(); v[n++]=dl1.getVal(); | |
244 | + // dl1.v0= f++; dl1.v2=h;dl1.v3=hh; dl1.v4=hhh;dl1.v5=hrel; dl1.cz6(); v[n++]=dl1.getVal(); | |
245 | + | |
246 | + | |
247 | + short hp = parse.heads[k]; | |
248 | + short[] hchld = hp==-1?new short[0]:children(parse.heads,hp); | |
249 | + | |
250 | + int [] hrels = new int[hchld.length]; | |
251 | + int [] hpss = new int[hchld.length]; | |
252 | + for(int j=0;j<hchld.length;j++) { | |
253 | + hrels[j] = parse.labels[hchld[j]]; | |
254 | + hpss[j] = is.pposs[i][hchld[j]]; | |
255 | + } | |
256 | + | |
257 | + | |
258 | + StringBuilder hrl = new StringBuilder(hchld.length); | |
259 | + StringBuilder hpsl = new StringBuilder(hchld.length); | |
260 | + for(int j=0;j<hchld.length;j++) { | |
261 | + hrl.append((char)hrels[j]); | |
262 | + hpsl.append((char)hpss[j]); | |
263 | + } | |
264 | + int hrli = mf.register("rli", hrl.toString()); | |
265 | + int hpli = mf.register("pli", hpsl.toString()); | |
266 | + | |
267 | + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=hrli; dwwp.cz4(); v[n++]=dwwp.getVal(); | |
268 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hrli; dwwp.cz4(); v[n++]=dwwp.getVal(); | |
269 | + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=fm; dwwp.cz4(); v[n++]=dwwp.getVal(); | |
270 | + | |
271 | + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=rli; dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
272 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hrli;dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
273 | + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=hpli;dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
274 | + | |
275 | + | |
276 | + | |
277 | + } | |
278 | + | |
279 | + v[n]=Integer.MIN_VALUE; | |
280 | + } | |
281 | + | |
282 | + /** | |
283 | + * This works seem works well with n-best n=8 (88.858074) , n=10 (88.836884), n=12 (88.858) | |
284 | + * n=14 (88.913417) n=16 (88.79546) n=20 (88.80621) n 50 (88.729364) | |
285 | + * 1-best: 88.749605 | |
286 | + * | |
287 | + * @param is | |
288 | + * @param i | |
289 | + * @param parse | |
290 | + * @param rank | |
291 | + * @param v | |
292 | + * @param cluster | |
293 | + */ | |
294 | + public void extractFeatures(Instances is, int i, ParseNBest parse, int rank, long[] v, Cluster cluster) { | |
295 | + | |
296 | + // mf.getValue(REL, "SB"); | |
297 | + | |
298 | + int f=1,n=0; | |
299 | + | |
300 | + for(short k= 0; k<is.length(i)-1;k++) { | |
301 | + | |
302 | + short[] chld = children(parse.heads,k); | |
303 | + | |
304 | + int abs = Math.abs(parse.heads[k]-k); | |
305 | + final int dist; | |
306 | + if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2; | |
307 | + else if (abs==2)dist=d1; else dist=d0; | |
308 | + | |
309 | + | |
310 | + f=2; | |
311 | + | |
312 | + int fm = is.forms[i][k]; | |
313 | + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; | |
314 | + int h = is.pposs[i][k]; | |
315 | + int hrel = parse.labels[k];//is.labels[i][k]; | |
316 | + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; | |
317 | + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; | |
318 | + | |
319 | + int r1 = chld.length>0?parse.labels[chld[0]]:s_relend; | |
320 | + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; | |
321 | + | |
322 | + int [] rels = new int[chld.length]; | |
323 | + int [] pss = new int[chld.length]; | |
324 | + int [] cls = new int[chld.length]; | |
325 | + | |
326 | + int[] rc = new int[30]; // 20 was a good length | |
327 | + | |
328 | + for(int j=0;j<chld.length;j++) { | |
329 | + rels[j] = parse.labels[chld[j]]; | |
330 | + if (rels[j]<rc.length) rc[rels[j]]++; | |
331 | + pss[j] = is.pposs[i][chld[j]]; | |
332 | +// cls[j] = is.forms[i][chld[j]]==-1?0:cluster.getLP(is.forms[i][chld[j]]); | |
333 | +// cls[j] = cls[j]==-1?0:cls[j]; | |
334 | + } | |
335 | + | |
336 | + StringBuilder rl = new StringBuilder(chld.length); | |
337 | + StringBuilder psl = new StringBuilder(chld.length); | |
338 | + StringBuilder csl = new StringBuilder(chld.length); | |
339 | + for(int j=0;j<chld.length;j++) { | |
340 | + rl.append((char)rels[j]); | |
341 | + psl.append((char)pss[j]); | |
342 | +// csl.append((char)cls[j]); | |
343 | + } | |
344 | + | |
345 | + int rli = mf.register("rli", rl.toString()); | |
346 | + int pli = mf.register("pli", psl.toString()); | |
347 | +// int cli = mf.register("cli", csl.toString()); | |
348 | + | |
349 | + | |
350 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
351 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
352 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
353 | + // dwwp.v0=f++; dwwp.v2=cli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
354 | + | |
355 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); | |
356 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); | |
357 | + //dwwp.v0=f++; dwwp.v2=cli; dwwp.cz3(); v[n++]=dwwp.getVal(); | |
358 | + | |
359 | + // dwwp.v0=f++; dwwp.v2=cli;dwwp.v3=h; dwwp.cz4(); v[n++]=dwwp.getVal(); | |
360 | + | |
361 | + for(int j=1;j<rc.length;j++) { | |
362 | + dwwp.v0=f++; dwwp.v2=rc[j]==0?1:rc[j]==1?2:3; dwwp.v3=j; dwwp.cz4(); v[n++]=dwwp.getVal();// | |
363 | + } | |
364 | + | |
365 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
366 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
367 | + | |
368 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
369 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
370 | + | |
371 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
372 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
373 | + | |
374 | + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | |
375 | + | |
376 | + //dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hh; dwwp.v4=dist; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
377 | + | |
378 | + Arrays.sort(rels); | |
379 | + Arrays.sort(pss); | |
380 | + | |
381 | + rl = new StringBuilder(chld.length); | |
382 | + psl = new StringBuilder(chld.length); | |
383 | + for(int j=0;j<chld.length;j++) { | |
384 | + rl.append((char)rels[j]); | |
385 | + psl.append((char)pss[j]); | |
386 | + } | |
387 | + rli = mf.register("rli", rl.toString()); | |
388 | + pli = mf.register("pli", psl.toString()); | |
389 | + | |
390 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
391 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
392 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
393 | + | |
394 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
395 | + | |
396 | + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); | |
397 | + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); | |
398 | + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | |
399 | + } | |
400 | + | |
401 | + v[n]=Integer.MIN_VALUE; | |
402 | + } | |
403 | + | |
404 | + /** | |
405 | + | |
406 | + * Works well! | |
407 | + * @param is | |
408 | + * @param i | |
409 | + * @param parse | |
410 | + * @param rank | |
411 | + * @param v | |
412 | + */ | |
413 | + public void extractFeatures6(Instances is, int i, ParseNBest parse, int rank, long[] v) { | |
414 | + | |
415 | + // mf.getValue(REL, "SB"); | |
416 | + | |
417 | + int f=1,n=0; | |
418 | + | |
419 | + for(short k= 0; k<is.length(i)-1;k++) { | |
420 | + | |
421 | + short[] chld = children(parse.heads,k); | |
422 | + | |
423 | + f=2; | |
424 | + | |
425 | + int fm = is.forms[i][k]; | |
426 | + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; | |
427 | + int h = is.pposs[i][k]; | |
428 | + int hrel = parse.labels[k];//is.labels[i][k]; | |
429 | + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; | |
430 | + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; | |
431 | + | |
432 | + int r1 = chld.length>0?parse.labels[chld[0]]:s_relend; | |
433 | + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; | |
434 | + | |
435 | + int [] rels = new int[chld.length]; | |
436 | + int [] pss = new int[chld.length]; | |
437 | + | |
438 | + int[] rc = new int[30]; // 20 was a good length | |
439 | + | |
440 | + for(int j=0;j<chld.length;j++) { | |
441 | + rels[j] = parse.labels[chld[j]]; | |
442 | + if (rels[j]<rc.length) rc[rels[j]]++; | |
443 | + // if (rels[j]==sb) numSB++; | |
444 | + pss[j] = is.pposs[i][chld[j]]; | |
445 | + } | |
446 | + | |
447 | + StringBuilder rl = new StringBuilder(chld.length); | |
448 | + StringBuilder psl = new StringBuilder(chld.length); | |
449 | + for(int j=0;j<chld.length;j++) { | |
450 | + rl.append((char)rels[j]); | |
451 | + psl.append((char)pss[j]); | |
452 | + } | |
453 | + | |
454 | + int rli = mf.register("rli", rl.toString()); | |
455 | + int pli = mf.register("pli", psl.toString()); | |
456 | + | |
457 | + | |
458 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
459 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
460 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
461 | + | |
462 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); | |
463 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); | |
464 | + | |
465 | + for(int j=1;j<rc.length;j++) { | |
466 | + dwwp.v0=f++; dwwp.v2=rc[j]==0?1:rc[j]==1?2:3; dwwp.v3=j; dwwp.cz4(); v[n++]=dwwp.getVal();// | |
467 | + } | |
468 | + | |
469 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
470 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
471 | + | |
472 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
473 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
474 | + | |
475 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
476 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
477 | + | |
478 | + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | |
479 | + | |
480 | + | |
481 | + Arrays.sort(rels); | |
482 | + Arrays.sort(pss); | |
483 | + | |
484 | + rl = new StringBuilder(chld.length); | |
485 | + psl = new StringBuilder(chld.length); | |
486 | + for(int j=0;j<chld.length;j++) { | |
487 | + rl.append((char)rels[j]); | |
488 | + psl.append((char)pss[j]); | |
489 | + } | |
490 | + rli = mf.register("rli", rl.toString()); | |
491 | + pli = mf.register("pli", psl.toString()); | |
492 | + | |
493 | + | |
494 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
495 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
496 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
497 | + | |
498 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
499 | + | |
500 | + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); | |
501 | + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); | |
502 | + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | |
503 | + | |
504 | + } | |
505 | + | |
506 | + v[n]=Integer.MIN_VALUE; | |
507 | + } | |
508 | + | |
509 | + | |
510 | + | |
511 | + public void extractFeatures2(Instances is, int i, ParseNBest parse, int rank, long[] v) { | |
512 | + | |
513 | + | |
514 | + | |
515 | + int f=1,n=0; | |
516 | + | |
517 | + for(short k= 0; k<is.length(i)-1;k++) { | |
518 | + | |
519 | + short[] chld = children(parse.heads,k); | |
520 | + | |
521 | + f=2; | |
522 | + | |
523 | + int fm = is.forms[i][k]; | |
524 | + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; | |
525 | + int h = is.pposs[i][k]; | |
526 | + int hrel = parse.labels[k];//is.labels[i][k]; | |
527 | + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; | |
528 | + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; | |
529 | + | |
530 | + int r1 = chld.length>0?parse.labels[chld[0]]:s_relend; | |
531 | + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; | |
532 | + | |
533 | + int [] rels = new int[chld.length]; | |
534 | + int [] pss = new int[chld.length]; | |
535 | + | |
536 | + | |
537 | + | |
538 | + for(int j=0;j<chld.length;j++) { | |
539 | + rels[j] = parse.labels[chld[j]]; | |
540 | + pss[j] = is.pposs[i][chld[j]]; | |
541 | + } | |
542 | + | |
543 | + StringBuilder rl = new StringBuilder(chld.length); | |
544 | + StringBuilder psl = new StringBuilder(chld.length); | |
545 | + for(int j=0;j<chld.length;j++) { | |
546 | + rl.append((char)rels[j]); | |
547 | + psl.append((char)pss[j]); | |
548 | + } | |
549 | + | |
550 | + int rli = mf.register("rli", rl.toString()); | |
551 | + int pli = mf.register("pli", psl.toString()); | |
552 | + | |
553 | + | |
554 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
555 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
556 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
557 | + | |
558 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); | |
559 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); | |
560 | + | |
561 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
562 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
563 | + | |
564 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
565 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
566 | + | |
567 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); | |
568 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
569 | + | |
570 | + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | |
571 | + | |
572 | + | |
573 | + Arrays.sort(rels); | |
574 | + Arrays.sort(pss); | |
575 | + | |
576 | + rl = new StringBuilder(chld.length); | |
577 | + psl = new StringBuilder(chld.length); | |
578 | + for(int j=0;j<chld.length;j++) { | |
579 | + rl.append((char)rels[j]); | |
580 | + psl.append((char)pss[j]); | |
581 | + } | |
582 | + rli = mf.register("rli", rl.toString()); | |
583 | + pli = mf.register("pli", psl.toString()); | |
584 | + | |
585 | + | |
586 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
587 | + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
588 | + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); | |
589 | + | |
590 | + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); | |
591 | + | |
592 | + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); | |
593 | + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); | |
594 | + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); | |
595 | + | |
596 | + } | |
597 | + | |
598 | + v[n]=Integer.MIN_VALUE; | |
599 | + } | |
600 | + | |
601 | + | |
602 | + | |
603 | + /** | |
604 | + * @param parse | |
605 | + * @param k | |
606 | + * @return | |
607 | + */ | |
608 | + private short[] children(short[] heads, short h) { | |
609 | + | |
610 | + int c=0; | |
611 | + for(int k=0;k<heads.length;k++) if (heads[k] ==h ) c++; | |
612 | + | |
613 | + short[] clds = new short[c]; | |
614 | + c=0; | |
615 | + for(int k=0;k<heads.length;k++) if (heads[k] ==h ) clds[c++]=(short)k; | |
616 | + return clds; | |
617 | + } | |
618 | + | |
619 | + | |
620 | + | |
621 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/extractors/ParallelExtract.java
0 → 100755
1 | +package extractors; | |
2 | + | |
3 | +import is2.data.Cluster; | |
4 | +import is2.data.DataF; | |
5 | +import is2.data.Edges; | |
6 | +import is2.data.F2SF; | |
7 | +import is2.data.FV; | |
8 | +import is2.data.Instances; | |
9 | +import is2.data.Long2IntInterface; | |
10 | + | |
11 | +import java.util.ArrayList; | |
12 | +import java.util.concurrent.Callable; | |
13 | + | |
14 | + | |
15 | +/** | |
16 | + * @author Bernd Bohnet, 30.08.2009 | |
17 | + * | |
18 | + * This class implements a parallel feature extractor. | |
19 | + */ | |
20 | +final public class ParallelExtract implements Callable<Object> | |
21 | +{ | |
22 | + // the data space of the weights for a dependency tree | |
23 | + final DataF d; | |
24 | + | |
25 | + // the data extractor does the actual work | |
26 | + final Extractor extractor; | |
27 | + | |
28 | + private Instances is; | |
29 | + private int i; | |
30 | + | |
31 | + private F2SF para; | |
32 | + | |
33 | + private Cluster cluster; | |
34 | + | |
35 | + private Long2IntInterface li; | |
36 | + | |
37 | + public ParallelExtract(Extractor e, Instances is, int i, DataF d, F2SF para,Cluster cluster, Long2IntInterface li) { | |
38 | + | |
39 | + this.is =is; | |
40 | + extractor=e; | |
41 | + this.d =d; | |
42 | + this.i=i; | |
43 | + this.para=para; | |
44 | + this.cluster = cluster; | |
45 | + this.li=li; | |
46 | + } | |
47 | + | |
48 | + | |
49 | + public static class DSet { | |
50 | + int w1,w2; | |
51 | + } | |
52 | + | |
53 | + public Object call() { | |
54 | + | |
55 | + try { | |
56 | + | |
57 | + F2SF f= para; | |
58 | + | |
59 | + | |
60 | + short[] pos=is.pposs[i]; | |
61 | + int[] forms=is.forms[i]; | |
62 | + int[] lemmas=is.plemmas[i]; | |
63 | + short[][] feats=is.feats[i]; | |
64 | + int length = pos.length; | |
65 | + | |
66 | + long[] svs = new long[250]; | |
67 | + | |
68 | + int type=extractor.getType(); | |
69 | + | |
70 | + while (true) { | |
71 | + | |
72 | + DSet set = get(); | |
73 | + if (set ==null) break; | |
74 | + | |
75 | + int w1=set.w1; | |
76 | + int w2=set.w2; | |
77 | + | |
78 | + f.clear(); | |
79 | + int n =extractor.basic(pos, forms, w1, w2,cluster, f); | |
80 | + d.pl[w1][w2]=f.getScoreF(); | |
81 | + | |
82 | + short[] labels = Edges.get(pos[w1], pos[w2],false); | |
83 | + float[][] lab = d.lab[w1][w2]; | |
84 | + | |
85 | + extractor.firstm(is, i, w1, w2, 0, cluster, svs); | |
86 | + | |
87 | + if (labels!=null) { | |
88 | + | |
89 | + | |
90 | + for (int l = labels.length - 1; l >= 0; l--) { | |
91 | + | |
92 | + short label = labels[l]; | |
93 | + | |
94 | + f.clear(); | |
95 | + for(int k=svs.length-1;k>=0;k--) if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); | |
96 | + lab[label][0]=f.getScoreF(); | |
97 | + } | |
98 | + } | |
99 | + | |
100 | + labels = Edges.get(pos[w1], pos[w2],true); | |
101 | + | |
102 | + if (labels!=null) { | |
103 | + | |
104 | + for (int l = labels.length - 1; l >= 0; l--) { | |
105 | + | |
106 | + int label = labels[l]; | |
107 | + f.clear(); | |
108 | + for(int k=svs.length-1;k>=0;k--) if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); | |
109 | + lab[label][1]=f.getScoreF(); | |
110 | + } | |
111 | + } | |
112 | + | |
113 | + int s = w1<w2 ? w1 : w2; | |
114 | + int e = w1<w2 ? w2 : w1; | |
115 | + | |
116 | + int sg = w1<w2 ? w1 : 0; | |
117 | + int eg = w1<w2 ? length : w1+1; | |
118 | + | |
119 | + | |
120 | + for(int m=s;m<e;m++) { | |
121 | + for(int dir=0;dir<2;dir++) { | |
122 | + labels = Edges.get(pos[w1], pos[w2],dir==1); | |
123 | + float lab2[]= new float[labels.length]; | |
124 | + | |
125 | + int g = (m==s||e==m) ? -1 : m; | |
126 | + | |
127 | + | |
128 | + extractor.siblingm(is,i,pos,forms,lemmas,feats, w1, w2, g, 0, cluster, svs,n); | |
129 | + | |
130 | + for (int l = labels.length - 1; l >= 0; l--) { | |
131 | + | |
132 | + int label = labels[l]; | |
133 | + f.clear(); | |
134 | + | |
135 | + for(int k=svs.length-1;k>=0;k--) { | |
136 | + if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); | |
137 | + } | |
138 | + lab2[l] = (float)f.score;//f.getScoreF(); | |
139 | + } | |
140 | + d.sib[w1][w2][m][dir]=lab2; | |
141 | + } | |
142 | + } | |
143 | + | |
144 | + for(int m=sg;m<eg;m++) { | |
145 | + for(int dir=0;dir<2;dir++) { | |
146 | + labels = Edges.get(pos[w1], pos[w2],dir==1); | |
147 | + float[] lab2 = new float[labels.length]; | |
148 | + | |
149 | + int g = (m==s||e==m) ? -1 : m; | |
150 | + | |
151 | + extractor.gcm(is, i, w1,w2,g, 0, cluster, svs); | |
152 | + | |
153 | + for (int l = labels.length - 1; l >= 0; l--) { | |
154 | + | |
155 | + int label = labels[l]; | |
156 | + | |
157 | + f.clear(); | |
158 | + for(int k=svs.length-1;k>=0;k--) { | |
159 | + if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); | |
160 | + } | |
161 | + lab2[l] = f.getScoreF(); | |
162 | + } | |
163 | + d.gra[w1][w2][m][dir] =lab2; | |
164 | + } | |
165 | + } | |
166 | + | |
167 | + } | |
168 | + } catch(Exception e ) { | |
169 | + e.printStackTrace(); | |
170 | + } | |
171 | + return null; | |
172 | + } | |
173 | + | |
174 | + | |
175 | + static ArrayList<DSet> sets = new ArrayList<DSet>(); | |
176 | + | |
177 | + private DSet get() { | |
178 | + | |
179 | + synchronized (sets) { | |
180 | + if (sets.size()==0) return null; | |
181 | + return sets.remove(sets.size()-1); | |
182 | + } | |
183 | + } | |
184 | + static public void add(int w1, int w2){ | |
185 | + DSet ds =new DSet(); | |
186 | + ds.w1=w1; | |
187 | + ds.w2=w2; | |
188 | + sets.add(ds); | |
189 | + } | |
190 | + | |
191 | + | |
192 | + | |
193 | + | |
194 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/is2/data/Closed.java
0 → 100755
1 | +package is2.data; | |
2 | + | |
3 | + | |
4 | + | |
5 | +final public class Closed { | |
6 | + | |
7 | + public double p; | |
8 | + short b,e,m; | |
9 | + byte dir; | |
10 | + | |
11 | + Closed d; | |
12 | + Open u; | |
13 | + | |
14 | + public Closed(short s, short t, int m, int dir,Open u, Closed d, float score) { | |
15 | + this.b = s; | |
16 | + this.e = t; | |
17 | + this.m = (short)m; | |
18 | + this.dir = (byte)dir; | |
19 | + this.u=u; | |
20 | + this.d =d; | |
21 | + p=score; | |
22 | + } | |
23 | + | |
24 | + | |
25 | + public void create(Parse parse) { | |
26 | + if (u != null) u.create(parse); | |
27 | + if (d != null) d.create(parse); | |
28 | + } | |
29 | +} | |
30 | + | |
31 | + | |
... | ... |
dependencyParser/basic/mate-tools/src/is2/data/Cluster.java
0 → 100644
1 | +/** | |
2 | + * | |
3 | + */ | |
4 | +package is2.data; | |
5 | + | |
6 | + | |
7 | + | |
8 | +import is2.util.DB; | |
9 | + | |
10 | +import java.io.BufferedReader; | |
11 | +import java.io.DataInputStream; | |
12 | +import java.io.DataOutputStream; | |
13 | +import java.io.FileInputStream; | |
14 | +import java.io.IOException; | |
15 | +import java.io.InputStreamReader; | |
16 | + | |
17 | +/** | |
18 | + * @author Dr. Bernd Bohnet, 28.10.2010 | |
19 | + * | |
20 | + * | |
21 | + */ | |
22 | +final public class Cluster { | |
23 | + | |
24 | + public static final String LPATH = "LP"; | |
25 | + public static final String SPATH = "SP"; | |
26 | + | |
27 | + // [word][p] p = [0:long-path | 1:short-path] | |
28 | + final private short[][] word2path; | |
29 | + | |
30 | + public Cluster() { | |
31 | + word2path =new short[0][0]; | |
32 | + } | |
33 | + | |
34 | + /** | |
35 | + * @param clusterFile | |
36 | + * @param mf | |
37 | + * | |
38 | + */ | |
39 | + public Cluster(String clusterFile, IEncoderPlus mf, int ls) { | |
40 | + | |
41 | + final String REGEX = "\t"; | |
42 | + | |
43 | + // register words | |
44 | + try { | |
45 | + BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768); | |
46 | + | |
47 | + int cnt=0; | |
48 | + String line; | |
49 | + while ((line =inputReader.readLine())!=null) { | |
50 | + | |
51 | + cnt++; | |
52 | + try { | |
53 | + String[] split = line.split(REGEX); | |
54 | + mf.register(SPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls)); | |
55 | + mf.register(LPATH, split[0]); | |
56 | + mf.register(PipeGen.WORD, split[1]); | |
57 | + } catch(Exception e) { | |
58 | + System.out.println("Error in cluster line "+cnt+" error: "+e.getMessage()); | |
59 | + } | |
60 | + } | |
61 | + System.out.println("read number of clusters "+cnt); | |
62 | + inputReader.close(); | |
63 | + | |
64 | + } catch (Exception e) { | |
65 | + e.printStackTrace(); | |
66 | + } | |
67 | + | |
68 | + word2path = new short[mf.getFeatureCounter().get(PipeGen.WORD)][2]; | |
69 | + | |
70 | + | |
71 | + // insert words | |
72 | + try { | |
73 | + String line; | |
74 | + BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768); | |
75 | + | |
76 | + while ((line =inputReader.readLine())!=null) { | |
77 | + | |
78 | + String[] split = line.split(REGEX); | |
79 | + int wd = mf.getValue(PipeGen.WORD, split[1]); | |
80 | + word2path[wd][0] = (short)mf.getValue(SPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls)); | |
81 | + word2path[wd][1] = (short)mf.getValue(LPATH, split[0]); | |
82 | + } | |
83 | + inputReader.close(); | |
84 | + int fill=0; | |
85 | + for(int l = 0; l<word2path.length; l++ ){ | |
86 | + if (word2path[l][0]!=0) fill++; | |
87 | + } | |
88 | + /* | |
89 | + for(int l = 0; l<word2path.length; l++ ){ | |
90 | + if (word2path[l][1]!=0) fillL++; | |
91 | + if (word2path[l][1]<-1) System.out.println("lower "+word2path[l][1]); | |
92 | + } | |
93 | + */ | |
94 | + System.out.println("filled "+fill+" of "+word2path.length); | |
95 | + | |
96 | + } catch (Exception e) { | |
97 | + e.printStackTrace(); | |
98 | + } | |
99 | + } | |
100 | + | |
101 | + /** | |
102 | + * Read the cluster | |
103 | + * @param dos | |
104 | + * @throws IOException | |
105 | + */ | |
106 | + public Cluster(DataInputStream dis) throws IOException { | |
107 | + | |
108 | + word2path = new short[dis.readInt()][2]; | |
109 | + for(int i =0;i<word2path.length;i++) { | |
110 | + word2path[i][0]=dis.readShort(); | |
111 | + word2path[i][1]=dis.readShort(); | |
112 | + } | |
113 | + DB.println("Read cluster with "+word2path.length+" words "); | |
114 | + } | |
115 | + | |
116 | + /** | |
117 | + * Write the cluster | |
118 | + * @param dos | |
119 | + * @throws IOException | |
120 | + */ | |
121 | + public void write(DataOutputStream dos) throws IOException { | |
122 | + | |
123 | + dos.writeInt(word2path.length); | |
124 | + for(short[] i : word2path) { | |
125 | + dos.writeShort(i[0]); | |
126 | + dos.writeShort(i[1]); | |
127 | + } | |
128 | + | |
129 | + } | |
130 | + | |
131 | + /** | |
132 | + * @param form the id of a word form | |
133 | + * @return the short path to the word form in the cluster | |
134 | + | |
135 | + final public int getSP(int form) { | |
136 | + if (word2path.length<form) return -1; | |
137 | + return word2path[form][0]; | |
138 | + } | |
139 | + */ | |
140 | + /** | |
141 | + * get the long path to a word form in the cluster | |
142 | + * @param form the id of a word form | |
143 | + * @return the long path to the word | |
144 | + */ | |
145 | + final public int getLP(int form) { | |
146 | + if (word2path.length<=form || word2path[form].length<=0) return -1; | |
147 | + return word2path[form][0]==0?-1:word2path[form][0]; | |
148 | + } | |
149 | + | |
150 | + final public int getLP(int form, int l) { | |
151 | + if (word2path.length<form) return -1; | |
152 | + return word2path[form][l]==0?-1:word2path[form][l]; | |
153 | + } | |
154 | + | |
155 | + final public int size() { | |
156 | + return word2path.length; | |
157 | + } | |
158 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/is2/data/D4.java
0 → 100644
1 | +/** | |
2 | + * | |
3 | + */ | |
4 | +package is2.data; | |
5 | + | |
6 | +import is2.util.DB; | |
7 | + | |
8 | +/** | |
9 | + * @author Dr. Bernd Bohnet, 30.10.2010 | |
10 | + * | |
11 | + * This class computes the mapping of features to the weight vector. | |
12 | + */ | |
13 | +final public class D4 extends DX { | |
14 | + private long shift; | |
15 | + private long h; | |
16 | + | |
17 | + | |
18 | + private final Long2IntInterface _li; | |
19 | + public D4(Long2IntInterface li) { | |
20 | + _li=li; | |
21 | + } | |
22 | + | |
23 | + | |
24 | + final public void clean() { | |
25 | + v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0; | |
26 | + shift=0;h=0; | |
27 | + } | |
28 | + | |
29 | + final public void cz3(){ | |
30 | + if (v0<0||v1<0||v2<0) { h=-1;return;} | |
31 | + | |
32 | + h= v0+v1*(shift =a0)+(long)v2*(shift *=a1); | |
33 | + shift *=a2; | |
34 | + } | |
35 | + | |
36 | + final public long c3(){ | |
37 | + if (v0<0||v1<0||v2<0) { h=-1;return h;} | |
38 | + | |
39 | + h= v0+v1*(shift =a0)+(long)v2*(shift *=a1); | |
40 | + shift *=a2; | |
41 | + return h; | |
42 | + } | |
43 | + | |
44 | + final public void cz4(){ | |
45 | + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;} | |
46 | + | |
47 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | |
48 | + shift *=a3; | |
49 | + } | |
50 | + | |
51 | + final public long c4(){ | |
52 | + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return h;} | |
53 | + | |
54 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | |
55 | + shift *=a3; | |
56 | + return h; | |
57 | + } | |
58 | + | |
59 | + | |
60 | + final public void cz5(){ | |
61 | + | |
62 | + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return;} | |
63 | + | |
64 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift*=a2)+v4*(shift*=a3); | |
65 | + shift*=a4; | |
66 | + | |
67 | + } | |
68 | + | |
69 | + final public long c5(){ | |
70 | + | |
71 | + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return h;} | |
72 | + | |
73 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2)+v4*(shift*=a3); | |
74 | + shift*=a4; | |
75 | + return h; | |
76 | + } | |
77 | + | |
78 | + | |
79 | + final public void cz6(){ | |
80 | + | |
81 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;} | |
82 | + | |
83 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | |
84 | + h +=v4*(shift*=a3)+v5*(shift*=a4); | |
85 | + shift*=a5; | |
86 | + } | |
87 | + | |
88 | + final public long c6(){ | |
89 | + | |
90 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return h;} | |
91 | + | |
92 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | |
93 | + h +=v4*(shift*=a3)+v5*(shift*=a4); | |
94 | + shift*=a5; | |
95 | + return h; | |
96 | + } | |
97 | + | |
98 | + | |
99 | + final public long cs(int b, int v) { | |
100 | + if (h<0) {h=-1; return h;} | |
101 | + | |
102 | + h += v*shift; | |
103 | + shift *=b; | |
104 | + return h; | |
105 | + | |
106 | + } | |
107 | + | |
108 | + final public void csa(int b, int v, IFV f) { | |
109 | + if (h<0) {h=-1; return;} | |
110 | + | |
111 | + h += v*shift; | |
112 | + shift *=b; | |
113 | + f.add(_li.l2i(h)); | |
114 | + } | |
115 | + | |
116 | + final public long csa(int b, int v) { | |
117 | + if (h<0) {h=-1; return-1; } | |
118 | + | |
119 | + h += v*shift; | |
120 | + shift *=b; | |
121 | + return h; | |
122 | + } | |
123 | + | |
124 | + public final long getVal(){ | |
125 | + return h; | |
126 | + } | |
127 | + | |
128 | + public final void map(IFV f, long l){ | |
129 | + if (l>0) f.add(this._li.l2i(l)); | |
130 | + } | |
131 | + | |
132 | + /** | |
133 | + * @param f | |
134 | + */ | |
135 | + final public void add(IFV f) { | |
136 | + f.add(_li.l2i(h)); | |
137 | + } | |
138 | + | |
139 | + final public void cz7() { | |
140 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;} | |
141 | + | |
142 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | |
143 | + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5); | |
144 | + shift*=a6; | |
145 | + | |
146 | + } | |
147 | + | |
148 | + final public long c7() { | |
149 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;} | |
150 | + | |
151 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | |
152 | + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5); | |
153 | + shift*=a6; | |
154 | + return h; | |
155 | + } | |
156 | + | |
157 | + /** | |
158 | + * | |
159 | + */ | |
160 | + final public void cz8() { | |
161 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;} | |
162 | + | |
163 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | |
164 | + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5)+v7*(shift*=a6); | |
165 | + shift*=a7; | |
166 | + } | |
167 | + | |
168 | + final public void cz9() { | |
169 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0||v8<0) {h=-1; return;} | |
170 | + | |
171 | + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); | |
172 | + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5)+v7*(shift*=a6)+v8*(shift*=a7); | |
173 | + shift*=a8; | |
174 | + } | |
175 | + | |
176 | + | |
177 | + /* (non-Javadoc) | |
178 | + * @see is2.data.DX#computeLabeValue(short, short) | |
179 | + */ | |
180 | + @Override | |
181 | + public int computeLabeValue(int label, int shift) { | |
182 | + return label*shift; | |
183 | + } | |
184 | + | |
185 | + | |
186 | + public void fix() { | |
187 | + | |
188 | + } | |
189 | + | |
190 | + | |
191 | +} | |
0 | 192 | \ No newline at end of file |
... | ... |
dependencyParser/basic/mate-tools/src/is2/data/D6.java
0 → 100644
1 | +/** | |
2 | + * | |
3 | + */ | |
4 | +package is2.data; | |
5 | + | |
6 | +import is2.util.DB; | |
7 | + | |
8 | +/** | |
9 | + * @author Dr. Bernd Bohnet, 30.10.2010 | |
10 | + * | |
11 | + * This class computes the mapping of features to the weight vector. | |
12 | + */ | |
13 | +final public class D6 extends DX { | |
14 | + private long shift; | |
15 | + private long h; | |
16 | + | |
17 | + | |
18 | + private final Long2IntInterface _li; | |
19 | + public D6(Long2IntInterface li) { | |
20 | + _li=li; | |
21 | + } | |
22 | + | |
23 | + boolean fixed =false; | |
24 | + | |
25 | + public void fix() { | |
26 | + | |
27 | + if (fixed) { | |
28 | + DB.println("warning: already fixed"); | |
29 | + // return; | |
30 | + } | |
31 | + | |
32 | + long t0= 1, t1=a0, t2=t1*a1, t3=t2*a2,t4=t3*a3, t5=t4*a4,t6=t5*a5, t7=t6*a6, t8=t7*a7, t9=t8*a8; | |
33 | + | |
34 | + | |
35 | + | |
36 | + | |
37 | + a0=t0;a1=t1;a2=t2;a3=t3;a4=t4;a5=t5;a6=t6;a7=t7;a8=t8; a9=t9; | |
38 | + | |
39 | + fixed=true; | |
40 | + } | |
41 | + | |
42 | + | |
43 | + | |
44 | + final public void clean() { | |
45 | + v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0; | |
46 | + shift=0;h=0; | |
47 | + } | |
48 | + | |
49 | + final public void cz3(){ | |
50 | + if (v0<0||v1<0||v2<0) { h=-1;return;} | |
51 | + | |
52 | + h= v0+v1*a1+v2*a2; | |
53 | + shift =a3; | |
54 | + } | |
55 | + | |
56 | + final public long c3(){ | |
57 | + if (v0<0||v1<0||v2<0) { h=-1;return h;} | |
58 | + | |
59 | + h= v0+v1*a1+v2*a2; | |
60 | + shift =a3; | |
61 | + return h; | |
62 | + } | |
63 | + | |
64 | + final public void cz4(){ | |
65 | + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;} | |
66 | + | |
67 | + h =v0+v1*a1+v2*a2+v3*a3; | |
68 | + shift =a4; | |
69 | + } | |
70 | + | |
71 | + final public long c4(){ | |
72 | + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return h;} | |
73 | + | |
74 | + h =v0+v1*a1+v2*a2+v3*a3; | |
75 | + shift =a4; | |
76 | + return h; | |
77 | + } | |
78 | + | |
79 | + | |
80 | + final public void cz5(){ | |
81 | + | |
82 | + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return;} | |
83 | + | |
84 | + h =v0+v1*a1+v2*a2+v3*a3+v4*a4; | |
85 | + shift=a5; | |
86 | + | |
87 | + } | |
88 | + | |
89 | + final public long c5(){ | |
90 | + | |
91 | + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return h;} | |
92 | + | |
93 | + h =v0+v1*a1+v2*a2+v3*a3+v4*a4; | |
94 | + shift=a5; | |
95 | + return h; | |
96 | + } | |
97 | + | |
98 | + | |
99 | + final public void cz6(){ | |
100 | + | |
101 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;} | |
102 | + | |
103 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; | |
104 | + shift=a6; | |
105 | + } | |
106 | + | |
107 | + final public long c6(){ | |
108 | + | |
109 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return h;} | |
110 | + | |
111 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; | |
112 | + shift=a6; | |
113 | + return h; | |
114 | + } | |
115 | + | |
116 | + | |
117 | + final public long cs(int b, int v) { | |
118 | + if (h<0) {h=-1; return h;} | |
119 | + | |
120 | + h += v*shift; | |
121 | + shift *=b; | |
122 | + return h; | |
123 | + | |
124 | + } | |
125 | + | |
126 | + final public void csa(int b, int v, IFV f) { | |
127 | + if (h<0) {h=-1; return;} | |
128 | + | |
129 | + h += v*shift; | |
130 | + shift *=b; | |
131 | + f.add(_li.l2i(h)); | |
132 | + } | |
133 | + | |
134 | + final public long csa(int b, int v) { | |
135 | + if (h<0) {h=-1; return-1; } | |
136 | + | |
137 | + h += v*shift; | |
138 | + shift *=b; | |
139 | + return h; | |
140 | + } | |
141 | + | |
142 | + public final long getVal(){ | |
143 | + return h; | |
144 | + } | |
145 | + | |
146 | + public final void map(IFV f, long l){ | |
147 | + if (l>0) f.add(this._li.l2i(l)); | |
148 | + } | |
149 | + | |
150 | + /** | |
151 | + * @param f | |
152 | + */ | |
153 | + final public void add(IFV f) { | |
154 | + f.add(_li.l2i(h)); | |
155 | + } | |
156 | + | |
157 | + final public void cz7() { | |
158 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;} | |
159 | + | |
160 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; | |
161 | + shift=a7; | |
162 | + | |
163 | + } | |
164 | + | |
165 | + final public long c7() { | |
166 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;} | |
167 | + | |
168 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; | |
169 | + shift=a7; | |
170 | + return h; | |
171 | + } | |
172 | + | |
173 | + /** | |
174 | + * | |
175 | + */ | |
176 | + final public void cz8() { | |
177 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;} | |
178 | + | |
179 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7; | |
180 | + shift=a8; | |
181 | + } | |
182 | + | |
183 | + | |
184 | + | |
185 | + /* (non-Javadoc) | |
186 | + * @see is2.data.DX#computeLabeValue(short, short) | |
187 | + */ | |
188 | + @Override | |
189 | + public int computeLabeValue(int label, int shift) { | |
190 | + return label*shift; | |
191 | + } | |
192 | + | |
193 | + | |
194 | + | |
195 | + | |
196 | + | |
197 | +} | |
0 | 198 | \ No newline at end of file |
... | ... |
dependencyParser/basic/mate-tools/src/is2/data/D7.java
0 → 100644
1 | +/** | |
2 | + * | |
3 | + */ | |
4 | +package is2.data; | |
5 | + | |
6 | + | |
7 | +/** | |
8 | + * @author Dr. Bernd Bohnet, 30.10.2010 | |
9 | + * | |
10 | + * This class computes the mapping of features to the weight vector. | |
11 | + */ | |
12 | +final public class D7 extends DX { | |
13 | + | |
14 | + private long shift; | |
15 | + private long h; | |
16 | + private final Long2IntInterface _li; | |
17 | + | |
18 | + public D7(Long2IntInterface li) { | |
19 | + _li=li; | |
20 | + } | |
21 | + | |
22 | + boolean fixed =false; | |
23 | + | |
24 | + public void fix() { | |
25 | + | |
26 | + long t0= 1, t1=a0, t2=t1*a1, t3=t2*a2,t4=t3*a3, t5=t4*a4,t6=t5*a5, t7=t6*a6, t8=t7*a7, t9=t8*a8; | |
27 | + | |
28 | + a0=t0;a1=t1;a2=t2;a3=t3;a4=t4;a5=t5;a6=t6;a7=t7;a8=t8; a9=t9; | |
29 | + | |
30 | + } | |
31 | + | |
32 | + | |
33 | + | |
34 | + final public void clean() { | |
35 | + v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0; | |
36 | + shift=0;h=0; | |
37 | + } | |
38 | + | |
39 | + final public void cz3(){ | |
40 | + if (v2<0) { h=-1;return;} | |
41 | + | |
42 | + h= v0+v1*a1+v2*a2; | |
43 | + shift =a3; | |
44 | + } | |
45 | + | |
46 | + final public long c3(){ | |
47 | + if (v2<0) { h=-1;return h;} | |
48 | + | |
49 | + h= v0+v1*a1+v2*a2; | |
50 | + shift =a3; | |
51 | + return h; | |
52 | + } | |
53 | + | |
54 | + final public long d3(){ | |
55 | + if (v2<0)return -1; | |
56 | + return v0+v2*a2; | |
57 | + } | |
58 | + | |
59 | + final public void cz4(){ | |
60 | + // if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;} | |
61 | + if (v2<0||v3<0) {h=-1;return;} | |
62 | + | |
63 | + h =v0+v1*a1+v2*a2+v3*a3; | |
64 | + shift =a4; | |
65 | + } | |
66 | + | |
67 | + final public long c4(){ | |
68 | + if (v2<0||v3<0) {h=-1;return h;} | |
69 | + | |
70 | + h =v0+v1*a1+v2*a2+v3*a3; | |
71 | + shift =a4; | |
72 | + return h; | |
73 | + } | |
74 | + | |
75 | + | |
76 | + final public long d4(){ | |
77 | + if (v2<0||v3<0) return -1; | |
78 | + return v0+v2*a2+v3*a3; | |
79 | + } | |
80 | + | |
81 | + | |
82 | + final public void cz5(){ | |
83 | + | |
84 | + if (v2<0||v3<0||v4<0) {h=-1;return;} | |
85 | + | |
86 | + h =v0+v1*a1+v2*a2+v3*a3+v4*a4; | |
87 | + shift=a5; | |
88 | + | |
89 | + } | |
90 | + | |
91 | + final public long c5(){ | |
92 | + | |
93 | + if (v2<0||v3<0||v4<0) {h=-1;return h;} | |
94 | + | |
95 | + h =v0+v1*a1+v2*a2+v3*a3+v4*a4; | |
96 | + shift=a5; | |
97 | + return h; | |
98 | + } | |
99 | + | |
100 | + final public long d5(){ | |
101 | + if (v2<0||v3<0||v4<0) return -1; | |
102 | + return v0+v2*a2+v3*a3+v4*a4; | |
103 | + } | |
104 | + | |
105 | + | |
106 | + final public void cz6(){ | |
107 | + | |
108 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;} | |
109 | + | |
110 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; | |
111 | + shift=a6; | |
112 | + } | |
113 | + | |
114 | + final public long c6(){ | |
115 | + | |
116 | + if (v2<0||v3<0||v4<0||v5<0) {h=-1; return h;} | |
117 | + | |
118 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; | |
119 | + shift=a6; | |
120 | + return h; | |
121 | + } | |
122 | + | |
123 | + final public long d6(){ | |
124 | + if (v2<0||v3<0||v4<0||v5<0) return -1; | |
125 | + return v0+v2*a2+v3*a3 +v4*a4+v5*a5; | |
126 | + } | |
127 | + | |
128 | + | |
129 | + final public long cs(int b, int v) { | |
130 | + if (h<0) {h=-1; return h;} | |
131 | + | |
132 | + h += v*shift; | |
133 | + shift *=b; | |
134 | + return h; | |
135 | + | |
136 | + } | |
137 | + | |
138 | + final public void csa(int b, int v, IFV f) { | |
139 | + if (h<0) {h=-1; return;} | |
140 | + | |
141 | + h += v*shift; | |
142 | + shift *=b; | |
143 | + f.add(_li.l2i(h)); | |
144 | + } | |
145 | + | |
146 | + final public long csa(int b, int v) { | |
147 | + if (h<0) {h=-1; return-1; } | |
148 | + | |
149 | + h += v*shift; | |
150 | + shift *=b; | |
151 | + return h; | |
152 | + } | |
153 | + | |
154 | + public final long getVal(){ | |
155 | + return h; | |
156 | + } | |
157 | + | |
158 | + public final void map(IFV f, long l){ | |
159 | + if (l>0) f.add(this._li.l2i(l)); | |
160 | + } | |
161 | + | |
162 | + /** | |
163 | + * @param f | |
164 | + */ | |
165 | + final public void add(IFV f) { | |
166 | + f.add(_li.l2i(h)); | |
167 | + } | |
168 | + | |
169 | + final public void cz7() { | |
170 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;} | |
171 | + | |
172 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; | |
173 | + shift=a7; | |
174 | + | |
175 | + } | |
176 | + | |
177 | + | |
178 | + final public long c7() { | |
179 | + if (v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;} | |
180 | + | |
181 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; | |
182 | + shift=a7; | |
183 | + return h; | |
184 | + } | |
185 | + | |
186 | + final public long d7() { | |
187 | + if (v2<0||v3<0||v4<0||v5<0||v6<0) return -1; | |
188 | + return v0+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; | |
189 | + } | |
190 | + | |
191 | + /** | |
192 | + * | |
193 | + */ | |
194 | + final public void cz8() { | |
195 | + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;} | |
196 | + | |
197 | + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7; | |
198 | + shift=a8; | |
199 | + } | |
200 | + | |
201 | + final public long d8() { | |
202 | + if (v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {return-1;} | |
203 | + return v0+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7; | |
204 | + } | |
205 | + | |
206 | + | |
207 | + | |
208 | + /* (non-Javadoc) | |
209 | + * @see is2.data.DX#computeLabeValue(short, short) | |
210 | + */ | |
211 | + @Override | |
212 | + public int computeLabeValue(int label, int shift) { | |
213 | + return label*shift; | |
214 | + } | |
215 | + | |
216 | + | |
217 | + | |
218 | + | |
219 | + | |
220 | +} | |
0 | 221 | \ No newline at end of file |
... | ... |
dependencyParser/basic/mate-tools/src/is2/data/DPSTree.java
0 → 100644
1 | +/** | |
2 | + * | |
3 | + */ | |
4 | +package is2.data; | |
5 | + | |
6 | +import is2.util.DB; | |
7 | + | |
8 | +import java.util.ArrayList; | |
9 | +import java.util.Collections; | |
10 | +import java.util.Stack; | |
11 | + | |
/**
 * @author Dr. Bernd Bohnet, 17.01.2011
 *
 * Dynamic phrase structure tree. The nodes are numbered from 0; for each
 * node the arrays hold its head ({@link #heads}) and its label
 * ({@link #labels}). The arrays grow on demand and can be longer than
 * {@link #size()}.
 */
public class DPSTree {

    /** The number of nodes in use; may be smaller than the length of the arrays. */
    private int size=0;

    public int[] heads;
    public int[] labels;

    /** Creates a tree with room for 30 nodes. */
    public DPSTree() {
        this(30);
    }

    /**
     * @param initialCapacity the number of nodes the tree can hold before the arrays have to grow
     */
    public DPSTree(int initialCapacity) {
        heads = new int[initialCapacity];
        labels = new int[initialCapacity];
    }

    /**
     * Increases the capacity of this tree, if necessary, to ensure that it
     * can hold at least the number of nodes specified by the minimum
     * capacity argument. The arrays grow by at least half of their length,
     * so that adding nodes one by one does not copy the arrays each time.
     *
     * @param minCapacity the desired minimum capacity.
     */
    private void ensureCapacity(int minCapacity) {

        if (minCapacity > heads.length) {

            // an overflow of the grown length yields a negative value; then minCapacity wins
            int grown = heads.length + (heads.length >> 1);
            int newCapacity = Math.max(minCapacity, grown);

            int oldIndex[] = heads;
            heads = new int[newCapacity];
            System.arraycopy(oldIndex, 0, heads, 0, oldIndex.length);

            oldIndex = labels;
            labels = new int[newCapacity];
            System.arraycopy(oldIndex, 0, labels, 0, oldIndex.length);
        }
    }

    /** @return the number of nodes */
    final public int size() {
        return size;
    }

    /** @return true if the tree has no nodes */
    final public boolean isEmpty() {
        return size == 0;
    }

    /** Removes all nodes; the content of the arrays is not reset. */
    final public void clear() {
        size = 0;
    }

    /**
     * Sets the number of nodes to terminals+1; the heads and labels of
     * these nodes keep whatever the arrays contain.
     */
    final public void createTerminals(int terminals) {
        ensureCapacity(terminals+1);
        size= terminals+1;
    }

    /**
     * Appends a node.
     *
     * @param phrase the label of the new node
     * @return the id of the new node
     */
    final public int create(int phrase) {

        ensureCapacity(size+1);
        labels[size] =phrase;
        size++;
        return size-1;
    }

    /**
     * Creates the node with the given id or, if the id is negative, appends
     * a new node.
     *
     * @param phrase the label of the node
     * @param nodeId the id of the node; a negative value appends a node
     * @return the id of the node
     */
    public int create(int phrase, int nodeId) {

        if (nodeId<0) return this.create(phrase);
        ensureCapacity(nodeId+1);
        labels[nodeId] =phrase;
        // '<=': a node created at position size has to be counted too,
        // otherwise the next appended node would overwrite it
        if (size<=nodeId) size=nodeId+1;
        return nodeId;
    }

    /**
     * Sets the head of node i to j. The arrays are not enlarged here; i
     * has to be smaller than the length of {@link #heads}.
     */
    public void createEdge(int i, int j) {
        heads[i] =j;
    }

    /**
     * @return a copy of this tree that contains the heads and labels of all nodes
     */
    @Override
    public DPSTree clone() {
        DPSTree ps = new DPSTree(this.size+1);

        System.arraycopy(heads, 0, ps.heads, 0, size);
        System.arraycopy(labels, 0, ps.labels, 0, size);
        ps.size=size;
        return ps;
    }

}
0 | 116 | \ No newline at end of file |
... | ... |
dependencyParser/basic/mate-tools/src/is2/data/DX.java
0 → 100644
1 | +/** | |
2 | + * | |
3 | + */ | |
4 | +package is2.data; | |
5 | + | |
6 | +import is2.data.IFV; | |
7 | + | |
8 | +/** | |
9 | + * @author Dr. Bernd Bohnet, 30.08.2011 | |
10 | + * | |
11 | + * | |
12 | + */ | |
13 | +public abstract class DX { | |
14 | + | |
15 | + public long a0,a1,a2,a3,a4,a5,a6,a7,a8,a9; | |
16 | + public long v0,v1,v2,v3,v4,v5,v6,v7,v8,v9; | |
17 | + | |
18 | + public abstract void cz3(); | |
19 | + | |
20 | + public abstract void cz4(); | |
21 | + | |
22 | + public abstract void cz5(); | |
23 | + | |
24 | + public abstract void cz6(); | |
25 | + | |
26 | + public abstract void cz7(); | |
27 | + | |
28 | + public abstract void cz8(); | |
29 | + | |
30 | + public abstract void clean(); | |
31 | + | |
32 | + public abstract long cs(int b, int v); | |
33 | + | |
34 | + public abstract long csa(int b, int v); | |
35 | + | |
36 | + public abstract void csa(int b, int v, IFV f); | |
37 | + | |
38 | + /** | |
39 | + * @return | |
40 | + */ | |
41 | + public abstract long getVal(); | |
42 | + | |
43 | + /** | |
44 | + * @param f | |
45 | + * @param l | |
46 | + */ | |
47 | + public abstract void map(IFV f, long l); | |
48 | + | |
49 | + /** | |
50 | + * @param label | |
51 | + * @param s_type | |
52 | + * @return | |
53 | + */ | |
54 | + public abstract int computeLabeValue(int label,int s_type) ; | |
55 | + | |
56 | + public abstract void fix(); | |
57 | + | |
58 | +} | |
0 | 59 | \ No newline at end of file |
... | ... |
dependencyParser/basic/mate-tools/src/is2/data/DataF.java
0 → 100755
1 | +package is2.data; | |
2 | + | |
3 | + | |
4 | + | |
5 | +final public class DataF { | |
6 | + | |
7 | + final public short typesLen; | |
8 | + final public int len; | |
9 | + | |
10 | + // first order features | |
11 | + final public float[][] pl; | |
12 | + | |
13 | + // remove !!!! | |
14 | +// final public float[][] highestLab; | |
15 | + | |
16 | + //final public FV[][][] label; | |
17 | + final public float[][][][] lab; | |
18 | + | |
19 | + | |
20 | + public FV fv; | |
21 | + | |
22 | + final public float[][][][][] sib; | |
23 | + | |
24 | + final public float[][][][][] gra; | |
25 | + | |
26 | + | |
27 | + public DataF(int length, short types) { | |
28 | + typesLen=types; | |
29 | + len =length; | |
30 | + | |
31 | + pl = new float[length][length]; | |
32 | + lab = new float[length][length][types][2]; | |
33 | + // highestLab = new float[length][length]; | |
34 | + | |
35 | + sib = new float[length][length][length][2][]; | |
36 | + gra = new float[length][length][length][2][]; | |
37 | + | |
38 | + } | |
39 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/is2/data/DataFES.java
0 → 100644
1 | +package is2.data; | |
2 | + | |
3 | + | |
4 | + | |
5 | +final public class DataFES { | |
6 | + | |
7 | + final public short typesLen; | |
8 | + final public int len; | |
9 | + | |
10 | + // first order features | |
11 | + final public float[][] pl; | |
12 | + | |
13 | + // remove !!!! | |
14 | +// final public float[][] highestLab; | |
15 | + | |
16 | + //final public FV[][][] label; | |
17 | + final public float[][][] lab; | |
18 | + | |
19 | + | |
20 | + public FV fv; | |
21 | + | |
22 | + final public float[][][][] sib; | |
23 | + | |
24 | + final public float[][][][] gra; | |
25 | + | |
26 | + | |
27 | + public DataFES(int length, short types) { | |
28 | + typesLen=types; | |
29 | + len =length; | |
30 | + | |
31 | + pl = new float[length][length]; | |
32 | + lab = new float[length][length][types]; | |
33 | + | |
34 | + sib = new float[length][length][length][]; | |
35 | + gra = new float[length][length][length][]; | |
36 | + | |
37 | + } | |
38 | +} | |
... | ... |
dependencyParser/basic/mate-tools/src/is2/data/DataT.java
0 → 100644
1 | +package is2.data; | |
2 | + | |
3 | + | |
4 | + | |
/**
 * A score table for one sentence with four values per entry.
 */
final public class DataT {

    /** The number of types (labels) given to the constructor. */
    final public short typesLen;

    /** The length given to the constructor. */
    final public int len;

    /** The scores: [a][b][lab][op] with the sizes [length][length][types][4]. */
    final public float[][][][] lab;

    /**
     * @param length the size of the first two dimensions of {@link #lab}
     * @param types the size of the label dimension of {@link #lab}
     */
    public DataT(int length, short types) {
        this.len = length;
        this.typesLen = types;
        this.lab = new float[length][length][types][4];
    }
}
... | ... |