Commit d104f60d12d6c6a6335718b56cac59ffe40dd9c3

Authored by Jan Lupa
1 parent d56a8b56

Dependency Parser split into two versions: basic and experimental

Showing 495 changed files with 34859 additions and 5714 deletions

Too many changes to show.

To preserve performance only 32 of 495 files are displayed.

dependencyParser/basic/mate-tools/.classpath 0 → 100644
  1 +<?xml version="1.0" encoding="UTF-8"?>
  2 +<classpath>
  3 + <classpathentry kind="src" path="src"/>
  4 + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
  5 + <classpathentry kind="lib" path="/mtt/lib/trove.jar"/>
  6 + <classpathentry kind="lib" path="lib/commons-math-2.2.jar"/>
  7 + <classpathentry kind="output" path="classes"/>
  8 +</classpath>
... ...
dependencyParser/mate-tools/.externalToolBuilders/New_Builder.launch renamed to dependencyParser/basic/mate-tools/.externalToolBuilders/New_Builder.launch
dependencyParser/mate-tools/.externalToolBuilders/ana.launch renamed to dependencyParser/basic/mate-tools/.externalToolBuilders/ana.launch
dependencyParser/mate-tools/.project renamed to dependencyParser/basic/mate-tools/.project
dependencyParser/mate-tools/build.xml renamed to dependencyParser/basic/mate-tools/build.xml
dependencyParser/mate-tools/lib/commons-math-2.2.jar renamed to dependencyParser/basic/mate-tools/lib/commons-math-2.2.jar
No preview for this file type
dependencyParser/mate-tools/lib/trove-2.0.4.jar renamed to dependencyParser/basic/mate-tools/lib/trove-2.0.4.jar
No preview for this file type
dependencyParser/basic/mate-tools/src/decoder/ParallelDecoder.java 0 → 100755
  1 +package decoder;
  2 +
  3 +import is2.data.Closed;
  4 +import is2.data.DataF;
  5 +import is2.data.Edges;
  6 +import is2.data.Open;
  7 +
  8 +import java.util.ArrayList;
  9 +import java.util.concurrent.Callable;
  10 +
  11 +/**
  12 + * @author Bernd Bohnet, 30.08.2009
  13 + *
  14 + * This class implements a parallel feature extractor.
  15 + */
  16 +final public class ParallelDecoder implements Callable<Object>
  17 +{
  18 + // some constants
  19 + private static final float INIT_BEST = (-1.0F / 0.0F);
  20 + private static final boolean[] DIR ={false,true};
  21 +
  22 + // the data space of the weights for a dependency tree
  23 + final private DataF x;
  24 +
  25 + private short[] pos;
  26 +
  27 + private Open O[][][][];
  28 + private Closed C[][][][] ;
  29 +
  30 + private int n;
  31 +
  32 + boolean done=false;
  33 + public boolean waiting =false;
  34 +
  35 + /**
  36 + * Initialize the parallel decoder.
  37 + *
  38 + * @param pos part-of-speech
  39 + * @param d data
  40 + * @param edges part-of-speech edge mapping
  41 + * @param o open spans
  42 + * @param c closed spans
  43 + * @param n number of words
  44 + */
  45 + public ParallelDecoder(short[] pos, DataF d, Open o[][][][], Closed c[][][][], int n) {
  46 +
  47 + this.pos =pos;
  48 + this.x =d;
  49 +
  50 + this.O=o;
  51 + this.C=c;
  52 + this.n=n;
  53 + }
  54 +
  55 +
  56 + private static class DSet { short w1,w2;}
  57 +
  58 + @Override
  59 + public Object call() {
  60 +
  61 + while (true){
  62 +
  63 + DSet set = get();
  64 + if (done && set==null) break;
  65 +
  66 + if (set ==null) return null;
  67 +
  68 + short s=set.w1, t=set.w2;
  69 +
  70 + for(short dir =1;dir>=0;dir--) {
  71 +
  72 + short[] labs = (dir==1) ? Edges.get(pos[s],pos[t], false):Edges.get(pos[t],pos[s], true);
  73 +
  74 + O[s][t][dir] = new Open[labs.length];
  75 + for (int l = O[s][t][dir].length - 1; l >= 0; l--) {
  76 +
  77 + double tRP = INIT_BEST;
  78 +
  79 + Closed tL = null, tR = null;
  80 +
  81 + for (int r = s; r < t; r++) {
  82 +
  83 + if (s == 0 && r != 0) continue;
  84 +
  85 + double tLPr = INIT_BEST,tRPr = INIT_BEST;
  86 + Closed tLCld = null, tRCld = null;
  87 +
  88 + if (r == s) tLPr = dir==1 ? x.sib[s][t][s][0][l] : x.gra[t][s][s][1 ][l];
  89 + else
  90 + for (int i = s + 1; i <= r; i++)
  91 + if (((dir==1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + C[s][r][1][i].p) > tLPr) {
  92 + tLPr = ((dir==1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + C[s][r][1][i].p);tLCld = C[s][r][1][i];}
  93 +
  94 + if (r == t-1) tRPr = dir==1 ? x.gra[s][t][s][0][l] : x.sib[t][s][s][1][l];
  95 + else
  96 + for (int i = r + 1; i < t; i++)
  97 + if (((dir == 1 ? x.gra[s][t][i][0][l] : x.sib[t][s][i][1][l]) + C[r+1][t][0][i].p) > tRPr) {
  98 + tRPr = ((dir==1?x.gra[s][t][i][0][l]:x.sib[t][s][i][1][l]) + C[r+1][t][0][i].p); tRCld=C[r + 1][t][0][i];}
  99 +
  100 + if (tLPr + tRPr > tRP) {tRP = tLPr + tRPr; tL = tLCld;tR = tRCld;}
  101 + }
  102 + O[s][t][dir][l] = new Open(s, t, dir, labs[l],tL, tR,
  103 + (float) ( tRP+((dir==1)?x.pl[s][t]: x.pl[t][s]) + ((dir==1)? x.lab[s][t][labs[l]][0]:x.lab[t][s][labs[l]][1])));
  104 + }
  105 + }
  106 + C[s][t][1] = new Closed[n]; C[s][t][0] = new Closed[n];
  107 +
  108 + for (int m = s ; m <= t; m++) {
  109 + for(boolean d : DIR) {
  110 + if ((d && m!=s)||!d && (m!=t && s!=0)) {
  111 +
  112 + // create closed structure
  113 +
  114 + double top = INIT_BEST;
  115 +
  116 + Open tU = null; Closed tL = null;
  117 + int numLabels =O[(d ? s : m)][(d ? m : t)][d?1:0].length;
  118 +
  119 + //for (int l = numLabels-1; l >=0; l--) {
  120 + for (int l = 0; l < numLabels; l++) {
  121 +
  122 + Open hi = O[(d ? s : m)][(d ? m : t)][d?1:0][l];
  123 + for (int amb = m + (d?1:-1); amb != (d?t:s) + (d?1:-1); amb += (d?1:-1)) {
  124 +
  125 + if ((hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][d?0:1][l]) > top) {
  126 + top = (hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][(d?0:1)][l]); tU = hi; tL=C[d?m:s][d?t:m][d?1:0][amb];}
  127 + }
  128 +
  129 + if ((m == (d ? t : s)) && (hi.p + x.gra[d?s:t][m][d?s:t][(d ? 0 :1)][l]) > top) {
  130 + top = (hi.p + x.gra[(d ? s : t)][m][d?s:t][d?0:1][l]); tU = hi; tL = null;}
  131 + }
  132 + C[s][t][d?1:0][m] = new Closed(s, t, m, d?1:0,tU,tL,(float) top);
  133 + }
  134 + }
  135 + }
  136 + }
  137 + return null;
  138 + }
  139 +
  140 + public static ArrayList<DSet> sets = new ArrayList<DSet>();
  141 +
  142 + static synchronized private DSet get() {
  143 + synchronized (sets) {
  144 + if (sets.size()==0) return null;
  145 + return sets.remove(sets.size()-1);
  146 + }
  147 + }
  148 +
  149 + public static void add(short w1, short w2){
  150 + DSet ds =new DSet();
  151 + ds.w1=w1;
  152 + ds.w2=w2;
  153 + sets.add(ds);
  154 + }
  155 +}
... ...
dependencyParser/basic/mate-tools/src/decoder/ParallelRearrangeNBest.java 0 → 100755
  1 +package decoder;
  2 +
  3 +import is2.data.DataF;
  4 +import is2.data.Edges;
  5 +import is2.data.Parse;
  6 +import is2.data.ParseNBest;
  7 +
  8 +import java.util.ArrayList;
  9 +import java.util.concurrent.Callable;
  10 +
  11 +import extractors.Extractor;
  12 +
  13 +/**
  14 + * @author Dr. Bernd Bohnet, 30.08.2009
  15 + *
  16 + * This class implements a parallel edge rearrangement for non-projective parsing;
  17 + * The linear method was first suggest by Rayn McDonald et. al. 2005.
  18 + */
  19 +final public class ParallelRearrangeNBest implements Callable<Object> {
  20 +
  21 + // new parent child combination to explore
  22 + final static class PA {
  23 + final float p;
  24 + final short ch, pa;
  25 +
  26 + float best;
  27 +
  28 +
  29 +
  30 + public PA(float p2, short ch2, short pa2) { p=p2; ch=ch2;pa=pa2;}
  31 + }
  32 +
  33 + // list of parent child combinations
  34 + private static ArrayList<PA> parents = new ArrayList<PA>();
  35 +
  36 + // some data from the dependency tree
  37 + private short[] pos;
  38 + private DataF x;
  39 + private boolean[][] isChild ;
  40 + public short[] heads,types;
  41 + private float lastNBest;
  42 + private float best; // best so far
  43 + private float threshold;
  44 + private Extractor extractor;
  45 +
  46 +
  47 + /**
  48 + * Initialize the parallel rearrange thread
  49 + *
  50 + * @param isChild2 is a child
  51 + * @param edgesC the part-of-speech edge mapping
  52 + * @param pos the part-of-speech
  53 + * @param x the data
  54 + * @param lastNBest
  55 + * @param s the heads
  56 + * @param ts the types
  57 + */
  58 + public ParallelRearrangeNBest(short[] pos , DataF x, Parse p, float lastNBest, Extractor extractor, float best, float threshold) {
  59 +
  60 +
  61 + heads=p.heads;
  62 +
  63 + types= p.labels;
  64 +
  65 + isChild = new boolean[heads.length][heads.length];
  66 +
  67 + for(int i = 1, l1=1; i < heads.length; i++,l1=i)
  68 + while((l1= heads[l1]) != -1) isChild[l1][i] = true;
  69 +
  70 +
  71 + this.lastNBest =lastNBest;
  72 + this.pos =pos;
  73 + this.x=x;
  74 +
  75 + this.extractor = extractor;
  76 + this.best=best;
  77 + this.threshold = threshold;
  78 + }
  79 +
  80 + public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>();
  81 +
  82 + @Override
  83 + public Object call() {
  84 +
  85 + // check the list of new possible parents and children for a better combination
  86 + for(int ch = 1; ch < heads.length; ch++) {
  87 + for(short pa = 0; pa < heads.length; pa++) {
  88 + if(ch == pa || pa == heads[ch] || isChild[ch][pa]) continue;
  89 +
  90 + short oldP = heads[ch], oldT = types[ch];
  91 + heads[ch]=pa;
  92 +
  93 + short[] labels = Edges.get(pos[pa], pos[ch],ch<pa);
  94 +
  95 + for(int l=0;l<labels.length;l++) {
  96 +
  97 + types[ch]=labels[l];
  98 + float p_new = extractor.encode3(pos, heads, types, x);
  99 +
  100 + if (p_new<lastNBest || ((best+this.threshold)>p_new)) continue;
  101 +
  102 + ParseNBest p = new ParseNBest();
  103 + p.signature(heads, types);
  104 + p.f1=p_new;
  105 + parses.add(p);
  106 + }
  107 +
  108 + // change back
  109 + heads[ch]= oldP; types[ch]=oldT;
  110 +
  111 + // consider changes to labels only
  112 + labels = Edges.get(pos[oldP], pos[ch],ch<oldP);
  113 +
  114 + for(int l=0;l<labels.length;l++) {
  115 +
  116 + types[ch]=labels[l];
  117 + float p_new = (float) extractor.encode3(pos, heads, types, x);
  118 +
  119 + // optimization: add only if larger than smallest of n-best
  120 + if (p_new<lastNBest || ((best+this.threshold)>p_new)) continue;
  121 +
  122 + ParseNBest p = new ParseNBest();
  123 + p.signature(heads, types);
  124 + p.f1=p_new;
  125 + parses.add(p);
  126 + }
  127 +
  128 + heads[ch]= oldP; types[ch]=oldT;
  129 + }
  130 + }
  131 + return parses;
  132 + }
  133 +
  134 +
  135 +
  136 +}
... ...
dependencyParser/basic/mate-tools/src/decoder/ParallelRearrangeNBest2.java 0 → 100644
  1 +package decoder;
  2 +
  3 +import is2.data.DataF;
  4 +import is2.data.Edges;
  5 +import is2.data.Parse;
  6 +import is2.data.ParseNBest;
  7 +
  8 +import java.util.ArrayList;
  9 +import java.util.concurrent.Callable;
  10 +
  11 +import decoder.ParallelRearrangeNBest.PA;
  12 +
  13 +import extractors.Extractor;
  14 +
  15 +/**
  16 + * @author Dr. Bernd Bohnet, 30.08.2009
  17 + *
  18 + * This class implements a parallel edge rearrangement for non-projective parsing;
  19 + * The linear method was first suggest by Rayn McDonald et. al. 2005.
  20 + */
  21 +final public class ParallelRearrangeNBest2 implements Callable<Object> {
  22 +
  23 + // new parent child combination to explore
  24 + final static class PA {
  25 + final float p;
  26 + final short ch, pa;
  27 +
  28 +
  29 + public short[] heads,types;
  30 +
  31 + public PA(Parse p, short ch2, short pa2) {
  32 + this.p =(float)p.f1;
  33 + heads =p.heads;
  34 + types=p.labels;
  35 + ch=ch2;pa=pa2;
  36 +
  37 + }
  38 + }
  39 +
  40 + // list of parent child combinations
  41 + private static ArrayList<PA> parents = new ArrayList<PA>();
  42 +
  43 + // some data from the dependency tree
  44 + private short[] pos;
  45 + private DataF x;
  46 + private float lastNBest;
  47 + private float threshold;
  48 + private Extractor extractor;
  49 +
  50 +
  51 + /**
  52 + * Initialize the parallel rearrange thread
  53 + * @param pos the part-of-speech
  54 + * @param x the data
  55 + * @param lastNBest
  56 + * @param isChild2 is a child
  57 + * @param edgesC the part-of-speech edge mapping
  58 + * @param s the heads
  59 + * @param ts the types
  60 + */
  61 + public ParallelRearrangeNBest2(short[] pos , DataF x, float lastNBest, Extractor extractor, float threshold) {
  62 +
  63 +
  64 +
  65 + this.lastNBest =lastNBest;
  66 + this.pos =pos;
  67 + this.x=x;
  68 +
  69 + this.extractor = extractor;
  70 + this.threshold = threshold;
  71 + }
  72 +
  73 + public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>();
  74 +
  75 + @Override
  76 + public Object call() {
  77 +
  78 + try {
  79 +
  80 + while(true) {
  81 + PA p = getPA();
  82 +
  83 + if (p==null) return parses;
  84 +
  85 + short oldP = p.heads[p.ch], oldT = p.types[p.ch];
  86 + p.heads[p.ch]=p.pa;
  87 +
  88 + short[] labels = Edges.get(pos[p.pa], pos[p.ch],p.ch<p.pa);
  89 +
  90 + for(int l=0;l<labels.length;l++) {
  91 +
  92 + p.types[p.ch]=labels[l];
  93 + float p_new = extractor.encode3(pos, p.heads, p.types, x);
  94 +
  95 + if (p_new<lastNBest || ((p.p+this.threshold)>p_new)) continue;
  96 +
  97 + ParseNBest x = new ParseNBest();
  98 + x.signature(p.heads, p.types);
  99 + x.f1=p_new;
  100 + parses.add(x);
  101 + }
  102 +
  103 + // change back
  104 + p.heads[p.ch]= oldP; p.types[p.ch]=oldT;
  105 +
  106 + // consider changes to labels only
  107 + labels = Edges.get(pos[oldP], pos[p.ch],p.ch<oldP);
  108 +
  109 + for(int l=0;l<labels.length;l++) {
  110 +
  111 + p.types[p.ch]=labels[l];
  112 + float p_new = (float) extractor.encode3(pos, p.heads, p.types, x);
  113 +
  114 + // optimization: add only if larger than smallest of n-best
  115 + if (p_new<lastNBest || ((p.p+this.threshold)>p_new)) continue;
  116 +
  117 + ParseNBest x = new ParseNBest();
  118 + x.signature(p.heads, p.types);
  119 + x.f1=p_new;
  120 + parses.add(x);
  121 + }
  122 +
  123 + p.heads[p.ch]= oldP; p.types[p.ch]=oldT;
  124 + }
  125 + } catch(Exception e) {
  126 + e.printStackTrace();
  127 + }
  128 + return parses;
  129 + }
  130 +
  131 + /**
  132 + * Add a child-parent combination which are latter explored for rearrangement
  133 + *
  134 + * @param p2
  135 + * @param ch2
  136 + * @param pa
  137 + */
  138 + public static void add(Parse p, short ch2, short pa) {
  139 + parents.add(new PA(p,ch2,pa));
  140 + }
  141 +
  142 + public static PA getPA() {
  143 + synchronized(parents) {
  144 + if (parents.size()==0) return null;
  145 + return parents.remove(parents.size()-1);
  146 + }
  147 + }
  148 +
  149 +
  150 +
  151 +}
... ...
dependencyParser/basic/mate-tools/src/examples/DependencyParser.java 0 → 100644
  1 +package examples;
  2 +
  3 +
  4 +import is2.data.InstancesTagger;
  5 +import is2.data.SentenceData09;
  6 +import is2.io.CONLLReader09;
  7 +import is2.lemmatizer.Lemmatizer;
  8 +import is2.lemmatizer.MFO;
  9 +import is2.parser.Parser;
  10 +import is2.tag.Tagger;
  11 +//import org.apache.log4j.Logger;
  12 +
  13 +import java.io.File;
  14 +import java.util.Arrays;
  15 +
  16 +/**
  17 + * Dependency parsing
  18 + *
  19 + * @author B. Piwowarski <benjamin@bpiwowar.net>
  20 + * @date 10/10/12
  21 + */
  22 +//@TaskDescription(name = "dependency-parser", project = "mate-tools")
  23 +public class DependencyParser {
  24 + // final static private Logger LOGGER = Logger.getLogger(DependencyParser.class);
  25 + //@Argument(name = "lemmatizer", required = true, checkers = IOChecker.Readable.class)
  26 + File lemmatizerFile;
  27 +
  28 + //@Argument(name = "tagger", required = true)
  29 + File taggerFile;
  30 +
  31 + //@Argument(name = "parser", required = true)
  32 + File parserFile;
  33 +
  34 + //@Override
  35 + public int execute() throws Throwable {
  36 +
  37 + // Load lemmatizer
  38 + //LOGGER.info("Loading lemmatizer");
  39 + // true = do uppercase lemmatization
  40 + Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath());
  41 +
  42 + // Load tagger
  43 + //LOGGER.info("Loading tagger");
  44 + Tagger tagger = new Tagger(taggerFile.getAbsolutePath());
  45 +
  46 + // Load parser
  47 + //LOGGER.info("Loading parser");
  48 + Parser parser = new Parser(parserFile.getAbsolutePath());
  49 +
  50 +
  51 + // Sentences to parse
  52 + String sentences[] = new String[]{
  53 + "Airfields have been constructed on a number of the islands .",
  54 + "Private investment has even made an increasingly modern ferry fleet possible .",
  55 + "Politically , the 1990s have been relatively quite times for the islands ."
  56 + };
  57 +
  58 + CONLLReader09 reader = new CONLLReader09(CONLLReader09.NO_NORMALIZE);
  59 +
  60 + for (String sentence : sentences) {
  61 + // Prepare the sentence
  62 + InstancesTagger instanceTagger = new InstancesTagger();
  63 + instanceTagger.init(1, new MFO());
  64 +
  65 + String[] split = sentence.split("\\s+");
  66 + String[] splitRoot = new String[split.length+1];
  67 + System.arraycopy(split, 0, splitRoot, 1, split.length);
  68 + splitRoot[0] = CONLLReader09.ROOT;
  69 +
  70 + SentenceData09 instance = new SentenceData09();
  71 + instance.init(splitRoot);
  72 +
  73 + reader.insert(instanceTagger, instance);
  74 +
  75 + SentenceData09 result = lemmatizer.apply(instance);
  76 + tagger.apply(result);
  77 + result = parser.parse(result, parser.params, false, parser.options);
  78 +
  79 +
  80 + // Output
  81 + System.out.println(Arrays.toString(result.forms));
  82 + System.out.println(Arrays.toString(result.plemmas));
  83 + System.out.println(Arrays.toString(result.ppos));
  84 + System.out.println(Arrays.toString(result.pheads));
  85 + System.out.println(Arrays.toString(result.plabels));
  86 + System.out.println();
  87 +
  88 + }
  89 +
  90 + return 0;
  91 + }
  92 +}
... ...
dependencyParser/basic/mate-tools/src/examples/FullPipelineSpanish.java 0 → 100644
  1 +package examples;
  2 +
  3 +import is2.data.SentenceData09;
  4 +import is2.io.CONLLWriter09;
  5 +import is2.lemmatizer.Lemmatizer;
  6 +
  7 +import is2.parser.Parser;
  8 +import is2.tag.Tagger;
  9 +import is2.tools.Tool;
  10 +
  11 +import java.io.IOException;
  12 +import java.util.ArrayList;
  13 +import java.util.StringTokenizer;
  14 +
  15 +/**
  16 + * @author Bernd Bohnet, 13.09.2010
  17 + *
  18 + * Illustrates the application the full pipeline: lemmatizer, morphologic, tagger, and parser
  19 + */
  20 +public class FullPipelineSpanish {
  21 +
  22 +
  23 + // shows how to parse a sentences and call the tools
  24 + public static void main(String[] args) throws IOException {
  25 +
  26 + // Create a data container for a sentence
  27 + SentenceData09 i = new SentenceData09();
  28 +
  29 + if (args.length==1) { // input might be a sentence: "This is another test ."
  30 + StringTokenizer st = new StringTokenizer(args[0]);
  31 + ArrayList<String> forms = new ArrayList<String>();
  32 +
  33 + forms.add("<root>");
  34 + while(st.hasMoreTokens()) forms.add(st.nextToken());
  35 +
  36 + i.init(forms.toArray(new String[0]));
  37 +
  38 + } else {
  39 + // provide a default sentence: Haus has a mutated vowel
  40 + i.init(new String[] {"<root>","También","estuve","emocionado","pero","no","pude","imaginar","mi","vida","sin","la",
  41 + "gente","tan","intima","a","mí","."});
  42 +
  43 + }
  44 +
  45 + // lemmatizing
  46 +
  47 + System.out.println("\nReading the model of the lemmatizer");
  48 + Tool lemmatizer = new Lemmatizer("models/lemma-spa.model"); // create a lemmatizer
  49 +
  50 + System.out.println("Applying the lemmatizer");
  51 + lemmatizer.apply(i);
  52 +
  53 + System.out.print(i.toString());
  54 + System.out.print("Lemmata: "); for (String l : i.plemmas) System.out.print(l+" "); System.out.println();
  55 +
  56 + // morphologic tagging
  57 +
  58 + System.out.println("\nReading the model of the morphologic tagger");
  59 + is2.mtag.Tagger morphTagger = new is2.mtag.Tagger("models/mtag-spa.model");
  60 +
  61 + System.out.println("\nApplying the morpholoigc tagger");
  62 + morphTagger.apply(i);
  63 +
  64 + System.out.print(i.toString());
  65 + System.out.print("Morph: "); for (String f : i.pfeats) System.out.print(f+" "); System.out.println();
  66 +
  67 + // part-of-speech tagging
  68 +
  69 + System.out.println("\nReading the model of the part-of-speech tagger");
  70 + Tool tagger = new Tagger("models/tag-spa.model");
  71 +
  72 + System.out.println("\nApplying the part-of-speech tagger");
  73 + tagger.apply(i);
  74 +
  75 + System.out.print(i.toString());
  76 + System.out.print("Part-of-Speech tags: "); for (String p : i.ppos) System.out.print(p+" "); System.out.println();
  77 +
  78 + // parsing
  79 +
  80 + System.out.println("\nReading the model of the dependency parser");
  81 + Tool parser = new Parser("models/prs-spa.model");
  82 +
  83 + System.out.println("\nApplying the parser");
  84 + parser.apply(i);
  85 +
  86 + System.out.println(i.toString());
  87 +
  88 + // write the result to a file
  89 +
  90 + CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt");
  91 +
  92 + writer.write(i, CONLLWriter09.NO_ROOT);
  93 + writer.finishWriting();
  94 +
  95 + }
  96 +
  97 +
  98 +}
... ...
dependencyParser/basic/mate-tools/src/examples/FullPipelineTest.java 0 → 100644
  1 +package examples;
  2 +
  3 +
  4 +import is2.data.InstancesTagger;
  5 +import is2.data.SentenceData09;
  6 +import is2.io.CONLLReader09;
  7 +import is2.io.CONLLWriter09;
  8 +import is2.lemmatizer.Lemmatizer;
  9 +import is2.lemmatizer.MFO;
  10 +import is2.parser.Parser;
  11 +import is2.tag.Tagger;
  12 +//import org.apache.log4j.Logger;
  13 +
  14 +import java.io.File;
  15 +import java.util.Arrays;
  16 +
  17 +/**
  18 + * Dependency parsing
  19 + *
  20 + * @author B. Piwowarski <benjamin@bpiwowar.net>
  21 + * @date 10/10/12
  22 + */
  23 +//@TaskDescription(name = "dependency-parser", project = "mate-tools")
  24 +public class FullPipelineTest {
  25 + // final static private Logger LOGGER = Logger.getLogger(DependencyParser.class);
  26 + //@Argument(name = "lemmatizer", required = true, checkers = IOChecker.Readable.class)
  27 + public File lemmatizerFile;
  28 +
  29 + //@Argument(name = "tagger", required = true)
  30 + public File taggerFile;
  31 +
  32 + public File mtaggerFile;
  33 +
  34 + //@Argument(name = "parser", required = true)
  35 + public File parserFile;
  36 +
  37 + //@Override
  38 + public int execute(String source, String target) throws Throwable {
  39 +
  40 + // Load lemmatizer
  41 + //LOGGER.info("Loading lemmatizer");
  42 + // true = do uppercase lemmatization
  43 + Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath());
  44 +
  45 + // Load tagger
  46 + //LOGGER.info("Loading tagger");
  47 + Tagger tagger = new Tagger(taggerFile.getAbsolutePath());
  48 +
  49 + is2.mtag.Tagger mtagger = new is2.mtag.Tagger(mtaggerFile.getAbsolutePath());
  50 +
  51 + // Load parser
  52 + //LOGGER.info("Loading parser");
  53 + Parser parser = new Parser(parserFile.getAbsolutePath());
  54 +
  55 +
  56 + CONLLReader09 reader = new CONLLReader09(source);
  57 + CONLLWriter09 writer = new CONLLWriter09(target);
  58 +
  59 + int count=0;
  60 + while (true) {
  61 + // Prepare the sentence
  62 + InstancesTagger is = new InstancesTagger();
  63 + is.init(1, new MFO());
  64 +
  65 + SentenceData09 instance= reader.getNext(is);
  66 + if (instance ==null) break;
  67 + SentenceData09 result = null;
  68 +try {
  69 +
  70 + System.out.print("\b\b\b\b"+count);
  71 + result= lemmatizer.apply(instance);
  72 +
  73 + result = tagger.apply(result);
  74 + result= mtagger.apply(result);
  75 + result = parser.apply(result);
  76 +
  77 + count++;
  78 +} catch(Exception e) {
  79 +
  80 + System.out.println("error"+result);
  81 + System.out.println("error"+instance);
  82 + e.printStackTrace();
  83 + break;
  84 +}
  85 +
  86 + // Output
  87 + writer.write(result);
  88 +
  89 + }
  90 + writer.finishWriting();
  91 + return 0;
  92 + }
  93 +
  94 + public static void main(String args[]) throws Throwable {
  95 +
  96 + if (args.length<3) {
  97 + System.out.println("lemmatizer-model tagger-model parser-model source target");
  98 + System.exit(0);
  99 + }
  100 + FullPipelineTest p = new FullPipelineTest();
  101 + p.lemmatizerFile = new File(args[0]);
  102 + p.taggerFile = new File(args[1]);
  103 + p.mtaggerFile = new File(args[2]);
  104 + p.parserFile = new File(args[3]);
  105 +
  106 + p.execute(args[4], args[5]);
  107 +
  108 + }
  109 +
  110 +}
... ...
dependencyParser/basic/mate-tools/src/examples/MorphTagger.java 0 → 100644
  1 +package examples;
  2 +
  3 +import is2.data.SentenceData09;
  4 +import is2.lemmatizer.Lemmatizer;
  5 +import is2.lemmatizer.Options;
  6 +
  7 +import java.io.IOException;
  8 +import java.util.ArrayList;
  9 +import java.util.StringTokenizer;
  10 +
  11 +/**
  12 + * @author Bernd Bohnet, 13.09.2010
  13 + *
  14 + * Illustrates the application of some components: lemmatizer, tagger, and parser
  15 + */
  16 +public class MorphTagger {
  17 +
  18 +
  19 + /**
  20 + * How to lemmatize a sentences?
  21 + */
  22 + public static void main(String[] args) throws IOException {
  23 +
  24 +
  25 + // Create a data container for a sentence
  26 + SentenceData09 i = new SentenceData09();
  27 +
  28 + if (args.length==1) { // input might be a sentence: "This is another test ."
  29 + StringTokenizer st = new StringTokenizer(args[0]);
  30 + ArrayList<String> forms = new ArrayList<String>();
  31 +
  32 + forms.add("<root>");
  33 + while(st.hasMoreTokens()) forms.add(st.nextToken());
  34 +
  35 + i.init(forms.toArray(new String[0]));
  36 +
  37 + } else {
  38 + // provide a default sentence
  39 + i.init(new String[] {"<root>","Häuser","hat","ein","Umlaut","."});
  40 + }
  41 +
  42 + //print the forms
  43 + for (String l : i.forms) System.out.println("forms : "+l);
  44 +
  45 + // tell the lemmatizer the location of the model
  46 + is2.lemmatizer.Options optsLemmatizer = new Options(new String[] {"-model","models/lemma-ger.model"});
  47 +
  48 + // create a lemmatizer
  49 + Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName);
  50 +
  51 + // lemmatize a sentence; the result is stored in the stenenceData09 i
  52 + lemmatizer.apply(i);
  53 +
  54 +
  55 + // output the lemmata
  56 + for (String l : i.plemmas) System.out.println("lemma : "+l);
  57 +
  58 +
  59 + is2.mtag.Options morphologicTaggerOptions = new is2.mtag.Options(new String[] {"-model","models/mtag-ger.model"});
  60 +
  61 + is2.mtag.Tagger mt = new is2.mtag.Tagger(morphologicTaggerOptions);
  62 +
  63 + try {
  64 +
  65 +
  66 + // SentenceData09 snt = is2.mtag.Main.out(i.forms, lemmata);
  67 +
  68 + SentenceData09 snt = mt.apply(i);
  69 + for(String f : snt.pfeats) System.out.println("feats "+f);
  70 +
  71 + } catch(Exception e){
  72 + e.printStackTrace();
  73 + }
  74 +
  75 +
  76 + }
  77 +
  78 +
  79 +}
... ...
dependencyParser/basic/mate-tools/src/examples/ParseOnly.java 0 → 100755
  1 +package examples;
  2 +
  3 +import is2.data.SentenceData09;
  4 +import is2.parser.Options;
  5 +import is2.parser.Parser;
  6 +
  7 +
  8 +public class ParseOnly {
  9 +
  10 + public static void main(String[] args) {
  11 +
  12 + if (args.length ==0) {
  13 + plain();
  14 + }
  15 +
  16 + }
  17 +
  18 + /**
  19 + * This example shows how to parse a sentence.
  20 + */
  21 + public static void plain() {
  22 +
  23 + // initialize the options
  24 + String[] opts ={"-model","models/prs-eng-x.model"};
  25 + Options options = new Options(opts);
  26 +
  27 + // create a parser
  28 + Parser parser = new Parser(options);
  29 +
  30 + // Create a data container for a sentence
  31 + SentenceData09 i = new SentenceData09();
  32 +
  33 + // Provide the sentence
  34 + i.init(new String[] {"<root>","This","is","a","test","."});
  35 + i.setPPos(new String[]{"<root-POS>","DT","VBZ","DT","NN","."});
  36 +
  37 + // parse the sentence
  38 + SentenceData09 out = parser.apply(i);
  39 +
  40 + // output the sentence and dependency tree
  41 + System.out.println(out.toString());
  42 +
  43 + // Get the parsing results
  44 + out.getLabels();
  45 + out.getParents();
  46 +
  47 + }
  48 +
  49 +
  50 +}
... ...
dependencyParser/basic/mate-tools/src/examples/Pipeline.java 0 → 100644
  1 +package examples;
  2 +
  3 +import java.io.File;
  4 +import java.io.IOException;
  5 +import java.util.ArrayList;
  6 +import java.util.StringTokenizer;
  7 +
  8 +import is2.data.SentenceData09;
  9 +import is2.lemmatizer.Lemmatizer;
  10 +import is2.parser.Options;
  11 +import is2.parser.Parser;
  12 +import is2.tag.Tagger;
  13 +
  14 +/**
  15 + * @author Bernd Bohnet, 13.09.2010
  16 + *
  17 + * Illustrates the application of some components: lemmatizer, tagger, and parser
  18 + */
  19 +public class Pipeline {
  20 +
  21 +
  22 + // how to parse a sentences and call the tools
  23 + public static void main(String[] args) throws IOException {
  24 +
  25 +
  26 + // Create a data container for a sentence
  27 + SentenceData09 i = new SentenceData09();
  28 +
  29 + if (args.length==1) { // input might be a sentence: "This is another test ."
  30 + StringTokenizer st = new StringTokenizer(args[0]);
  31 + ArrayList<String> forms = new ArrayList<String>();
  32 +
  33 + forms.add("<root>");
  34 + while(st.hasMoreTokens()) forms.add(st.nextToken());
  35 +
  36 + i.init(forms.toArray(new String[0]));
  37 +
  38 + } else {
  39 + // provide a default sentence
  40 + i.init(new String[] {"<root>","This","is","a","test","."});
  41 + }
  42 +
  43 + //print the forms
  44 + for (String l : i.forms) System.out.println("form : "+l);
  45 +
  46 + // tell the lemmatizer the location of the model
  47 + is2.lemmatizer.Options optsLemmatizer = new is2.lemmatizer.Options(new String[] {"-model","models/lemma-eng.model"});
  48 +
  49 + // create a lemmatizer
  50 + Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName);
  51 +
  52 + // lemmatize a sentence; the result is stored in the stenenceData09 i
  53 + i = lemmatizer.apply(i);
  54 +
  55 +
  56 + // output the lemmata
  57 + for (String l : i.plemmas) System.out.println("lemma : "+l);
  58 +
  59 + // tell the tagger the location of the model
  60 + is2.tag.Options optsTagger = new is2.tag.Options(new String[]{"-model","models/tag-eng.model"});
  61 + Tagger tagger = new Tagger(optsTagger);
  62 +
  63 +
  64 +
  65 +// String pos[] =tagger.tag(i.forms, i.lemmas);
  66 +// i.setPPos(pos);
  67 +
  68 +
  69 + SentenceData09 tagged = tagger.tag(i);
  70 + for (String p : tagged.ppos) System.out.println("pos "+p);
  71 +
  72 +
  73 +
  74 + // initialize the options
  75 + Options optsParser = new Options(new String[]{"-model","models/prs-eng-x.model"});
  76 +
  77 + // create a parser
  78 + Parser parser = new Parser(optsParser);
  79 +
  80 + // parse the sentence (you get a copy of the input i)
  81 + SentenceData09 parse = parser.apply(tagged);
  82 +
  83 + System.out.println(parse.toString());
  84 +
  85 + // create some trash on the hard drive :-)
  86 + is2.io.CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt");
  87 +
  88 + writer.write(i);
  89 + writer.finishWriting();
  90 + }
  91 +
  92 +
  93 +
  94 +
  95 +}
... ...
dependencyParser/basic/mate-tools/src/extractors/Extractor.java 0 → 100644
  1 +/**
  2 + *
  3 + */
  4 +package extractors;
  5 +
  6 +import is2.data.Cluster;
  7 +import is2.data.DataF;
  8 +import is2.data.FV;
  9 +import is2.data.IFV;
  10 +import is2.data.Instances;
  11 +
  12 +/**
  13 + * Common interface of the feature extractors; implemented in this package by ExtractorClusterStacked.
  14 + *
  15 + * @author Dr. Bernd Bohnet, 29.04.2011
  16 + */
  17 +public interface Extractor {
  18 +
  19 +
  20 + /**
  21 + * Initializes the Extractor general parts
  22 + */
  23 + public void initStat();
  24 +
  25 + /**
  26 + * Initializes the Extractor specific parts
  27 + */
  28 + public void init();
  29 + // Label-independent features for the pair (w1, w2): ExtractorClusterStacked emits one feature per POS tag strictly between the two positions into f and returns its template counter.
  30 + public int basic(short[] pos, int[] forms, int w1, int w2, Cluster cluster, IFV f);
  31 + // First-order features for sentence i written into svs; in ExtractorClusterStacked w1 is the parent, w2 the dependent and j the edge label.
  32 + public void firstm(Instances is, int i, int w1, int w2, int j, Cluster cluster, long[] svs);
  33 + // Sibling features written into svs; in ExtractorClusterStacked w1 is the parent, w2 the dependent, g the sibling (-1 = none) and j the edge label. NOTE(review): role of n not visible here - confirm.
  34 + public void siblingm(Instances is, int i, short[] pos, int[] forms,
  35 + int[] lemmas, short[][] feats, int w1, int w2, int g, int j,
  36 + Cluster cluster, long[] svs, int n);
  37 + // Grandchild features written into svs; in ExtractorClusterStacked w1 is the parent, w2 the dependent, g the grandchild (-1 = none) and j the edge label.
  38 + public void gcm(Instances is, int i, int w1, int w2, int g, int j, Cluster cluster, long[] svs);
  39 + // NOTE(review): the meaning of the returned type code is not visible in this part of the file - confirm in the implementations.
  40 + public int getType();
  41 + // NOTE(review): no implementation is visible here; presumably encodes the features of a complete tree (heads/labels) of sentence n into pred - confirm.
  42 + public FV encodeCat(Instances is, int n, short[] pos, int[] is2,
  43 + int[] is3, short[] heads, short[] labels, short[][] s, Cluster cl,
  44 + FV pred);
  45 + // Presumably sets the maxForm threshold; ExtractorClusterStacked replaces form and lemma ids greater than maxForm by -1 - confirm that this setter feeds that field.
  46 + public void setMaxForm(int integer);
  47 +
  48 + /**
  49 + * @return presumably the threshold set via setMaxForm(int) - confirm in the implementations
  50 + */
  51 + public int getMaxForm();
  52 +
  53 + // NOTE(review): no implementation is visible here; presumably scores the tree given by heads/labs using the precomputed values in x - confirm.
  54 + public float encode3(short[] pos, short[] heads, short[] labs, DataF x);
  55 +
  56 +
  57 +
  58 +
  59 +}
... ...
dependencyParser/basic/mate-tools/src/extractors/ExtractorClusterStacked.java 0 → 100755
  1 +package extractors;
  2 +
  3 +
  4 +import is2.data.Cluster;
  5 +import is2.data.D4;
  6 +import is2.data.DataF;
  7 +import is2.data.Edges;
  8 +import is2.data.FV;
  9 +import is2.data.IFV;
  10 +import is2.data.Instances;
  11 +import is2.data.Long2IntInterface;
  12 +import is2.data.MFB;
  13 +import is2.util.DB;
  14 +
  15 +
  16 +
  17 +final public class ExtractorClusterStacked implements Extractor {
  18 +
  19 + public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos; // per-category counter values read from MFB in initStat() and used as D4 slot widths in init(); s_child is not assigned in initStat()
  20 + // One D4 encoder per feature-template family; all are created in the constructor with the shared Long2IntInterface.
  21 + // init() configures the a0..a7 slots of d0, dl1, dl2, dwp, dwwp, dlf, d3lp, d2lp, d2pw and d2pp; dwr, dr and dw are not configured there.
  22 + final D4 d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ;
  23 +
  24 + public final Long2IntInterface li; // mapping object handed to every D4 created by this extractor
  25 +
  26 + public ExtractorClusterStacked(Long2IntInterface li) {
  27 + // Creates the extractor: runs initFeatures() (defined outside this part of the file), stores li and builds one D4 per template family, each sharing li.
  28 + this.initFeatures();
  29 + this.li=li;
  30 + d0 = new D4(li);dl1 = new D4(li);dl2 = new D4(li);
  31 + dwr = new D4(li);
  32 + dr = new D4(li);
  33 + dwwp = new D4(li);
  34 +
  35 + dw = new D4(li);
  36 + dwp = new D4(li);
  37 +
  38 + dlf = new D4(li);
  39 + d3lp = new D4(li); d2lp = new D4(li); d2pw = new D4(li); d2pp = new D4(li);
  40 + // The slot widths (a0..a7) of these D4 objects are not set here; init() assigns them.
  41 + }
  42 +
  43 + public void initStat() {
  44 + // Copies the per-category values of MFB's feature counter into the static s_* fields and looks up the ids of the two direction values (la, ra).
  45 + // NOTE(review): the commented-out getFeatureBits calls suggest these values are bit widths - confirm in MFB.
  46 + MFB mf = new MFB();
  47 + s_rel = mf.getFeatureCounter().get(REL).intValue();
  48 + s_pos = mf.getFeatureCounter().get(POS).intValue();
  49 + s_word = mf.getFeatureCounter().get(WORD).intValue();
  50 + s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits();
  51 + s_dir = mf.getFeatureCounter().get(DIR); // unboxed on assignment; a missing DIR entry (null) would throw NullPointerException here, as for DIST and FEAT below
  52 + la = mf.getValue(DIR, LA);
  53 + ra = mf.getValue(DIR, RA);
  54 + s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST);
  55 + s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT);
  56 + s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH); falls back to 0 when no SPATH entry exists
  57 + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH); falls back to 0 when no LPATH entry exists
  58 + }
  59 +
  60 + public void init(){ // Assigns the slot widths a0..a7 of each D4 template from the static s_* values, so initStat() has to run first; a0 is always s_type and, except for d0, a1 is s_rel.
  61 + // DB.println("init");
  62 + d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos; // POS-only template without a label slot
  63 + dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; // label + POS slots
  64 + dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos; // label + one word slot + POS slots
  65 + dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word; // a6/a7 are not set
  66 + dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word; // a6/a7 are not set
  67 + dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos; // label + POS + two s_feat slots
  68 + d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath; // three s_lpath (cluster) slots
  69 + d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
  70 + d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
  71 + d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
  72 + }
  73 +
  74 +
  75 + public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f)
  76 + {
  77 + // Emits one feature per POS tag strictly between positions p and d into f; form and cluster are not used in this method.
  78 + d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean();
  79 + // d3lp.clean() is called a second time on the next line.
  80 + d3lp.clean(); d2lp.clean();d2pw.clean(); d2pp.clean();
  81 +
  82 + int n=1;
  83 + int dir= (p < d)? ra:la; // ra when p precedes d, otherwise la
  84 + d0.v0= n++; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4;
  85 + int end= (p >= d ? p : d); // larger of the two positions (exclusive upper bound)
  86 + int start = (p >= d ? d : p) + 1; // first position after the smaller one
  87 + // Adjacent or identical positions give an empty range, so nothing is added to f.
  88 + for(int i = start ; i <end ; i++) {
  89 + d0.v3=pposs[i];
  90 + d0.cz4();
  91 + d0.csa(s_dir,dir,f); // presumably appends the direction and adds the encoded feature to f - confirm in D4
  92 + }
  93 + return n; // n is incremented exactly once above, so this is always 2
  94 + }
  95 +
  96 +
  97 + public void firstm(Instances is, int i,
  98 + int prnt, int dpnt, int label, Cluster cluster, long[] f)
  99 + {
  100 + // First-order features of the edge prnt -> dpnt with the given label in sentence i of is; f is zeroed and then filled with the encoded values.
  101 + // n is the running template id stored in v0 of each D4; c is the next free index in f. Statement order matters because the D4 slots (v1..v5) keep their values between statements.
  102 + //short[] pposs, int[] form, int[] lemmas, short[][] feats
  103 + for(int k=0;k<f.length;k++) f[k]=0;
  104 +
  105 + short[] pposs = is.pposs[i];
  106 + int[] form =is.forms[i];
  107 + short[][] feats = is.feats[i];
  108 +
  109 + // F = form id, L = predicted-lemma id (is.plemmas), P = POS id; prefix p = parent, d = dependent
  110 + int pF = form[prnt],dF = form[dpnt];
  111 + int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt];
  112 + int pP = pposs[prnt],dP = pposs[dpnt];
  113 + // cluster ids are looked up from the unclipped form ids (before the maxForm check below); -1 stays -1
  114 + int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF);
  115 +
  116 + final int dir= (prnt < dpnt)? ra:la;
  117 + // form and lemma ids above maxForm are replaced by -1
  118 + if (pF>maxForm) pF=-1;
  119 + if (pL>maxForm) pL=-1;
  120 +
  121 + if (dF>maxForm) dF=-1;
  122 + if (dL>maxForm) dL=-1;
  123 +
  124 + // template ids start at 3 in this method
  125 + int n=3,c=0;
  126 + // word/POS combinations of parent and dependent
  127 + dl2.v1=label;
  128 + dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
  129 + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
  130 + dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
  131 + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
  132 +
  133 +
  134 + dwwp.v1=label;
  135 + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir);
  136 +
  137 + dl1.v1=label;
  138 + dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir);
  139 + dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir);
  140 + dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.csa(s_dir,dir);
  141 + // neighbouring POS tags at distance 1 and 2 (m = minus, p = plus); s_str / s_end are used outside the sentence
  142 + int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str;
  143 + int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end;
  144 +
  145 + int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str;
  146 + int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end;
  147 + // neighbouring forms; s_stwrd is used both before the first and after the last token
  148 + int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd;
  149 + int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd;
  150 +
  151 + // NOTE(review): from here to the "lemmas" section the store index is f[n++], not f[c++]: n advances twice per statement and c does not move.
  152 + // The later f[c++] writes therefore continue at the old c and can overwrite slots filled here - confirm this is intended before changing it (feature ids would shift).
  153 + dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  154 + dl1.v0= n++; dl1.v5=dPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  155 + dl1.v0= n++; dl1.v3=pPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  156 + dl1.v0= n++; dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  157 +
  158 +
  159 + dl1.v0= n++; dl1.v3=pPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  160 + dl1.v0= n++; dl1.v3=dPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  161 + dl1.v0= n++; dl1.v3=dPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  162 + dl1.v0= n++; dl1.v3=pPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  163 + // same patterns with the POS tags at distance 2
  164 + dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  165 + dl1.v0= n++; dl1.v5=dPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  166 + dl1.v0= n++; dl1.v3=pPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  167 + dl1.v0= n++; dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  168 +
  169 + dl1.v0= n++; dl1.v3=pPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  170 + dl1.v0= n++; dl1.v3=dPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  171 + dl1.v0= n++; dl1.v3=dPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  172 + dl1.v0= n++; dl1.v3=pPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  173 +
  174 + // NOTE(review): in the next eight statements dl2.v3 is assigned twice, so the neighbouring form (dFm1, dFp1, pFm1, pFp1) is overwritten by a POS value before encoding.
  175 + // These statements use getVal() instead of csa(s_dir,dir) - presumably the value without a direction part; confirm in D4.
  176 + dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal();
  177 + dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.cz5(); f[n++]=dl2.getVal();
  178 + dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal();
  179 + dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.cz5(); f[n++]=dl2.getVal();
  180 +
  181 +
  182 + dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal();
  183 + dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.cz5(); f[n++]=dl2.getVal();
  184 + dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal();
  185 + dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.cz5(); f[n++]=dl2.getVal();
  186 +
  187 + // NOTE(review): in the third statement below dwwp.v4 is assigned twice (pP, then dP) and v5 is not set before cz6().
  188 + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir);
  189 + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir);
  190 + dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[n++]=dwwp.csa(s_dir,dir);
  191 +
  192 +
  193 + // from here on the store index is f[c++] again
  194 + // lemmas
  195 +
  196 + dl2.v1=label;
  197 + dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
  198 + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
  199 + dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
  200 + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
  201 +
  202 +
  203 + dwwp.v1=label;
  204 + dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir);
  205 +
  206 + dwp.v1= label;
  207 + dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; dwp.cz6(); f[c++]=dwp.csa(s_dir,dir);
  208 + dwp.v0=n++;dwp.cz5(); f[c++]=dwp.csa(s_dir,dir);
  209 +
  210 + dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.csa(s_dir,dir);
  211 + dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir);
  212 + dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir);
  213 +
  214 +
  215 + // cluster
  216 + // cluster ids of parent and dependent, alone and combined with forms (d2pw) or POS tags (d2pp)
  217 + d2pw.v1=label;
  218 + d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.csa(s_dir,dir);
  219 + d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir);
  220 + d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir);
  221 + d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.csa(s_dir,dir);
  222 +
  223 +
  224 + d2pp.v1=label;
  225 + d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.csa(s_dir,dir);
  226 + d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
  227 + d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
  228 + d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.csa(s_dir,dir);
  229 +
  230 + // prel and phead are only referenced by the commented-out stacking features below
  231 + short[] prel = is.plabels[i];
  232 + short[] phead = is.pheads[i];
  233 +
  234 +
  235 + //take those in for stacking
  236 + // dl2.v1=label;
  237 + // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir);
  238 + // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.csa(s_dir,dir);
  239 +
  240 +
  241 + // without morphological feature data for this sentence the method ends here
  242 + if (feats==null) return;
  243 + // dlf is prepared with label and both POS tags; extractFeat (defined outside this part of the file) continues writing into f from index c
  244 + short[] featsP =feats[prnt], featsD =feats[dpnt];
  245 + dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP;
  246 + extractFeat(f, c, dir, featsP, featsD);
  247 +
  248 + return;
  249 + }
  250 +
  251 +
  252 +
  253 + public void gcm(Instances is , int i, int p, int d, int gc, int label,Cluster cluster, long[] f) {
  254 + // Grandchild features for parent p, dependent d and grandchild gc (-1 = none) with the given label in sentence i of is; f is zeroed and then filled from index 0.
  255 + for(int k=0;k<f.length;k++) f[k]=0;
  256 +
  257 + short[] pos= is.pposs[i];
  258 + int[] forms=is.forms[i];
  259 + int[] lemmas=is.plemmas[i];
  260 + short[][] feats=is.feats[i];
  261 + // POS, form, lemma and cluster ids of p and d; the cluster lookup uses the form ids before the maxForm clipping below
  262 + int pP = pos[p], dP = pos[d];
  263 + int prntF = forms[p], chldF = forms[d];
  264 + int prntL = lemmas[p], chldL = lemmas[d];
  265 + int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF);
  266 + // for gc == -1 the placeholders s_str / s_stwrd are used instead of the grandchild's values
  267 + int gP = gc != -1 ? pos[gc] : s_str;
  268 + int gcF = gc != -1 ? forms[gc] : s_stwrd;
  269 + int gcL = gc != -1 ? lemmas[gc] : s_stwrd;
  270 + int gcLS = (gc != -1) && (gcF!=-1) ? cluster.getLP(gcF) : s_stwrd;
  271 + // form and lemma ids above maxForm are replaced by -1
  272 + if (prntF>maxForm) prntF=-1;
  273 + if (prntL>maxForm) prntL=-1;
  274 +
  275 + if (chldF>maxForm) chldF=-1;
  276 + if (chldL>maxForm) chldL=-1;
  277 +
  278 + if (gcF>maxForm) gcF=-1;
  279 + if (gcL>maxForm) gcL=-1;
  280 +
  281 + // dir: direction parent -> dependent; dir_gra: direction dependent -> grandchild (la when gc == -1, since d < -1 is false)
  282 + int dir= (p < d)? ra:la, dir_gra =(d < gc)? ra:la;
  283 + // n: running template id stored in v0 (starts at 84 here); c: next free index in f
  284 + int n=84,c=0;
  285 + // In the first groups cs(s_dir,dir) is called before csa(s_dir,dir_gra) - presumably both directions become part of the value; confirm in D4.
  286 + //dl1.v023();
  287 + dl1.v1=label;
  288 + dl1.v0= n++; dl1.v2=pP; dl1.v3=dP;dl1.v4=gP; dl1.cz5(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
  289 + dl1.v0= n++; dl1.v2=pP; dl1.v3=gP; dl1.cz4();dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
  290 + dl1.v0= n++; dl1.v2=dP; dl1.cz4(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
  291 + // word forms of parent / dependent paired with the grandchild form or POS
  292 + dwwp.v1=label;
  293 + dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=gcF;
  294 + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
  295 +
  296 + dwwp.v0= n++; dwwp.v2=chldF; dwwp.v3=gcF;
  297 + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
  298 +
  299 + dwp.v1=label;
  300 + dwp.v0= n++; dwp.v2=gcF; dwp.v3=pP;
  301 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  302 +
  303 + dwp.v0= n++; dwp.v2=gcF; dwp.v3=dP;
  304 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  305 +
  306 + dwp.v0= n++; dwp.v2=prntF; dwp.v3=gP;
  307 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  308 +
  309 + dwp.v0= n++; dwp.v2=chldF; dwp.v3=gP;
  310 + dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra);
  311 +
  312 +
  313 + // lemma
  314 + // same six patterns as above with lemma ids in place of form ids
  315 + dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=gcL;
  316 + dwwp.cz4();dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
  317 +
  318 + dwwp.v0= n++; dwwp.v2=chldL; dwwp.v3=gcL;
  319 + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
  320 +
  321 + dwp.v0= n++; dwp.v2=gcL; dwp.v3=pP;
  322 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  323 +
  324 + dwp.v0= n++; dwp.v2=gcL; dwp.v3=dP;
  325 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  326 +
  327 + dwp.v0= n++; dwp.v2=prntL; dwp.v3=gP;
  328 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  329 +
  330 + dwp.v0= n++; dwp.v2=chldL; dwp.v3=gP;
  331 + dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra);
  332 +
  333 +
  334 + // clusters
  335 +
  336 + d2lp.v1= label;
  337 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);// f.add(li.l2i(l));
  338 + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);
  339 + d3lp.v0= n++; d3lp.v1= label; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=gcLS; d3lp.cz5(); d3lp.cs(s_dir,dir);f[c++]=d3lp.csa(s_dir,dir_gra);
  340 + // from here on only dir is appended (no cs(...) call and no dir_gra)
  341 + //_f83;
  342 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=gcF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
  343 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.v4=chldF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
  344 + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.v4=prntF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
  345 +
  346 + d2pp.v1= label;
  347 + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=gP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
  348 + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=gcLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
  349 + d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=gcLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
  350 +
  351 +
  352 +
  353 + // linear features
  354 + // POS tags of the direct neighbours of p, d and gc; s_str / s_end are used outside the sentence
  355 + int prntPm1 = p != 0 ? pos[p - 1] : s_str; // parent-pos-minus1
  356 + int chldPm1 = d - 1 >=0 ? pos[d - 1] : s_str; // child-pos-minus1
  357 + int prntPp1 = p != pos.length - 1 ? pos[p + 1] : s_end;
  358 + int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end;
  359 + // NOTE(review): for gc == -1 the "plus 1" lookup below reads pos[gc + 1], i.e. pos[0], rather than a placeholder - confirm this is intended.
  360 + int gcPm1 = gc > 0 ? pos[gc - 1] : s_str;
  361 + int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end;
  362 + // grandchild with the dependent (first eight statements), then with the parent (last eight)
  363 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  364 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  365 + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  366 + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  367 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  368 + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
  369 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP;dl1.v5=chldPp1; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
  370 + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); // v5 is not reassigned here; it still holds chldPp1 from the previous statement
  371 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  372 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  373 + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  374 + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  375 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  376 + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  377 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  378 + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=prntPp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  379 +
  380 + // cluster ids of the right neighbours, looked up by form id; for gc == -1 this reads forms[0], and the gc fallback is s_end while the other two use _cend
  381 + int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend;
  382 + int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend;
  383 + int gcLSp1 = gc < pos.length -1 ? forms[gc + 1] ==-1?-1:cluster.getLP(forms[gc + 1]) : s_end;
  384 + // NOTE(review): the left neighbours are looked up with lemma ids (lemmas[...]) while the right neighbours above use form ids - confirm that getLP is meant to receive lemma ids here.
  385 + int pLSm1 = p != 0 ? lemmas[p - 1]==-1?-1:cluster.getLP(lemmas[p - 1]): _cstr;
  386 + int cLSm1 = d - 1 >=0 ? lemmas[d - 1] ==-1?-1:cluster.getLP(lemmas[d - 1]):_cstr;
  387 + int gcLSm1 = gc > 0 ? lemmas[gc - 1] ==-1?-1:cluster.getLP(lemmas[gc - 1]) : _cstr;
  388 +
  389 + // same sixteen patterns as the POS block above, with neighbour cluster ids in place of neighbour POS tags (still encoded through dl1)
  390 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  391 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=dP; dl1.cz5();f[c++]=dl1.csa(s_dir,dir);
  392 + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  393 + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  394 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  395 + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
  396 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  397 + dl1.v0= n++; dl1.v2=cLSm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); // NOTE(review): v2 is cLSm1 where the parallel statements use gcLSm1, and v5 still holds cLSp1 from the previous statement - confirm
  398 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  399 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  400 + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  401 + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  402 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  403 + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  404 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  405 + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=pLSp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  406 +
  407 +
  408 + // g and gr are computed from the predicted heads/labels but are only referenced by the commented-out stacking features below
  409 + short[] prel = is.plabels[i],phead=is.pheads[i];
  410 +
  411 + int g = p==phead[d]?1:2 ;
  412 + if (gc>=0) g += d==phead[gc]?4:8;
  413 +
  414 + int gr = gc==-1?s_relend:prel[gc];
  415 +
  416 + // take those in for stacking
  417 + /*
  418 + dl2.v1=label;
  419 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  420 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  421 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
  422 +
  423 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  424 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  425 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
  426 +
  427 +*/
  428 + if (feats==null) return;
  429 + // despite the names, featsP holds the features of the dependent d and featsD those of the grandchild (null when gc == -1)
  430 + short[] featsP =feats[d];
  431 + short[] featsD =gc!=-1?feats[gc]:null;
  432 + // dlf is prepared with label, grandchild POS and dependent POS; extractFeat (defined outside this part of the file) continues writing into f from index c
  433 + dlf.v0= n++; dlf.v1=label; dlf.v2=gP; dlf.v3=dP;
  434 + extractFeat(f, c, dir, featsP, featsD);
  435 + return;
  436 + }
  437 +
  438 +
  439 + public void siblingm(Instances is , int i,short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, int sblng, int label, Cluster cluster, long[] f, int v)
  440 + {
  441 +
  442 + for(int k=0;k<f.length;k++) f[k]=0;
  443 +
  444 + int pP = pos[prnt], dP = pos[d];
  445 + int prntF = forms[prnt],chldF = forms[d];
  446 + int prntL = lemmas[prnt], chldL = lemmas[d];
  447 + int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF);
  448 +
  449 + int sP = sblng!=-1 ? pos[sblng] : s_str, sblF = sblng!=-1 ? forms[sblng] : s_stwrd, sblL = sblng!=-1 ? lemmas[sblng] : s_stwrd;
  450 +
  451 + int sblLS = (sblng != -1)&&(sblF!=-1) ? cluster.getLP(sblF) : s_stwrd;
  452 +
  453 +
  454 + int dir= (prnt < d)? ra:la;
  455 +
  456 + int abs = Math.abs(prnt-d);
  457 +
  458 + final int dist;
  459 + if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2;
  460 + else if (abs==2)dist=d1; else dist=di0;
  461 +
  462 + int n=147;
  463 +
  464 + if (prntF>maxForm) prntF=-1;
  465 + if (prntL>maxForm) prntL=-1;
  466 +
  467 + if (chldF>maxForm) chldF=-1;
  468 + if (chldL>maxForm) chldL=-1;
  469 +
  470 + if (sblF>maxForm) sblF=-1;
  471 + if (sblL>maxForm) sblL=-1;
  472 +
  473 +
  474 + dl1.v0= n++; dl1.v1=label;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[0]=dl1.csa(s_dir,dir);f[1]=dl1.csa(s_dist,dist);
  475 + dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[2]=dl1.csa(s_dir,dir); f[3]=dl1.csa(s_dist,dist);
  476 + dl1.v0= n++; dl1.v2=dP;dl1.cz4(); f[4]=dl1.csa(s_dir,dir); f[5]=dl1.csa(s_dist,dist);
  477 +
  478 + // sibling only could be tried
  479 + dwwp.v1=label;
  480 + dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=sblF; dwwp.cz4(); f[6]=dwwp.csa(s_dir,dir); f[7]=dwwp.csa(s_dist,dist);
  481 + dwwp.v0= n++; dwwp.v2=chldF; dwwp.cz4(); f[8]=dwwp.csa(s_dir,dir); f[9]=dwwp.csa(s_dist,dist);
  482 + dwp.v0= n++; dwp.v1=label; dwp.v2=sblF; dwp.v3=pP; dwp.cz4(); f[10]=dwp.csa(s_dir,dir); f[11]=dwp.csa(s_dist,dist);
  483 + dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[12]=dwp.csa(s_dir,dir); f[13]=dwp.csa(s_dist,dist);
  484 + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntF; dwp.v3=sP; dwp.cz4(); f[14]=dwp.csa(s_dir,dir); f[15]=dwp.csa(s_dist,dist);
  485 + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldF; dwp.cz4(); f[16]=dwp.csa(s_dir,dir); f[17]=dwp.csa(s_dist,dist);
  486 +
  487 + //lemmas
  488 + dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=sblL; dwwp.cz4(); f[18]=dwwp.csa(s_dir,dir);
  489 + dwwp.v0= n++; dwwp.v2=chldL; dwwp.cz4(); f[19]=dwwp.csa(s_dir,dir); f[20]=dwwp.csa(s_dist,dist);
  490 + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=sblL; dwp.v3=pP; dwp.cz4(); f[21]=dwp.csa(s_dir,dir); f[22]=dwp.csa(s_dist,dist);
  491 + dwp.v0= n++; /*dwp.v1=label; */ dwp.v3=dP; dwp.cz4(); f[23]=dwp.csa(s_dir,dir);f[24]=dwp.csa(s_dist,dist);
  492 + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntL; dwp.v3=sP; dwp.cz4(); f[25]=dwp.csa(s_dir,dir); f[26]=dwp.csa(s_dist,dist);
  493 + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldL; dwp.cz4(); f[27]=dwp.csa(s_dir,dir);f[28]=dwp.csa(s_dist,dist);
  494 +
  495 +
  496 + // clusters
  497 +
  498 + d2lp.v1=label;
  499 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[29]=d2lp.csa(s_dir,dir);
  500 + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[30]=d2lp.csa(s_dir,dir); f[31]=d2lp.csa(s_dist,dist);
  501 +
  502 + d3lp.v1= label;
  503 + d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[32]=d3lp.csa(s_dir,dir);
  504 +
  505 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sblF; d2lp.cz5(); f[33]=d2lp.csa(s_dir,dir); f[34]=d2lp.csa(s_dist,dist);
  506 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=chldF; d2lp.cz5(); f[35]=d2lp.csa(s_dir,dir); f[36]=d2lp.csa(s_dist,dist);
  507 + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=prntF; d2lp.cz5(); f[37]=d2lp.csa(s_dir,dir); f[38]=d2lp.csa(s_dist,dist);
  508 +
  509 + d2pp.v1=label;
  510 + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[39]=d2pp.csa(s_dir,dir); f[40]=d2pp.csa(s_dist,dist);
  511 + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[41]=d2pp.csa(s_dir,dir); f[42]=d2pp.csa(s_dist,dist);
  512 + d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[43]=d2pp.csa(s_dir,dir); f[44]=d2pp.csa(s_dist,dist);
  513 +
  514 +
  515 + int prntPm1 = prnt!=0 ? pos[prnt-1] : s_str;
  516 + int chldPm1 = d-1>=0 ? pos[d-1] : s_str;
  517 + int prntPp1 = prnt!=pos.length-1 ? pos[prnt+1] : s_end;
  518 + int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end;
  519 +
  520 + // sibling part of speech minus and plus 1
  521 + int sblPm1 = sblng>0 ? pos[sblng-1]:s_str;
  522 + int sblPp1 = sblng<pos.length-1 ? pos[sblng + 1]:s_end;
  523 +
  524 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP; dl1.cz5(); f[45]=dl1.csa(s_dir,dir);
  525 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=pP; dl1.cz5(); f[46]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  526 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.cz5(); f[47]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  527 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[48]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  528 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[49]=dl1.csa(s_dir,dir);
  529 + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=prntPm1;dl1.v5=pP;dl1.cz6(); f[50]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  530 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[51]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  531 + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[52]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  532 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP; dl1.cz5(); f[53]=dl1.csa(s_dir,dir);
  533 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=dP; dl1.cz5(); f[54]=dl1.csa(s_dir,dir);
  534 + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.cz5(); f[55]=dl1.csa(s_dir,dir);
  535 + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[56]=dl1.csa(s_dir,dir);
  536 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6(); f[57]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  537 + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[58]=dl1.csa(s_dir,dir);
  538 + dl1.v0= n++;dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6();f[59]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  539 + dl1.v0= n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6(); f[60]=dl1.csa(s_dir,dir);
  540 +
  541 + int c=61;
  542 +
  543 + int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1]==-1?-1:cluster.getLP(forms[prnt + 1]): _cend;
  544 + int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend;
  545 + int sLSp1 = sblng < pos.length -1 ? forms[sblng + 1] ==-1?-1:cluster.getLP(forms[sblng + 1]) : _cend;
  546 +
  547 + int pLSm1 = prnt!=0 ? forms[prnt - 1]==-1?-1:cluster.getLP(forms[prnt - 1]): _cstr;
  548 + int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr;
  549 + int sLSm1 = sblng>0 ? forms[sblng - 1] ==-1?-1:cluster.getLP(forms[sblng - 1]):_cstr;
  550 +
  551 + //int c=61;
  552 +
  553 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  554 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  555 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  556 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  557 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  558 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  559 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  560 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  561 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  562 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  563 + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  564 + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  565 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  566 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  567 + dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
  568 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  569 +
  570 +
  571 +
  572 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  573 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  574 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  575 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  576 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  577 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  578 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  579 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  580 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  581 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  582 + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  583 + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  584 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  585 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  586 + dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
  587 + dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  588 +
  589 + // take those in for stacking
  590 +
  591 + /*
  592 + short[] prel = is.plabels[i],phead=is.pheads[i];
  593 +
  594 + int g = prnt==phead[d]?1:2 ;
  595 + if (sblng>=0) g += prnt==phead[sblng]?4:8;
  596 +
  597 + int gr = sblng==-1?s_relend:prel[sblng];
  598 +
  599 +
  600 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  601 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  602 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
  603 +
  604 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  605 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  606 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
  607 +*/
  608 +
  609 + if (feats==null) return;
  610 +
  611 + int cnt=c;
  612 +
  613 + short[] featsP =feats[d];
  614 + short[] featsSbl =sblng!=-1?feats[sblng]:null;
  615 +
  616 + dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=dP;
  617 +
  618 +
  619 + cnt = extractFeat(f, cnt ,dir, featsP, featsSbl);
  620 +
  621 + featsP =feats[prnt];
  622 + featsSbl =sblng!=-1?feats[sblng]:null;
  623 +
  624 + dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=sP;
  625 + if (featsP!=null && featsSbl!=null) {
  626 + for(short i1=0;i1<featsP.length;i1++) {
  627 + for(short i2=0;i2<featsSbl.length;i2++) {
  628 + dlf.v4=featsP[i1]; dlf.v5=featsSbl[i2];
  629 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,prnt<sblng?1:2);
  630 + }
  631 + }
  632 + } else if (featsP==null && featsSbl!=null) {
  633 +
  634 + for(short i2=0;i2<featsSbl.length;i2++) {
  635 + dlf.v4=nofeat; dlf.v5=featsSbl[i2];
  636 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
  637 + }
  638 +
  639 + } else if (featsP!=null && featsSbl==null) {
  640 +
  641 + for(short i1=0;i1<featsP.length;i1++) {
  642 + dlf.v4=featsP[i1]; dlf.v5=nofeat;
  643 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
  644 + }
  645 + }
  646 +
  647 + return;
  648 + }
  649 +
  650 + private int extractFeat(long[] f, int cnt, int dir, short[] featsP, short[] featsD) {
  651 + if (featsP!=null && featsD!=null) {
  652 + for(short i1=0;i1<featsP.length;i1++) {
  653 + for(short i2=0;i2<featsD.length;i2++) {
  654 + dlf.v4=featsP[i1]; dlf.v5=featsD[i2];
  655 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
  656 + }
  657 + }
  658 + } else if (featsP==null && featsD!=null) {
  659 +
  660 + for(short i2=0;i2<featsD.length;i2++) {
  661 + dlf.v4=nofeat; dlf.v5=featsD[i2];
  662 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
  663 +
  664 + }
  665 + } else if (featsP!=null && featsD==null) {
  666 +
  667 + for(short i1=0;i1<featsP.length;i1++) {
  668 + dlf.v4=featsP[i1]; dlf.v5=nofeat;
  669 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
  670 +
  671 + }
  672 + }
  673 + return cnt;
  674 + }
  675 +
  676 + public IFV encodeCat2(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][],
  677 + Cluster cluster, IFV f, Long2IntInterface li) {
  678 +
  679 +
  680 + long[] svs = new long[250];
  681 +
  682 + for (int i = 1; i < heads.length; i++) {
  683 +
  684 +
  685 + int n =basic(pposs, forms, heads[i], i, cluster, f);
  686 + firstm(is, ic, heads[i], i, types[i], cluster,svs);
  687 + for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k]));
  688 +
  689 + int ch,cmi,cmo;
  690 + if (heads[i] < i) {
  691 + ch = rightmostRight(heads, heads[i], i);
  692 + cmi = leftmostLeft(heads, i, heads[i]);
  693 + cmo = rightmostRight(heads, i, heads.length);
  694 +
  695 + } else {
  696 + ch = leftmostLeft(heads, heads[i], i);
  697 + cmi = rightmostRight(heads, i, heads[i]);
  698 + cmo = leftmostLeft(heads, i, 0);
  699 + }
  700 +
  701 + siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n);
  702 + for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k]));
  703 +
  704 +
  705 + gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs);
  706 + for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k]));
  707 +
  708 + gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs);
  709 + for(int k=0;k<svs.length;k++)f.add(li.l2i(svs[k]));
  710 + }
  711 +
  712 + return f;
  713 + }
  714 +
  715 + public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) {
  716 +
  717 +
  718 + long[] svs = new long[250];
  719 +
  720 + for (int i = 1; i < heads.length; i++) {
  721 +
  722 +
  723 + int n =basic(pposs, forms, heads[i], i, cluster, f);
  724 + firstm(is, ic, heads[i], i, types[i], cluster,svs);
  725 + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
  726 +
  727 + int ch,cmi,cmo;
  728 + if (heads[i] < i) {
  729 + ch = rightmostRight(heads, heads[i], i);
  730 + cmi = leftmostLeft(heads, i, heads[i]);
  731 + cmo = rightmostRight(heads, i, heads.length);
  732 +
  733 + } else {
  734 + ch = leftmostLeft(heads, heads[i], i);
  735 + cmi = rightmostRight(heads, i, heads[i]);
  736 + cmo = leftmostLeft(heads, i, 0);
  737 + }
  738 +
  739 + siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n);
  740 + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
  741 +
  742 +
  743 + gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs);
  744 + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
  745 +
  746 + gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs);
  747 + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
  748 + }
  749 +
  750 + return f;
  751 + }
  752 +
  753 +
  754 + public float encode3(short[] pos, short heads[] , short[] types, DataF d2) {
  755 +
  756 + double v = 0;
  757 + for (int i = 1; i < heads.length; i++) {
  758 +
  759 + int dir= (heads[i] < i)? 0:1;
  760 +
  761 + v += d2.pl[heads[i]][i];
  762 + v += d2.lab[heads[i]][i][types[i]][dir];
  763 +
  764 + boolean left = i<heads[i];
  765 + short[] labels = Edges.get(pos[heads[i]], pos[i], left);
  766 + int lid=-1;
  767 + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;}
  768 +
  769 + int ch,cmi,cmo;
  770 + if (heads[i] < i) {
  771 + ch = rightmostRight(heads, heads[i], i);
  772 + cmi = leftmostLeft(heads, i, heads[i]);
  773 + cmo = rightmostRight(heads, i, heads.length);
  774 +
  775 + if (ch==-1) ch=heads[i];
  776 + if (cmi==-1) cmi=heads[i];
  777 + if (cmo==-1) cmo=heads[i];
  778 +
  779 + } else {
  780 + ch = leftmostLeft(heads, heads[i], i);
  781 + cmi = rightmostRight(heads, i, heads[i]);
  782 + cmo = leftmostLeft(heads, i, 0);
  783 +
  784 + if (ch==-1) ch=i;
  785 + if (cmi==-1) cmi=i;
  786 + if (cmo==-1) cmo=i;
  787 + }
  788 + v += d2.sib[heads[i]][i][ch][dir][lid];
  789 + v += d2.gra[heads[i]][i][cmi][dir][lid];
  790 + v += d2.gra[heads[i]][i][cmo][dir][lid];
  791 + }
  792 + return (float)v;
  793 + }
  794 +
  795 + /**
  796 + * Provide the scores of the edges
  797 + * @param pos
  798 + * @param heads
  799 + * @param types
  800 + * @param edgesScores
  801 + * @param d2
  802 + * @return
  803 + */
  804 + public static float encode3(short[] pos, short heads[] , short[] types, float[] edgesScores, DataF d2) {
  805 +
  806 + double v = 0;
  807 + for (int i = 1; i < heads.length; i++) {
  808 +
  809 + int dir= (heads[i] < i)? 0:1;
  810 +
  811 + edgesScores[i] = d2.pl[heads[i]][i];
  812 + edgesScores[i] += d2.lab[heads[i]][i][types[i]][dir];
  813 +
  814 + boolean left = i<heads[i];
  815 + short[] labels = Edges.get(pos[heads[i]], pos[i], left);
  816 + int lid=-1;
  817 + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;}
  818 +
  819 + int ch,cmi,cmo;
  820 + if (heads[i] < i) {
  821 + ch = rightmostRight(heads, heads[i], i);
  822 + cmi = leftmostLeft(heads, i, heads[i]);
  823 + cmo = rightmostRight(heads, i, heads.length);
  824 +
  825 + if (ch==-1) ch=heads[i];
  826 + if (cmi==-1) cmi=heads[i];
  827 + if (cmo==-1) cmo=heads[i];
  828 +
  829 + } else {
  830 + ch = leftmostLeft(heads, heads[i], i);
  831 + cmi = rightmostRight(heads, i, heads[i]);
  832 + cmo = leftmostLeft(heads, i, 0);
  833 +
  834 + if (ch==-1) ch=i;
  835 + if (cmi==-1) cmi=i;
  836 + if (cmo==-1) cmo=i;
  837 + }
  838 + edgesScores[i] += d2.sib[heads[i]][i][ch][dir][lid];
  839 + edgesScores[i] += d2.gra[heads[i]][i][cmi][dir][lid];
  840 + edgesScores[i] += d2.gra[heads[i]][i][cmo][dir][lid];
  841 + v+=edgesScores[i];
  842 + }
  843 + return (float)v;
  844 + }
  845 +
  846 +
  847 + private static int rightmostRight(short[] heads, int head, int max) {
  848 + int rightmost = -1;
  849 + for (int i = head + 1; i < max; i++) if (heads[i] == head) rightmost = i;
  850 +
  851 + return rightmost;
  852 + }
  853 +
  854 + private static int leftmostLeft(short[] heads, int head, int min) {
  855 + int leftmost = -1;
  856 + for (int i = head - 1; i > min; i--) if (heads[i] == head) leftmost = i;
  857 + return leftmost;
  858 + }
  859 +
  // Names of feature types and feature values that initFeatures() registers with the MFB.
  860 + public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA";
  861 +
        // Ids returned by mf.register(DIR, RA) and mf.register(DIR, LA) in initFeatures().
  862 + private static int ra,la;
        // Id returned by mf.register(POS, STR) in initFeatures().
  863 + private static int s_str;
        // Ids assigned in initFeatures(): s_end = (POS, END), _cend/_cstr = (Cluster.SPATH, END/STR),
        // s_stwrd = (WORD, STWRD), s_relend = (REL, END).
  864 + private static int s_end, _cend,_cstr, s_stwrd,s_relend;
  865 +
  866 + protected static final String TYPE = "TYPE",DIR = "D";
  867 + public static final String POS = "POS";
  868 + protected static final String DIST = "DIST",MID = "MID", FEAT="F";
  869 +
        // String values registered under DIST in initFeatures().
  870 + private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10";
  871 +
        // Ids of the DIST values above (di0 belongs to "0", d1 to "1", ..., d10 to "10").
  872 + private static int di0, d4,d3,d2,d1,d5,d10;
  873 +
  874 +
  875 + private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS";
  876 +
  877 +
  878 +
        // Id returned by mf.register(FEAT, "NOFEAT"); stands in for a missing feature list in extractFeat.
  879 + private static int nofeat;
  880 +
  881 +
        // Written by setMaxForm(int) and returned by getMaxForm(); static, so shared by all instances.
  882 + public static int maxForm;
  883 +
  884 +
  885 + /**
  886 + * Initialize the features.
  887 + * Takes no parameters. Registers the fixed feature values with a new MFB and stores
        * the returned ids in the static fields of this class (s_str, s_end, s_relend,
        * _cstr, _cend, s_stwrd, la, ra, nofeat, di0, d1 ... d10).
        * NOTE(review): the order of the register calls is kept as is; presumably the
        * assigned ids depend on it - confirm against MFB before reordering.
  888 + */
  889 + static public void initFeatures() {
  890 +
  891 +
  892 + MFB mf = new MFB();
  893 + mf.register(POS, MID);
  894 + s_str = mf.register(POS, STR);
  895 + s_end = mf.register(POS, END);
  896 +
  897 + s_relend = mf.register(REL, END);
  898 +
  899 + _cstr= mf.register(Cluster.SPATH,STR);
  900 + _cend=mf.register(Cluster.SPATH,END);
  901 +
  902 +
  903 + mf.register(TYPE, POS);
  904 +
  905 + s_stwrd=mf.register(WORD,STWRD);
  906 + mf.register(POS,STPOS);
  907 +
  908 + la = mf.register(DIR, LA);
  909 + ra = mf.register(DIR, RA);
  910 +
  911 + // mf.register(TYPE, CHAR);
  912 +
  913 + mf.register(TYPE, FEAT);
  914 + nofeat=mf.register(FEAT, "NOFEAT");
  915 +
        // registers the 215 TYPE values "F0" .. "F214"
  916 + for(int k=0;k<215;k++) mf.register(TYPE, "F"+k);
  917 +
  918 +
  919 + di0=mf.register(DIST, _0);
  920 + d1=mf.register(DIST, _1);
  921 + d2=mf.register(DIST, _2);
  922 + d3=mf.register(DIST, _3);
  923 + d4=mf.register(DIST, _4);
  924 + d5=mf.register(DIST, _5);
  925 + // d5l=mf.register(DIST, _5l);
  926 + d10=mf.register(DIST, _10);
  927 +
  928 +
  929 + }
  930 +
  931 + /* (non-Javadoc)
  932 + * @see extractors.Extractor#getType()
  933 + */
  934 + @Override
  935 + public int getType() {
  936 +
  937 + return s_type;
  938 + }
  939 +
  940 + /* (non-Javadoc)
  941 + * @see extractors.Extractor#setMaxForm(int)
  942 + */
  943 + @Override
  944 + public void setMaxForm(int max) {
  945 + maxForm = max;
  946 + }
  947 +
  948 + /* (non-Javadoc)
  949 + * @see extractors.Extractor#getMaxForm()
  950 + */
  951 + @Override
  952 + public int getMaxForm() {
  953 + return maxForm;
  954 + }
  955 +
  956 +
  957 +
  958 +}
... ...
dependencyParser/basic/mate-tools/src/extractors/ExtractorClusterStackedR2.java 0 → 100644
  1 +package extractors;
  2 +
  3 +
  4 +import java.util.Arrays;
  5 +
  6 +import is2.data.Cluster;
  7 +import is2.data.D4;
  8 +import is2.data.DataF;
  9 +import is2.data.Edges;
  10 +import is2.data.FV;
  11 +import is2.data.IFV;
  12 +import is2.data.Instances;
  13 +import is2.data.Long2IntInterface;
  14 +import is2.data.MFB;
  15 +import is2.util.DB;
  16 +
  17 +
  18 +
  19 +final public class ExtractorClusterStackedR2 implements Extractor {
  20 +
  // Static values read from the MFB feature counter in initStat(); init() copies them into
  // the a0..a7 fields of the D4 templates. s_child is not assigned in initStat().
  21 + public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos;
  22 +
       // Created in initStat(); basic() registers its "px" values through it.
  23 + MFB mf;
  24 +
       // Reusable feature templates, all created in the constructor with the same Long2IntInterface.
  25 + final D4 d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ;
  26 +
       // Mapping handed to every D4 template in the constructor.
  27 + public final Long2IntInterface li;
  28 +
  29 + public ExtractorClusterStackedR2(Long2IntInterface li) {
  30 +
  31 + initFeatures();
  32 + this.li=li;
  33 + d0 = new D4(li);dl1 = new D4(li);dl2 = new D4(li);
  34 + dwr = new D4(li);
  35 + dr = new D4(li);
  36 + dwwp = new D4(li);
  37 +
  38 + dw = new D4(li);
  39 + dwp = new D4(li);
  40 +
  41 + dlf = new D4(li);
  42 + d3lp = new D4(li); d2lp = new D4(li); d2pw = new D4(li); d2pp = new D4(li);
  43 +
  44 + }
  45 +
  // Creates the MFB held in mf and reads the per-type values of its feature counter into
  // the static s_* fields; la and ra are re-read through mf.getValue.
  // NOTE(review): the trailing comments mention getFeatureBits, so these values are
  // presumably bit widths - confirm against MFB.
  46 + public void initStat() {
  47 +
  48 +
  49 + mf = new MFB();
  50 + s_rel = mf.getFeatureCounter().get(REL).intValue();
  51 + s_pos = mf.getFeatureCounter().get(POS).intValue();
  52 + s_word = mf.getFeatureCounter().get(WORD).intValue();
  53 + s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits();
  54 + s_dir = mf.getFeatureCounter().get(DIR);
  55 + la = mf.getValue(DIR, LA);
  56 + ra = mf.getValue(DIR, RA);
  57 + s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST);
  58 + s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT);
       // the two cluster entries may be absent from the counter; 0 is used in that case
  59 + s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH);
  60 + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH);
  61 + }
  62 +
  // Copies the static s_* values into the a0..a7 fields of the D4 templates, one template
  // per line. The values come from initStat(), so init() is only meaningful after it.
  // NOTE(review): a0..a7 presumably describe the slots v0..v7 of each template - confirm
  // against D4. The templates dwr, dr and dw are not configured here.
  63 + public void init(){
  64 + // DB.println("init");
  65 + d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos;
  66 + dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos;
  67 + dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos;
  68 + dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word;
  69 + dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word;
  70 + dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos;
  71 + d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath;
       // d2lp and d2pw receive identical settings; only a0..a5 are set for the d2* templates
  72 + d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
  73 + d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
  74 + d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
  75 + }
  76 +
  77 +
  78 + public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f)
  79 + {
  80 +
  81 + d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean();
  82 +
  83 + d3lp.clean(); d2lp.clean();d2pw.clean(); d2pp.clean();
  84 +
  85 + int n=1;
  86 + int dir= (p < d)? ra:la;
  87 + // d0.v0= n; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4;
  88 + int end= (p >= d ? p : d);
  89 + int start = (p >= d ? d : p) + 1;
  90 +
  91 + StringBuilder s = new StringBuilder(end-start);
  92 + int[] x = new int[end-start];
  93 + int c=0;
  94 + for(int i = start ; i <end ; i++) {
  95 + //d0.v3=pposs[i];
  96 + //d0.cz4();
  97 + //d0.csa(s_dir,dir,f);
  98 +// s.append((char)pposs[i]);
  99 + x[c++] =pposs[i];
  100 + }
  101 +
  102 + Arrays.sort(x);
  103 + for(int i = 0;i<x.length ; i++) {
  104 + if (i==0 || x[i]!=x[i-1] ) s.append(x[i]);
  105 + }
  106 + int v = mf.register("px", s.toString());
  107 +
  108 + dwp.v0 = n++; dwp.v1 = 1;dwp.v2 = v; dwp.v3 = pposs[p]; dwp.v4 = pposs[d]; dwp.cz5(); dwp.csa(s_dir,dir,f);
  109 +
  110 + return n;
  111 + }
  112 +
  113 +
  // Fills f with the feature codes for the edge prnt -> dpnt of sentence i in is.
  // f is zeroed first. Template ids are taken from n (starting at 3) and output slots
  // from c (starting at 0), except where noted below. Many lines set only some v* fields
  // and rely on the values left in the template by earlier lines, so statement order matters.
  114 + public void firstm(Instances is, int i,
  115 + int prnt, int dpnt, int label, Cluster cluster, long[] f)
  116 + {
  117 +
  118 +
  119 + //short[] pposs, int[] form, int[] lemmas, short[][] feats
  120 + for(int k=0;k<f.length;k++) f[k]=0;
  121 +
  122 + short[] pposs = is.pposs[i];
  123 + int[] form =is.forms[i];
  124 + short[][] feats = is.feats[i];
  125 +
  126 +
  127 + int pF = form[prnt],dF = form[dpnt];
  128 + int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt];
  129 + int pP = pposs[prnt],dP = pposs[dpnt];
  130 +
        // cluster values are looked up before the forms are capped by maxForm below
  131 + int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF);
  132 +
  133 + final int dir= (prnt < dpnt)? ra:la;
  134 +
        // forms and lemmas above maxForm are replaced by -1
  135 + if (pF>maxForm) pF=-1;
  136 + if (pL>maxForm) pL=-1;
  137 +
  138 + if (dF>maxForm) dF=-1;
  139 + if (dL>maxForm) dL=-1;
  140 +
  141 +
  142 + int n=3,c=0;
  143 +
  144 + dl2.v1=label;
  145 + dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
  146 + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
  147 + dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
  148 + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
  149 +
  150 +
  151 + dwwp.v1=label;
  152 + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir);
  153 +
  154 + dl1.v1=label;
  155 + dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir);
  156 + dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir);
  157 + dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.csa(s_dir,dir);
  158 +
        // neighbouring part-of-speech values and forms; s_str / s_end / s_stwrd are used
        // when the neighbour lies outside the sentence
  159 + int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str;
  160 + int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end;
  161 +
  162 + int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str;
  163 + int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end;
  164 +
  165 + int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd;
  166 + int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd;
  167 +
  168 +
  169 +
        // NOTE(review): from here to the "lemmas" section the codes are stored with f[n++],
        // not f[c++]. n is therefore advanced twice per line, and the writes land on slots
        // 12, 14, 16, ... while c stays at 8. The later f[c++] writes continue from slot 8
        // and overwrite some of these slots (12, 14, ... 24). Looks unintended, but changing
        // it would change the emitted template ids - confirm before touching.
  170 + dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  171 + dl1.v0= n++; dl1.v5=dPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  172 + dl1.v0= n++; dl1.v3=pPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  173 + dl1.v0= n++; dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  174 +
  175 +
  176 + dl1.v0= n++; dl1.v3=pPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  177 + dl1.v0= n++; dl1.v3=dPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  178 + dl1.v0= n++; dl1.v3=dPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  179 + dl1.v0= n++; dl1.v3=pPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  180 +
  181 + dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  182 + dl1.v0= n++; dl1.v5=dPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  183 + dl1.v0= n++; dl1.v3=pPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  184 + dl1.v0= n++; dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
  185 +
  186 + dl1.v0= n++; dl1.v3=pPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  187 + dl1.v0= n++; dl1.v3=dPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  188 + dl1.v0= n++; dl1.v3=dPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  189 + dl1.v0= n++; dl1.v3=pPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
  190 +
  191 +
  192 +
        // NOTE(review): in each of the next eight lines v3 is assigned twice, so the first
        // value (a neighbouring form) is overwritten at once and never reaches the code;
        // v2 still holds the value left by earlier lines. These lines also call getVal()
        // where all other lines call csa(s_dir,dir). Possibly v2 was meant - confirm.
  193 + dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal();
  194 + dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.cz5(); f[n++]=dl2.getVal();
  195 + dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal();
  196 + dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.cz5(); f[n++]=dl2.getVal();
  197 +
  198 +
  199 + dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal();
  200 + dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.cz5(); f[n++]=dl2.getVal();
  201 + dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal();
  202 + dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.cz5(); f[n++]=dl2.getVal();
  203 +
  204 +
  205 + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir);
  206 + dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir);
        // NOTE(review): v4 is assigned twice on the next line (pP is overwritten by dP) and
        // v5 is not set although cz6() is called - possibly v5=dP was meant; confirm.
  207 + dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[n++]=dwwp.csa(s_dir,dir);
  208 +
  209 +
  210 +
  211 + // lemmas
  212 +
        // from here on the codes go to f[c++] again (c is still 8 at this point)
  213 + dl2.v1=label;
  214 + dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
  215 + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
  216 + dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
  217 + dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
  218 +
  219 +
  220 + dwwp.v1=label;
  221 + dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir);
  222 +
  223 + dwp.v1= label;
  224 + dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; dwp.cz6(); f[c++]=dwp.csa(s_dir,dir);
  225 + dwp.v0=n++;dwp.cz5(); f[c++]=dwp.csa(s_dir,dir);
  226 +
  227 + dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.csa(s_dir,dir);
  228 + dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir);
  229 + dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir);
  230 +
  231 +
  232 + // cluster
  233 +
  234 + d2pw.v1=label;
  235 + d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.csa(s_dir,dir);
  236 + d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir);
  237 + d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir);
  238 + d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.csa(s_dir,dir);
  239 +
  240 +
  241 + d2pp.v1=label;
  242 + d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.csa(s_dir,dir);
  243 + d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
  244 + d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
  245 + d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.csa(s_dir,dir);
  246 +
  247 +
        // prel and phead are only referenced by the commented-out stacking lines below
  248 + short[] prel = is.plabels[i];
  249 + short[] phead = is.pheads[i];
  250 +
  251 +
  252 + //take those in for stacking
  253 + // dl2.v1=label;
  254 + // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir);
  255 + // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.csa(s_dir,dir);
  256 +
  257 +
  258 +
        // without morphologic features nothing more is added
  259 + if (feats==null) return;
  260 +
  261 + short[] featsP =feats[prnt], featsD =feats[dpnt];
  262 + dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP;
        // the returned slot counter is not needed any more and is dropped
  263 + extractFeat(f, c, dir, featsP, featsD);
  264 +
  265 + return;
  266 + }
  267 +
  268 +
  269 +
  // Fills f with the feature codes for the edge p -> d of sentence i in is together with
  // a third token gc (gc == -1 means there is none; start markers are used instead).
  // f is zeroed first. Template ids are taken from n (starting at 84) and output slots
  // from c (starting at 0). Several lines rely on v* values left in a template by earlier
  // lines, so statement order matters.
  270 + public void gcm(Instances is , int i, int p, int d, int gc, int label,Cluster cluster, long[] f) {
  271 +
  272 + for(int k=0;k<f.length;k++) f[k]=0;
  273 +
  274 + short[] pos= is.pposs[i];
  275 + int[] forms=is.forms[i];
  276 + int[] lemmas=is.plemmas[i];
  277 + short[][] feats=is.feats[i];
  278 +
  279 + int pP = pos[p], dP = pos[d];
  280 + int prntF = forms[p], chldF = forms[d];
  281 + int prntL = lemmas[p], chldL = lemmas[d];
  282 + int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF);
  283 +
  284 + int gP = gc != -1 ? pos[gc] : s_str;
  285 + int gcF = gc != -1 ? forms[gc] : s_stwrd;
  286 + int gcL = gc != -1 ? lemmas[gc] : s_stwrd;
  287 + int gcLS = (gc != -1) && (gcF!=-1) ? cluster.getLP(gcF) : s_stwrd;
  288 +
        // forms and lemmas above maxForm are replaced by -1 (after the cluster lookups above)
  289 + if (prntF>maxForm) prntF=-1;
  290 + if (prntL>maxForm) prntL=-1;
  291 +
  292 + if (chldF>maxForm) chldF=-1;
  293 + if (chldL>maxForm) chldL=-1;
  294 +
  295 + if (gcF>maxForm) gcF=-1;
  296 + if (gcL>maxForm) gcL=-1;
  297 +
  298 +
  299 + int dir= (p < d)? ra:la, dir_gra =(d < gc)? ra:la;
  300 +
  301 + int n=84,c=0;
  302 +
        // in this first group both directions are appended: cs(s_dir,dir) then csa(s_dir,dir_gra)
  303 + //dl1.v023();
  304 + dl1.v1=label;
  305 + dl1.v0= n++; dl1.v2=pP; dl1.v3=dP;dl1.v4=gP; dl1.cz5(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
  306 + dl1.v0= n++; dl1.v2=pP; dl1.v3=gP; dl1.cz4();dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
  307 + dl1.v0= n++; dl1.v2=dP; dl1.cz4(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
  308 +
  309 + dwwp.v1=label;
  310 + dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=gcF;
  311 + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
  312 +
  313 + dwwp.v0= n++; dwwp.v2=chldF; dwwp.v3=gcF;
  314 + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
  315 +
  316 + dwp.v1=label;
  317 + dwp.v0= n++; dwp.v2=gcF; dwp.v3=pP;
  318 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  319 +
  320 + dwp.v0= n++; dwp.v2=gcF; dwp.v3=dP;
  321 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  322 +
  323 + dwp.v0= n++; dwp.v2=prntF; dwp.v3=gP;
  324 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  325 +
  326 + dwp.v0= n++; dwp.v2=chldF; dwp.v3=gP;
  327 + dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra);
  328 +
  329 +
  330 + // lemma
  331 +
  332 + dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=gcL;
  333 + dwwp.cz4();dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
  334 +
  335 + dwwp.v0= n++; dwwp.v2=chldL; dwwp.v3=gcL;
  336 + dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
  337 +
  338 + dwp.v0= n++; dwp.v2=gcL; dwp.v3=pP;
  339 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  340 +
  341 + dwp.v0= n++; dwp.v2=gcL; dwp.v3=dP;
  342 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  343 +
  344 + dwp.v0= n++; dwp.v2=prntL; dwp.v3=gP;
  345 + dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
  346 +
  347 + dwp.v0= n++; dwp.v2=chldL; dwp.v3=gP;
  348 + dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra);
  349 +
  350 +
  351 + // clusters
  352 +
  353 + d2lp.v1= label;
  354 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);// f.add(li.l2i(l));
  355 + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);
  356 + d3lp.v0= n++; d3lp.v1= label; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=gcLS; d3lp.cz5(); d3lp.cs(s_dir,dir);f[c++]=d3lp.csa(s_dir,dir_gra);
  357 +
        // from here on only csa(s_dir,dir) is appended; dir_gra is no longer used
  358 + //_f83;
  359 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=gcF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
  360 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.v4=chldF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
  361 + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.v4=prntF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
  362 +
  363 + d2pp.v1= label;
  364 + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=gP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
  365 + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=gcLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
  366 + d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=gcLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
  367 +
  368 +
  369 +
  370 + // linear features
  371 +
  372 + int prntPm1 = p != 0 ? pos[p - 1] : s_str; // parent-pos-minus1
  373 + int chldPm1 = d - 1 >=0 ? pos[d - 1] : s_str; // child-pos-minus1
  374 + int prntPp1 = p != pos.length - 1 ? pos[p + 1] : s_end;
  375 + int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end;
  376 +
  377 + int gcPm1 = gc > 0 ? pos[gc - 1] : s_str;
  378 + int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end;
  379 +
  380 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  381 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  382 + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  383 + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  384 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  385 + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
  386 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP;dl1.v5=chldPp1; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
        // NOTE(review): the next line calls cz6() without setting v5; v5 still holds
        // chldPp1 from the line above.
  387 + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  388 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  389 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  390 + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  391 + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  392 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  393 + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  394 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  395 + dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=prntPp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  396 +
  397 +
        // cluster values of the neighbouring tokens: the "p1" values are looked up from
        // forms[], the "m1" values from lemmas[].
        // NOTE(review): gcLSp1 falls back to s_end while pLSp1 and cLSp1 fall back to
        // _cend - possibly an inconsistency; confirm.
  398 + int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend;
  399 + int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend;
  400 + int gcLSp1 = gc < pos.length -1 ? forms[gc + 1] ==-1?-1:cluster.getLP(forms[gc + 1]) : s_end;
  401 +
  402 + int pLSm1 = p != 0 ? lemmas[p - 1]==-1?-1:cluster.getLP(lemmas[p - 1]): _cstr;
  403 + int cLSm1 = d - 1 >=0 ? lemmas[d - 1] ==-1?-1:cluster.getLP(lemmas[d - 1]):_cstr;
  404 + int gcLSm1 = gc > 0 ? lemmas[gc - 1] ==-1?-1:cluster.getLP(lemmas[gc - 1]) : _cstr;
  405 +
  406 +
  407 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  408 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=dP; dl1.cz5();f[c++]=dl1.csa(s_dir,dir);
  409 + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  410 + dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  411 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  412 + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
  413 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
        // NOTE(review): as above, cz6() is called without setting v5 (it still holds cLSp1);
        // v2 is cLSm1 here where the neighbouring lines use gcLSm1 - possibly a typo; confirm.
  414 + dl1.v0= n++; dl1.v2=cLSm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  415 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  416 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  417 + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  418 + dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  419 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  420 + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  421 + dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  422 + dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=pLSp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  423 +
  424 +
  425 +
        // g and gr are only referenced by the commented-out stacking block below; the reads
        // of phead[d], phead[gc] and prel[gc] are still executed
  426 + short[] prel = is.plabels[i],phead=is.pheads[i];
  427 +
  428 + int g = p==phead[d]?1:2 ;
  429 + if (gc>=0) g += d==phead[gc]?4:8;
  430 +
  431 + int gr = gc==-1?s_relend:prel[gc];
  432 +
  433 + // take those in for stacking
  434 + /*
  435 + dl2.v1=label;
  436 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  437 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  438 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
  439 +
  440 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  441 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  442 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
  443 +
  444 +*/
        // without morphologic features nothing more is added
  445 + if (feats==null) return;
  446 +
        // despite its name, featsP holds the features of d; featsD holds those of gc (or null)
  447 + short[] featsP =feats[d];
  448 + short[] featsD =gc!=-1?feats[gc]:null;
  449 +
  450 + dlf.v0= n++; dlf.v1=label; dlf.v2=gP; dlf.v3=dP;
  451 + extractFeat(f, c, dir, featsP, featsD);
  452 + return;
  453 + }
  454 +
  455 +
  456 + public void siblingm(Instances is , int i,short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, int sblng, int label, Cluster cluster, long[] f, int v)
  457 + { // Fills f with the packed feature values for the triple (parent prnt, dependent d, sibling sblng); sblng == -1 means "no sibling".
  458 + // f is zeroed completely first. The parameter v is never read; is and i are only referenced by the commented-out stacking block further down.
  459 + for(int k=0;k<f.length;k++) f[k]=0;
  460 +
  461 + int pP = pos[prnt], dP = pos[d];
  462 + int prntF = forms[prnt],chldF = forms[d];
  463 + int prntL = lemmas[prnt], chldL = lemmas[d];
  464 + int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF);
  465 + // without a sibling the start markers s_str / s_stwrd stand in for its POS, form and lemma
  466 + int sP = sblng!=-1 ? pos[sblng] : s_str, sblF = sblng!=-1 ? forms[sblng] : s_stwrd, sblL = sblng!=-1 ? lemmas[sblng] : s_stwrd;
  467 + // the sibling cluster id falls back to s_stwrd when there is no sibling or its form is -1
  468 + int sblLS = (sblng != -1)&&(sblF!=-1) ? cluster.getLP(sblF) : s_stwrd;
  469 +
  470 + // direction id: ra when the parent index is smaller than the dependent index, la otherwise
  471 + int dir= (prnt < d)? ra:la;
  472 + // the parent-dependent distance is bucketed: >10, 6..10, 5, 4, 3, 2, and everything below 2
  473 + int abs = Math.abs(prnt-d);
  474 +
  475 + final int dist;
  476 + if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2;
  477 + else if (abs==2)dist=d1; else dist=di0;
  478 + // n is the template id written to v0; it is incremented once per template, so the statement order below fixes the ids
  479 + int n=147;
  480 + // forms and lemmas with an id above maxForm are replaced by -1 (the cluster ids above were looked up before this clamping)
  481 + if (prntF>maxForm) prntF=-1;
  482 + if (prntL>maxForm) prntL=-1;
  483 +
  484 + if (chldF>maxForm) chldF=-1;
  485 + if (chldL>maxForm) chldL=-1;
  486 +
  487 + if (sblF>maxForm) sblF=-1;
  488 + if (sblL>maxForm) sblL=-1;
  489 +
  490 + // POS templates; most templates yield one value via csa(s_dir,dir) and one via csa(s_dist,dist). Values set on an encoder stay set until overwritten.
  491 + dl1.v0= n++; dl1.v1=label;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[0]=dl1.csa(s_dir,dir);f[1]=dl1.csa(s_dist,dist);
  492 + dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[2]=dl1.csa(s_dir,dir); f[3]=dl1.csa(s_dist,dist);
  493 + dl1.v0= n++; dl1.v2=dP;dl1.cz4(); f[4]=dl1.csa(s_dir,dir); f[5]=dl1.csa(s_dist,dist);
  494 +
  495 + // sibling only could be tried
  496 + dwwp.v1=label;
  497 + dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=sblF; dwwp.cz4(); f[6]=dwwp.csa(s_dir,dir); f[7]=dwwp.csa(s_dist,dist);
  498 + dwwp.v0= n++; dwwp.v2=chldF; dwwp.cz4(); f[8]=dwwp.csa(s_dir,dir); f[9]=dwwp.csa(s_dist,dist);
  499 + dwp.v0= n++; dwp.v1=label; dwp.v2=sblF; dwp.v3=pP; dwp.cz4(); f[10]=dwp.csa(s_dir,dir); f[11]=dwp.csa(s_dist,dist);
  500 + dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[12]=dwp.csa(s_dir,dir); f[13]=dwp.csa(s_dist,dist);
  501 + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntF; dwp.v3=sP; dwp.cz4(); f[14]=dwp.csa(s_dir,dir); f[15]=dwp.csa(s_dist,dist);
  502 + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldF; dwp.cz4(); f[16]=dwp.csa(s_dir,dir); f[17]=dwp.csa(s_dist,dist);
  503 +
  504 + //lemmas
  505 + dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=sblL; dwwp.cz4(); f[18]=dwwp.csa(s_dir,dir);
  506 + dwwp.v0= n++; dwwp.v2=chldL; dwwp.cz4(); f[19]=dwwp.csa(s_dir,dir); f[20]=dwwp.csa(s_dist,dist);
  507 + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=sblL; dwp.v3=pP; dwp.cz4(); f[21]=dwp.csa(s_dir,dir); f[22]=dwp.csa(s_dist,dist);
  508 + dwp.v0= n++; /*dwp.v1=label; */ dwp.v3=dP; dwp.cz4(); f[23]=dwp.csa(s_dir,dir);f[24]=dwp.csa(s_dist,dist);
  509 + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntL; dwp.v3=sP; dwp.cz4(); f[25]=dwp.csa(s_dir,dir); f[26]=dwp.csa(s_dist,dist);
  510 + dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldL; dwp.cz4(); f[27]=dwp.csa(s_dir,dir);f[28]=dwp.csa(s_dist,dist);
  511 +
  512 +
  513 + // clusters
  514 +
  515 + d2lp.v1=label;
  516 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[29]=d2lp.csa(s_dir,dir);
  517 + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[30]=d2lp.csa(s_dir,dir); f[31]=d2lp.csa(s_dist,dist);
  518 +
  519 + d3lp.v1= label;
  520 + d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[32]=d3lp.csa(s_dir,dir);
  521 +
  522 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sblF; d2lp.cz5(); f[33]=d2lp.csa(s_dir,dir); f[34]=d2lp.csa(s_dist,dist);
  523 + d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=chldF; d2lp.cz5(); f[35]=d2lp.csa(s_dir,dir); f[36]=d2lp.csa(s_dist,dist);
  524 + d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=prntF; d2lp.cz5(); f[37]=d2lp.csa(s_dir,dir); f[38]=d2lp.csa(s_dist,dist);
  525 +
  526 + d2pp.v1=label;
  527 + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[39]=d2pp.csa(s_dir,dir); f[40]=d2pp.csa(s_dist,dist);
  528 + d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[41]=d2pp.csa(s_dir,dir); f[42]=d2pp.csa(s_dist,dist);
  529 + d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[43]=d2pp.csa(s_dir,dir); f[44]=d2pp.csa(s_dist,dist);
  530 +
  531 + // POS tags of the tokens directly before/after parent and dependent; s_str / s_end are used at the array boundaries
  532 + int prntPm1 = prnt!=0 ? pos[prnt-1] : s_str;
  533 + int chldPm1 = d-1>=0 ? pos[d-1] : s_str;
  534 + int prntPp1 = prnt!=pos.length-1 ? pos[prnt+1] : s_end;
  535 + int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end;
  536 +
  537 + // sibling part of speech minus and plus 1
  538 + int sblPm1 = sblng>0 ? pos[sblng-1]:s_str;
  539 + int sblPp1 = sblng<pos.length-1 ? pos[sblng + 1]:s_end;
  540 +
  541 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP; dl1.cz5(); f[45]=dl1.csa(s_dir,dir);
  542 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=pP; dl1.cz5(); f[46]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  543 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.cz5(); f[47]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  544 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[48]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  545 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[49]=dl1.csa(s_dir,dir);
  546 + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=prntPm1;dl1.v5=pP;dl1.cz6(); f[50]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  547 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[51]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  548 + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[52]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  549 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP; dl1.cz5(); f[53]=dl1.csa(s_dir,dir);
  550 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=dP; dl1.cz5(); f[54]=dl1.csa(s_dir,dir);
  551 + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.cz5(); f[55]=dl1.csa(s_dir,dir);
  552 + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[56]=dl1.csa(s_dir,dir);
  553 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6(); f[57]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  554 + dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[58]=dl1.csa(s_dir,dir);
  555 + dl1.v0= n++;dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6();f[59]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
  556 + dl1.v0= n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6(); f[60]=dl1.csa(s_dir,dir);
  557 + // slots 0..60 were written by fixed index; everything from here on is appended through the running counter c
  558 + int c=61;
  559 + // cluster ids of the neighbouring tokens, looked up from the word forms; _cend / _cstr are used at the array boundaries
  560 + int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1]==-1?-1:cluster.getLP(forms[prnt + 1]): _cend;
  561 + int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend;
  562 + int sLSp1 = sblng < pos.length -1 ? forms[sblng + 1] ==-1?-1:cluster.getLP(forms[sblng + 1]) : _cend;
  563 +
  564 + int pLSm1 = prnt!=0 ? forms[prnt - 1]==-1?-1:cluster.getLP(forms[prnt - 1]): _cstr;
  565 + int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr;
  566 + int sLSm1 = sblng>0 ? forms[sblng - 1] ==-1?-1:cluster.getLP(forms[sblng - 1]):_cstr;
  567 +
  568 + //int c=61;
  569 +
  570 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  571 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  572 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  573 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  574 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  575 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  576 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  577 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  578 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  579 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  580 + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  581 + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  582 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  583 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  584 + dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
  585 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  586 +
  587 + // NOTE(review): the 16 templates below assign exactly the same values as the 16 above (only the template id in v0 differs).
  588 + // Confirm whether a different source (e.g. lemma-based cluster ids) was intended; changing it would alter the feature space of trained models.
  589 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  590 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  591 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  592 + dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  593 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  594 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  595 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  596 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  597 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  598 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  599 + dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  600 + dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
  601 + dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  602 + dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  603 + dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
  604 + dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
  605 +
  606 + // take those in for stacking
  607 +
  608 + /*
  609 + short[] prel = is.plabels[i],phead=is.pheads[i];
  610 +
  611 + int g = prnt==phead[d]?1:2 ;
  612 + if (sblng>=0) g += prnt==phead[sblng]?4:8;
  613 +
  614 + int gr = sblng==-1?s_relend:prel[sblng];
  615 +
  616 +
  617 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  618 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  619 + dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
  620 +
  621 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  622 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
  623 + dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
  624 +*/
  625 + // at this point c == 93; the feats pairs below are appended without any bounds check on f
  626 + if (feats==null) return;
  627 +
  628 + int cnt=c;
  629 + // first pair the feats of the dependent with those of the sibling (null when there is no sibling)
  630 + short[] featsP =feats[d];
  631 + short[] featsSbl =sblng!=-1?feats[sblng]:null;
  632 +
  633 + dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=dP;
  634 +
  635 +
  636 + cnt = extractFeat(f, cnt ,dir, featsP, featsSbl);
  637 + // then pair the feats of the parent with those of the sibling; nofeat stands in for a missing side
  638 + featsP =feats[prnt];
  639 + featsSbl =sblng!=-1?feats[sblng]:null;
  640 +
  641 + dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=sP;
  642 + if (featsP!=null && featsSbl!=null) {
  643 + for(short i1=0;i1<featsP.length;i1++) {
  644 + for(short i2=0;i2<featsSbl.length;i2++) {
  645 + dlf.v4=featsP[i1]; dlf.v5=featsSbl[i2];
  646 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,prnt<sblng?1:2); // NOTE(review): literals 1/2 here, while the other two branches pass dir (ra/la) — confirm this is intended
  647 + }
  648 + }
  649 + } else if (featsP==null && featsSbl!=null) {
  650 +
  651 + for(short i2=0;i2<featsSbl.length;i2++) {
  652 + dlf.v4=nofeat; dlf.v5=featsSbl[i2];
  653 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
  654 + }
  655 +
  656 + } else if (featsP!=null && featsSbl==null) {
  657 +
  658 + for(short i1=0;i1<featsP.length;i1++) {
  659 + dlf.v4=featsP[i1]; dlf.v5=nofeat;
  660 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
  661 + }
  662 + }
  663 +
  664 + return;
  665 + }
  666 +
  667 + private int extractFeat(long[] f, int cnt, int dir, short[] featsP, short[] featsD) {
  668 + if (featsP!=null && featsD!=null) {
  669 + for(short i1=0;i1<featsP.length;i1++) {
  670 + for(short i2=0;i2<featsD.length;i2++) {
  671 + dlf.v4=featsP[i1]; dlf.v5=featsD[i2];
  672 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
  673 + }
  674 + }
  675 + } else if (featsP==null && featsD!=null) {
  676 +
  677 + for(short i2=0;i2<featsD.length;i2++) {
  678 + dlf.v4=nofeat; dlf.v5=featsD[i2];
  679 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
  680 +
  681 + }
  682 + } else if (featsP!=null && featsD==null) {
  683 +
  684 + for(short i1=0;i1<featsP.length;i1++) {
  685 + dlf.v4=featsP[i1]; dlf.v5=nofeat;
  686 + dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
  687 +
  688 + }
  689 + }
  690 + return cnt;
  691 + }
  692 +
  693 +
  694 + public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) {
  695 +
  696 +
  697 + long[] svs = new long[250];
  698 +
  699 + for (int i = 1; i < heads.length; i++) {
  700 +
  701 +
  702 + int n =basic(pposs, forms, heads[i], i, cluster, f);
  703 +
  704 + firstm(is, ic, heads[i], i, types[i], cluster,svs);
  705 + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
  706 +
  707 + int ch,cmi,cmo;
  708 + if (heads[i] < i) {
  709 + ch = rightmostRight(heads, heads[i], i);
  710 + cmi = leftmostLeft(heads, i, heads[i]);
  711 + cmo = rightmostRight(heads, i, heads.length);
  712 +
  713 + } else {
  714 + ch = leftmostLeft(heads, heads[i], i);
  715 + cmi = rightmostRight(heads, i, heads[i]);
  716 + cmo = leftmostLeft(heads, i, 0);
  717 + }
  718 +
  719 + siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n);
  720 + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
  721 +
  722 +
  723 + gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs);
  724 + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
  725 +
  726 + gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs);
  727 + for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
  728 + }
  729 +
  730 + return f;
  731 + }
  732 +
  733 +
  734 + public float encode3(short[] pos, short heads[] , short[] types, DataF d2) {
  735 +
  736 + double v = 0;
  737 + for (int i = 1; i < heads.length; i++) {
  738 +
  739 + int dir= (heads[i] < i)? 0:1;
  740 +
  741 + v += d2.pl[heads[i]][i];
  742 + v += d2.lab[heads[i]][i][types[i]][dir];
  743 +
  744 + boolean left = i<heads[i];
  745 + short[] labels = Edges.get(pos[heads[i]], pos[i], left);
  746 + int lid=-1;
  747 + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;}
  748 +
  749 + int ch,cmi,cmo;
  750 + if (heads[i] < i) {
  751 + ch = rightmostRight(heads, heads[i], i);
  752 + cmi = leftmostLeft(heads, i, heads[i]);
  753 + cmo = rightmostRight(heads, i, heads.length);
  754 +
  755 + if (ch==-1) ch=heads[i];
  756 + if (cmi==-1) cmi=heads[i];
  757 + if (cmo==-1) cmo=heads[i];
  758 +
  759 + } else {
  760 + ch = leftmostLeft(heads, heads[i], i);
  761 + cmi = rightmostRight(heads, i, heads[i]);
  762 + cmo = leftmostLeft(heads, i, 0);
  763 +
  764 + if (ch==-1) ch=i;
  765 + if (cmi==-1) cmi=i;
  766 + if (cmo==-1) cmo=i;
  767 + }
  768 + v += d2.sib[heads[i]][i][ch][dir][lid];
  769 + v += d2.gra[heads[i]][i][cmi][dir][lid];
  770 + v += d2.gra[heads[i]][i][cmo][dir][lid];
  771 + }
  772 + return (float)v;
  773 + }
  774 +
  775 + /**
  776 + * Provide the scores of the edges
  777 + * @param pos
  778 + * @param heads
  779 + * @param types
  780 + * @param edgesScores
  781 + * @param d2
  782 + * @return
  783 + */
  784 + public static float encode3(short[] pos, short heads[] , short[] types, float[] edgesScores, DataF d2) {
  785 +
  786 + double v = 0;
  787 + for (int i = 1; i < heads.length; i++) {
  788 +
  789 + int dir= (heads[i] < i)? 0:1;
  790 +
  791 + edgesScores[i] = d2.pl[heads[i]][i];
  792 + edgesScores[i] += d2.lab[heads[i]][i][types[i]][dir];
  793 +
  794 + boolean left = i<heads[i];
  795 + short[] labels = Edges.get(pos[heads[i]], pos[i], left);
  796 + int lid=-1;
  797 + for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;}
  798 +
  799 + int ch,cmi,cmo;
  800 + if (heads[i] < i) {
  801 + ch = rightmostRight(heads, heads[i], i);
  802 + cmi = leftmostLeft(heads, i, heads[i]);
  803 + cmo = rightmostRight(heads, i, heads.length);
  804 +
  805 + if (ch==-1) ch=heads[i];
  806 + if (cmi==-1) cmi=heads[i];
  807 + if (cmo==-1) cmo=heads[i];
  808 +
  809 + } else {
  810 + ch = leftmostLeft(heads, heads[i], i);
  811 + cmi = rightmostRight(heads, i, heads[i]);
  812 + cmo = leftmostLeft(heads, i, 0);
  813 +
  814 + if (ch==-1) ch=i;
  815 + if (cmi==-1) cmi=i;
  816 + if (cmo==-1) cmo=i;
  817 + }
  818 + edgesScores[i] += d2.sib[heads[i]][i][ch][dir][lid];
  819 + edgesScores[i] += d2.gra[heads[i]][i][cmi][dir][lid];
  820 + edgesScores[i] += d2.gra[heads[i]][i][cmo][dir][lid];
  821 + v+=edgesScores[i];
  822 + }
  823 + return (float)v;
  824 + }
  825 +
  826 +
  827 + private static int rightmostRight(short[] heads, int head, int max) {
  828 + int rightmost = -1;
  829 + for (int i = head + 1; i < max; i++) if (heads[i] == head) rightmost = i;
  830 +
  831 + return rightmost;
  832 + }
  833 +
  834 + private static int leftmostLeft(short[] heads, int head, int min) {
  835 + int leftmost = -1;
  836 + for (int i = head - 1; i > min; i--) if (heads[i] == head) leftmost = i;
  837 + return leftmost;
  838 + }
  839 +
  840 + public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA"; // names passed to MFB.register in initFeatures()
  841 +
  842 + private static int ra,la; // ids returned by register(DIR, RA) and register(DIR, LA)
  843 + private static int s_str; // id returned by register(POS, STR)
  844 + private static int s_end, _cend,_cstr, s_stwrd,s_relend; // ids of (POS, END), (Cluster.SPATH, END), (Cluster.SPATH, STR), (WORD, STWRD), (REL, END)
  845 +
  846 + protected static final String TYPE = "TYPE",DIR = "D", FEAT="F";
  847 + public static final String POS = "POS";
  848 + protected static final String DIST = "DIST",MID = "MID";
  849 +
  850 + private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10"; // value names of the DIST buckets
  851 +
  852 + private static int di0, d4,d3,d2,d1,d5,d10; // ids of the DIST buckets, assigned in initFeatures() (di0 belongs to "0")
  853 +
  854 +
  855 + private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS";
  856 +
  857 +
  858 +
  859 + private static int nofeat; // id returned by register(FEAT, "NOFEAT"); used where one side has no feats array
  860 +
  861 +
  862 + private static int maxForm; // set through setMaxForm(); static, so shared by all instances
  863 +
  864 +
  865 + /**
  866 + * Registers the fixed feature types and values with an MFB and stores the returned ids in the static fields of this class.
  867 + * Takes no parameters. NOTE(review): mf is a local object; presumably MFB keeps its tables in shared state, and the order of the register calls determines the ids — confirm in MFB.
  868 + */
  869 + static public void initFeatures() {
  870 +
  871 +
  872 + MFB mf = new MFB();
  873 + mf.register(POS, MID);
  874 + s_str = mf.register(POS, STR);
  875 + s_end = mf.register(POS, END);
  876 +
  877 + s_relend = mf.register(REL, END);
  878 +
  879 + _cstr= mf.register(Cluster.SPATH,STR);
  880 + _cend=mf.register(Cluster.SPATH,END);
  881 +
  882 +
  883 + mf.register(TYPE, POS);
  884 +
  885 + s_stwrd=mf.register(WORD,STWRD);
  886 + mf.register(POS,STPOS);
  887 +
  888 + la = mf.register(DIR, LA);
  889 + ra = mf.register(DIR, RA);
  890 +
  891 + // mf.register(TYPE, CHAR);
  892 +
  893 + mf.register(TYPE, FEAT);
  894 + nofeat=mf.register(FEAT, "NOFEAT");
  895 + // registers the TYPE values "F0" .. "F214"
  896 + for(int k=0;k<215;k++) mf.register(TYPE, "F"+k);
  897 +
  898 + // distance buckets; the ids are cached in di0, d1 .. d5 and d10
  899 + di0=mf.register(DIST, _0);
  900 + d1=mf.register(DIST, _1);
  901 + d2=mf.register(DIST, _2);
  902 + d3=mf.register(DIST, _3);
  903 + d4=mf.register(DIST, _4);
  904 + d5=mf.register(DIST, _5);
  905 + // d5l=mf.register(DIST, _5l);
  906 + d10=mf.register(DIST, _10);
  907 +
  908 +
  909 + }
  910 +
  911 + /* Returns the value of the static field s_type.
  912 + * @see extractors.Extractor#getType()
  913 + */
  914 + @Override
  915 + public int getType() {
  916 + return s_type;
  917 + }
  918 +
  919 + /* Stores max in the static field maxForm, so the new limit applies to every instance of this class.
  920 + * @see extractors.Extractor#setMaxForm(java.lang.Integer)
  921 + */
  922 + @Override
  923 + public void setMaxForm(int max) {
  924 + maxForm = max;
  925 + }
  926 +
  927 + /* Returns the current value of the static field maxForm.
  928 + * @see extractors.Extractor#getMaxForm()
  929 + */
  930 + @Override
  931 + public int getMaxForm() {
  932 + return maxForm;
  933 + }
  934 +
  935 +
  936 +
  937 +}
... ...
dependencyParser/basic/mate-tools/src/extractors/ExtractorFactory.java 0 → 100644
  1 +/**
  2 + *
  3 + */
  4 +package extractors;
  5 +
  6 +import is2.data.Long2IntInterface;
  7 +
  8 +/**
  9 + * @author Dr. Bernd Bohnet, 29.04.2011
  10 + *
  11 + *
  12 + */
  13 +public class ExtractorFactory {
  14 +
  15 + public static final int StackedClustered = 4;
  16 + public static final int StackedClusteredR2 = 5;
  17 +
  18 +
  19 + private int type=-1;
  20 +
  21 + /**
  22 + * @param stackedClusteredR22
  23 + */
  24 + public ExtractorFactory(int t) {
  25 + type=t;
  26 + }
  27 +
  28 + /**
  29 + * @param stackedClusteredR22
  30 + * @param l2i
  31 + * @return
  32 + */
  33 + public Extractor getExtractor(Long2IntInterface l2i) {
  34 + switch(type)
  35 + {
  36 + case StackedClustered:
  37 + return new ExtractorClusterStacked(l2i);
  38 + case StackedClusteredR2:
  39 + return new ExtractorClusterStackedR2(l2i);
  40 + }
  41 + return null;
  42 + }
  43 +
  44 +}
... ...
dependencyParser/basic/mate-tools/src/extractors/ExtractorReranker.java 0 → 100644
  1 +package extractors;
  2 +
  3 +
  4 +import is2.data.Cluster;
  5 +import is2.data.D4;
  6 +import is2.data.Instances;
  7 +import is2.data.Long2IntInterface;
  8 +import is2.data.MFB;
  9 +import is2.data.ParseNBest;
  10 +import is2.util.DB;
  11 +
  12 +import java.util.Arrays;
  13 +
  14 +
  15 +
  16 +final public class ExtractorReranker {
  17 +
  18 + public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos; // all but s_child are assigned in initStat() from the MFB feature counter
  19 + public static int d0,d1,d2,d3,d4,d5,d10; // ids of the DIST values, assigned in initFeatures()
  20 +
  21 + MFB mf; // created in init()
  22 +
  23 + final D4 dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ; // encoders, all constructed with li in the constructor
  24 +
  25 + public final Long2IntInterface li;
  26 +
  27 + public ExtractorReranker(Long2IntInterface li) { // stores li and creates the twelve D4 encoders, each constructed with the same li
  28 + this.li=li;
  29 + dl1 = new D4(li);dl2 = new D4(li);
  30 + dwr = new D4(li);
  31 + dr = new D4(li);
  32 + dwwp = new D4(li);
  33 +
  34 + dw = new D4(li);
  35 + dwp = new D4(li);
  36 +
  37 + dlf = new D4(li);
  38 + d3lp = new D4(li); d2lp = new D4(li); d2pw = new D4(li); d2pp = new D4(li);
  39 + // the a0..a9 settings of the encoders are not made here but in init()
  40 + }
  41 +
  42 + public static void initStat() {
  43 + DB.println("init called ");
  44 + MFB mf = new MFB();
  45 + s_rel = mf.getFeatureCounter().get(REL).intValue();;
  46 + s_pos = mf.getFeatureCounter().get(POS).intValue();
  47 + s_word = mf.getFeatureCounter().get(WORD).intValue();
  48 + s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits();
  49 + s_dir = mf.getFeatureCounter().get(DIR);
  50 + la = mf.getValue(DIR, LA);
  51 + ra = mf.getValue(DIR, RA);
  52 + s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST);
  53 + s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT);
  54 + s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH);
  55 + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH);
  56 + }
  57 +
  58 + public void init(){ // creates the MFB and sets the a0..aN fields of dl1, dl2, dwp and dwwp; the other D4 encoders are not configured here
  59 + mf = new MFB();
  60 + // assumes aN describes the size of value slot vN of a D4 (a0 = s_type, a1 = literal 3 on every encoder) — TODO confirm in D4
  61 + dl1.a0 = s_type;dl1.a1 = 3; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos;
  62 + dl2.a0 = s_type;dl2.a1 = 3;dl2.a2 = s_rel;dl2.a3 = s_rel;dl2.a4 = s_rel;dl2.a5 = s_rel;dl2.a6 = s_rel;dl2.a7 = s_rel;dl2.a8 = s_rel; dl2.a9 = s_rel;
  63 + dwp.a0 = s_type; dwp.a1 = 3; dwp.a2 = s_word; dwp.a3 = s_rel; dwp.a4 = s_rel; dwp.a5 = s_rel;dwp.a6 = s_rel;dwp.a7 = s_rel;
  64 + dwwp.a0 = s_type; dwwp.a1 = 3; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word;dwwp.a6 = s_pos;dwwp.a7 = s_pos;
  65 + }
  66 +
  67 +
  68 +
  69 +
  70 +
  71 +
  72 + public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA", FEAT="F"; // names passed to the MFB in initStat() and initFeatures()
  73 +
  74 + private static int ra,la; // ids of (DIR, RA) and (DIR, LA); assigned both in initStat() and in initFeatures()
  75 + private static int s_str; // id returned by register(POS, STR)
  76 + private static int s_end, _cend,_cstr, s_stwrd,s_relend; // ids of (POS, END), (Cluster.SPATH, END), (Cluster.SPATH, STR), (WORD, STWRD), (REL, END)
  77 +
  78 + protected static final String TYPE = "TYPE",DIR = "D";
  79 + public static final String POS = "POS";
  80 + protected static final String DIST = "DIST",MID = "MID";
  81 +
  82 + private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10"; // value names of the DIST entries
  83 +
  84 +
  85 +
  86 + private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS";
  87 +
  88 +
  89 +
  90 + private static int nofeat; // id returned by register(FEAT, "NOFEAT")
  91 +
  92 +
  93 + public static int maxForm;
  94 +
  95 + // NOTE(review): 60 is also the bound of the "F"+k registration loop in initFeatures(); confirm whether the two are meant to be tied
  96 + final public static int _FC =60;
  97 +
  98 +
  99 + /**
  100 + * Registers the fixed feature types and values with an MFB and stores the returned ids in the static fields of this class.
  101 + * Takes no parameters. NOTE(review): mf is a local object; presumably MFB keeps its tables in shared state, and the order of the register calls determines the ids — confirm in MFB.
  102 + */
  103 + static public void initFeatures() {
  104 +
  105 +
  106 + MFB mf = new MFB();
  107 + mf.register(POS, MID);
  108 + s_str = mf.register(POS, STR);
  109 + s_end = mf.register(POS, END);
  110 +
  111 + s_relend = mf.register(REL, END);
  112 +
  113 + _cstr= mf.register(Cluster.SPATH,STR);
  114 + _cend=mf.register(Cluster.SPATH,END);
  115 +
  116 +
  117 + mf.register(TYPE, POS);
  118 +
  119 + s_stwrd=mf.register(WORD,STWRD);
  120 + mf.register(POS,STPOS);
  121 +
  122 + la = mf.register(DIR, LA);
  123 + ra = mf.register(DIR, RA);
  124 +
  125 + // mf.register(TYPE, CHAR);
  126 +
  127 + mf.register(TYPE, FEAT);
  128 + nofeat=mf.register(FEAT, "NOFEAT");
  129 + // registers the TYPE values "F0" .. "F59"
  130 + for(int k=0;k<60;k++) mf.register(TYPE, "F"+k);
  131 +
  132 + // DIST values; the ids are cached in d0 .. d5 and d10
  133 + d0 =mf.register(DIST, _0);
  134 + d1= mf.register(DIST, _1);
  135 + d2 =mf.register(DIST, _2);
  136 + d3= mf.register(DIST, _3);
  137 + d4= mf.register(DIST, _4);
  138 + d5= mf.register(DIST, _5);
  139 + // d5l=mf.register(DIST, _5l);
  140 + d10= mf.register(DIST, _10);
  141 +
  142 +
  143 + }
  144 +
  145 + /** Writes the feature values of one candidate parse into v: for each index k, templates over the labels and POS tags of the children of k, of k itself and of its head.
  146 + * @param is the instances; forms, pposs and the length of sentence i are read
  147 + * @param i index of the sentence inside is
  148 + * @param parse the candidate parse; its heads and labels arrays are read
  149 + * @param rank not read by this method
  150 + * @param v output array; filled from index 0 and terminated with Integer.MIN_VALUE */
  151 + public void extractFeatures3(Instances is, int i, ParseNBest parse, int rank, long[] v) {
  152 +
  153 + int f=1,n=0;
  154 + // f is the template id written to v0 (reset to 2 for every k), n the next free slot in v. NOTE(review): the loop stops at is.length(i)-2 — confirm the last index is skipped on purpose
  155 + for(short k= 0; k<is.length(i)-1;k++) {
  156 +
  157 + short[] chld = children(parse.heads,k);
  158 +
  159 + f=2;
  160 + // fm / h / hrel: form, POS and label of k; hh / hhrel / hhf: POS, label and form of its head, with s_end / s_relend / s_stwrd used for k == 0
  161 + int fm = is.forms[i][k];
  162 + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end;
  163 + int h = is.pposs[i][k];
  164 + int hrel = parse.labels[k];
  165 + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend;
  166 + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd;
  167 +
  168 +
  169 + // label of the last entry of chld, or s_relend when k has no children
  170 + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend;
  171 + // labels and POS tags of the children, in the order children() returned them
  172 + int [] rels = new int[chld.length];
  173 + int [] pss = new int[chld.length];
  174 + for(int j=0;j<chld.length;j++) {
  175 + rels[j] = parse.labels[chld[j]];
  176 + pss[j] = is.pposs[i][chld[j]];
  177 + }
  178 + // each sequence is turned into a string (one char per id) and registered with mf to obtain a single id
  179 + StringBuilder rl = new StringBuilder(chld.length);
  180 + StringBuilder psl = new StringBuilder(chld.length);
  181 + for(int j=0;j<chld.length;j++) {
  182 + rl.append((char)rels[j]);
  183 + psl.append((char)pss[j]);
  184 + }
  185 +
  186 + int rli = mf.register("rli", rl.toString());
  187 + int pli = mf.register("pli", psl.toString());
  188 +
  189 + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal();
  190 + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal();
  191 +
  192 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  193 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  194 +
  195 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  196 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  197 +
  198 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  199 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  200 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  201 +
  202 +
  203 +
  204 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal();
  205 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  206 +
  207 + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
  208 + // the same two sequences again, now sorted, so rli / pli below are the ids of the order-insensitive variants
  209 + Arrays.sort(rels);
  210 + Arrays.sort(pss);
  211 +
  212 + rl = new StringBuilder(chld.length);
  213 + psl = new StringBuilder(chld.length);
  214 + for(int j=0;j<chld.length;j++) {
  215 + rl.append((char)rels[j]);
  216 + psl.append((char)pss[j]);
  217 + }
  218 + rli = mf.register("rli", rl.toString());
  219 + pli = mf.register("pli", psl.toString());
  220 +
  221 +
  222 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  223 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  224 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  225 +
  226 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  227 + // NOTE(review): the next line assigns v6 (rlast) but calls cz6(), unlike the other templates where N values go with czN — confirm whether cz7() was intended
  228 + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal();
  229 + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal();
  230 + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel; dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
  231 +
  232 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal();
  233 + dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal();
  234 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hh; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal();
  235 +
  236 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hhf; dwwp.v4=h; dwwp.v5=hh; dwwp.cz6(); v[n++]=dwwp.getVal();
  237 + dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hh; dwwp.cz6(); v[n++]=dwwp.getVal();
  238 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hh; dwwp.v4=h; dwwp.v5=hrel; dwwp.cz6(); v[n++]=dwwp.getVal();
  239 +
  240 +
  241 + // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); v[n++]=dl1.getVal();
  242 +// dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); v[n++]=dl1.getVal();
  243 + // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=dir;dl1.v5=hh; dl1.v6=hhh;dl1.v7=rlast; dl1.v8=r1; dl1.cz9(); v[n++]=dl1.getVal();
  244 + // dl1.v0= f++; dl1.v2=h;dl1.v3=hh; dl1.v4=hhh;dl1.v5=hrel; dl1.cz6(); v[n++]=dl1.getVal();
  245 +
  246 + // children of the head of k (an empty array when the head index is -1), encoded the same way as the children of k above
  247 + short hp = parse.heads[k];
  248 + short[] hchld = hp==-1?new short[0]:children(parse.heads,hp);
  249 +
  250 + int [] hrels = new int[hchld.length];
  251 + int [] hpss = new int[hchld.length];
  252 + for(int j=0;j<hchld.length;j++) {
  253 + hrels[j] = parse.labels[hchld[j]];
  254 + hpss[j] = is.pposs[i][hchld[j]];
  255 + }
  256 +
  257 +
  258 + StringBuilder hrl = new StringBuilder(hchld.length);
  259 + StringBuilder hpsl = new StringBuilder(hchld.length);
  260 + for(int j=0;j<hchld.length;j++) {
  261 + hrl.append((char)hrels[j]);
  262 + hpsl.append((char)hpss[j]);
  263 + }
  264 + int hrli = mf.register("rli", hrl.toString());
  265 + int hpli = mf.register("pli", hpsl.toString());
  266 +
  267 + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=hrli; dwwp.cz4(); v[n++]=dwwp.getVal();
  268 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hrli; dwwp.cz4(); v[n++]=dwwp.getVal();
  269 + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=fm; dwwp.cz4(); v[n++]=dwwp.getVal();
  270 + // NOTE(review): the three lines below assign v5 but call cz5(), and the last one uses hpli for both v2 and v3 — confirm (cz6 / hrli may have been intended)
  271 + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=rli; dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  272 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hrli;dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  273 + dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=hpli;dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  274 +
  275 +
  276 +
  277 + }
  278 + // end marker directly after the last value written
  279 + v[n]=Integer.MIN_VALUE;
  280 + }
  281 +
  282 + /**
  283 + * This works seem works well with n-best n=8 (88.858074) , n=10 (88.836884), n=12 (88.858)
  284 + * n=14 (88.913417) n=16 (88.79546) n=20 (88.80621) n 50 (88.729364)
  285 + * 1-best: 88.749605
  286 + *
  287 + * @param is
  288 + * @param i
  289 + * @param parse
  290 + * @param rank
  291 + * @param v
  292 + * @param cluster
  293 + */
  294 + public void extractFeatures(Instances is, int i, ParseNBest parse, int rank, long[] v, Cluster cluster) {
  295 +
  296 + // mf.getValue(REL, "SB");
  297 +
  298 + int f=1,n=0;
  299 +
  300 + for(short k= 0; k<is.length(i)-1;k++) {
  301 +
  302 + short[] chld = children(parse.heads,k);
  303 +
  304 + int abs = Math.abs(parse.heads[k]-k);
  305 + final int dist;
  306 + if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2;
  307 + else if (abs==2)dist=d1; else dist=d0;
  308 +
  309 +
  310 + f=2;
  311 +
  312 + int fm = is.forms[i][k];
  313 + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end;
  314 + int h = is.pposs[i][k];
  315 + int hrel = parse.labels[k];//is.labels[i][k];
  316 + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend;
  317 + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd;
  318 +
  319 + int r1 = chld.length>0?parse.labels[chld[0]]:s_relend;
  320 + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend;
  321 +
  322 + int [] rels = new int[chld.length];
  323 + int [] pss = new int[chld.length];
  324 + int [] cls = new int[chld.length];
  325 +
  326 + int[] rc = new int[30]; // 20 was a good length
  327 +
  328 + for(int j=0;j<chld.length;j++) {
  329 + rels[j] = parse.labels[chld[j]];
  330 + if (rels[j]<rc.length) rc[rels[j]]++;
  331 + pss[j] = is.pposs[i][chld[j]];
  332 +// cls[j] = is.forms[i][chld[j]]==-1?0:cluster.getLP(is.forms[i][chld[j]]);
  333 +// cls[j] = cls[j]==-1?0:cls[j];
  334 + }
  335 +
  336 + StringBuilder rl = new StringBuilder(chld.length);
  337 + StringBuilder psl = new StringBuilder(chld.length);
  338 + StringBuilder csl = new StringBuilder(chld.length);
  339 + for(int j=0;j<chld.length;j++) {
  340 + rl.append((char)rels[j]);
  341 + psl.append((char)pss[j]);
  342 +// csl.append((char)cls[j]);
  343 + }
  344 +
  345 + int rli = mf.register("rli", rl.toString());
  346 + int pli = mf.register("pli", psl.toString());
  347 +// int cli = mf.register("cli", csl.toString());
  348 +
  349 +
  350 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  351 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  352 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  353 + // dwwp.v0=f++; dwwp.v2=cli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  354 +
  355 + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal();
  356 + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal();
  357 + //dwwp.v0=f++; dwwp.v2=cli; dwwp.cz3(); v[n++]=dwwp.getVal();
  358 +
  359 + // dwwp.v0=f++; dwwp.v2=cli;dwwp.v3=h; dwwp.cz4(); v[n++]=dwwp.getVal();
  360 +
  361 + for(int j=1;j<rc.length;j++) {
  362 + dwwp.v0=f++; dwwp.v2=rc[j]==0?1:rc[j]==1?2:3; dwwp.v3=j; dwwp.cz4(); v[n++]=dwwp.getVal();//
  363 + }
  364 +
  365 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  366 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  367 +
  368 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  369 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  370 +
  371 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal();
  372 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  373 +
  374 + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
  375 +
  376 + //dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hh; dwwp.v4=dist; dwwp.cz5(); v[n++]=dwwp.getVal();
  377 +
  378 + Arrays.sort(rels);
  379 + Arrays.sort(pss);
  380 +
  381 + rl = new StringBuilder(chld.length);
  382 + psl = new StringBuilder(chld.length);
  383 + for(int j=0;j<chld.length;j++) {
  384 + rl.append((char)rels[j]);
  385 + psl.append((char)pss[j]);
  386 + }
  387 + rli = mf.register("rli", rl.toString());
  388 + pli = mf.register("pli", psl.toString());
  389 +
  390 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  391 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  392 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  393 +
  394 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  395 +
  396 + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal();
  397 + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal();
  398 + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
  399 + }
  400 +
  401 + v[n]=Integer.MIN_VALUE;
  402 + }
  403 +
  404 + /**
  405 +
  406 + * Works well!
  407 + * @param is
  408 + * @param i
  409 + * @param parse
  410 + * @param rank
  411 + * @param v
  412 + */
  413 + public void extractFeatures6(Instances is, int i, ParseNBest parse, int rank, long[] v) {
  414 +
  415 + // mf.getValue(REL, "SB");
  416 +
  417 + int f=1,n=0;
  418 +
  419 + for(short k= 0; k<is.length(i)-1;k++) {
  420 +
  421 + short[] chld = children(parse.heads,k);
  422 +
  423 + f=2;
  424 +
  425 + int fm = is.forms[i][k];
  426 + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end;
  427 + int h = is.pposs[i][k];
  428 + int hrel = parse.labels[k];//is.labels[i][k];
  429 + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend;
  430 + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd;
  431 +
  432 + int r1 = chld.length>0?parse.labels[chld[0]]:s_relend;
  433 + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend;
  434 +
  435 + int [] rels = new int[chld.length];
  436 + int [] pss = new int[chld.length];
  437 +
  438 + int[] rc = new int[30]; // 20 was a good length
  439 +
  440 + for(int j=0;j<chld.length;j++) {
  441 + rels[j] = parse.labels[chld[j]];
  442 + if (rels[j]<rc.length) rc[rels[j]]++;
  443 + // if (rels[j]==sb) numSB++;
  444 + pss[j] = is.pposs[i][chld[j]];
  445 + }
  446 +
  447 + StringBuilder rl = new StringBuilder(chld.length);
  448 + StringBuilder psl = new StringBuilder(chld.length);
  449 + for(int j=0;j<chld.length;j++) {
  450 + rl.append((char)rels[j]);
  451 + psl.append((char)pss[j]);
  452 + }
  453 +
  454 + int rli = mf.register("rli", rl.toString());
  455 + int pli = mf.register("pli", psl.toString());
  456 +
  457 +
  458 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  459 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  460 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  461 +
  462 + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal();
  463 + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal();
  464 +
  465 + for(int j=1;j<rc.length;j++) {
  466 + dwwp.v0=f++; dwwp.v2=rc[j]==0?1:rc[j]==1?2:3; dwwp.v3=j; dwwp.cz4(); v[n++]=dwwp.getVal();//
  467 + }
  468 +
  469 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  470 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  471 +
  472 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  473 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  474 +
  475 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal();
  476 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  477 +
  478 + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
  479 +
  480 +
  481 + Arrays.sort(rels);
  482 + Arrays.sort(pss);
  483 +
  484 + rl = new StringBuilder(chld.length);
  485 + psl = new StringBuilder(chld.length);
  486 + for(int j=0;j<chld.length;j++) {
  487 + rl.append((char)rels[j]);
  488 + psl.append((char)pss[j]);
  489 + }
  490 + rli = mf.register("rli", rl.toString());
  491 + pli = mf.register("pli", psl.toString());
  492 +
  493 +
  494 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  495 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  496 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  497 +
  498 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  499 +
  500 + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal();
  501 + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal();
  502 + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
  503 +
  504 + }
  505 +
  506 + v[n]=Integer.MIN_VALUE;
  507 + }
  508 +
  509 +
  510 +
  511 + public void extractFeatures2(Instances is, int i, ParseNBest parse, int rank, long[] v) {
  512 +
  513 +
  514 +
  515 + int f=1,n=0;
  516 +
  517 + for(short k= 0; k<is.length(i)-1;k++) {
  518 +
  519 + short[] chld = children(parse.heads,k);
  520 +
  521 + f=2;
  522 +
  523 + int fm = is.forms[i][k];
  524 + int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end;
  525 + int h = is.pposs[i][k];
  526 + int hrel = parse.labels[k];//is.labels[i][k];
  527 + int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend;
  528 + int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd;
  529 +
  530 + int r1 = chld.length>0?parse.labels[chld[0]]:s_relend;
  531 + int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend;
  532 +
  533 + int [] rels = new int[chld.length];
  534 + int [] pss = new int[chld.length];
  535 +
  536 +
  537 +
  538 + for(int j=0;j<chld.length;j++) {
  539 + rels[j] = parse.labels[chld[j]];
  540 + pss[j] = is.pposs[i][chld[j]];
  541 + }
  542 +
  543 + StringBuilder rl = new StringBuilder(chld.length);
  544 + StringBuilder psl = new StringBuilder(chld.length);
  545 + for(int j=0;j<chld.length;j++) {
  546 + rl.append((char)rels[j]);
  547 + psl.append((char)pss[j]);
  548 + }
  549 +
  550 + int rli = mf.register("rli", rl.toString());
  551 + int pli = mf.register("pli", psl.toString());
  552 +
  553 +
  554 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  555 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  556 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  557 +
  558 + dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal();
  559 + dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal();
  560 +
  561 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  562 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  563 +
  564 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  565 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
  566 +
  567 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal();
  568 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  569 +
  570 + dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
  571 +
  572 +
  573 + Arrays.sort(rels);
  574 + Arrays.sort(pss);
  575 +
  576 + rl = new StringBuilder(chld.length);
  577 + psl = new StringBuilder(chld.length);
  578 + for(int j=0;j<chld.length;j++) {
  579 + rl.append((char)rels[j]);
  580 + psl.append((char)pss[j]);
  581 + }
  582 + rli = mf.register("rli", rl.toString());
  583 + pli = mf.register("pli", psl.toString());
  584 +
  585 +
  586 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  587 + dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  588 + dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
  589 +
  590 + dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
  591 +
  592 + dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal();
  593 + dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal();
  594 + dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
  595 +
  596 + }
  597 +
  598 + v[n]=Integer.MIN_VALUE;
  599 + }
  600 +
  601 +
  602 +
  603 + /**
  604 + * @param parse
  605 + * @param k
  606 + * @return
  607 + */
  608 + private short[] children(short[] heads, short h) {
  609 +
  610 + int c=0;
  611 + for(int k=0;k<heads.length;k++) if (heads[k] ==h ) c++;
  612 +
  613 + short[] clds = new short[c];
  614 + c=0;
  615 + for(int k=0;k<heads.length;k++) if (heads[k] ==h ) clds[c++]=(short)k;
  616 + return clds;
  617 + }
  618 +
  619 +
  620 +
  621 +}
... ...
dependencyParser/basic/mate-tools/src/extractors/ParallelExtract.java 0 → 100755
  1 +package extractors;
  2 +
  3 +import is2.data.Cluster;
  4 +import is2.data.DataF;
  5 +import is2.data.Edges;
  6 +import is2.data.F2SF;
  7 +import is2.data.FV;
  8 +import is2.data.Instances;
  9 +import is2.data.Long2IntInterface;
  10 +
  11 +import java.util.ArrayList;
  12 +import java.util.concurrent.Callable;
  13 +
  14 +
  15 +/**
  16 + * @author Bernd Bohnet, 30.08.2009
  17 + *
  18 + * This class implements a parallel feature extractor.
  19 + */
  20 +final public class ParallelExtract implements Callable<Object>
  21 +{
  22 + // the data space of the weights for a dependency tree
  23 + final DataF d;
  24 +
  25 + // the data extractor does the actual work
  26 + final Extractor extractor;
  27 +
  28 + private Instances is;
  29 + private int i;
  30 +
  31 + private F2SF para;
  32 +
  33 + private Cluster cluster;
  34 +
  35 + private Long2IntInterface li;
  36 +
  37 + public ParallelExtract(Extractor e, Instances is, int i, DataF d, F2SF para,Cluster cluster, Long2IntInterface li) {
  38 +
  39 + this.is =is;
  40 + extractor=e;
  41 + this.d =d;
  42 + this.i=i;
  43 + this.para=para;
  44 + this.cluster = cluster;
  45 + this.li=li;
  46 + }
  47 +
  48 +
  49 + public static class DSet {
  50 + int w1,w2;
  51 + }
  52 +
  53 + public Object call() {
  54 +
  55 + try {
  56 +
  57 + F2SF f= para;
  58 +
  59 +
  60 + short[] pos=is.pposs[i];
  61 + int[] forms=is.forms[i];
  62 + int[] lemmas=is.plemmas[i];
  63 + short[][] feats=is.feats[i];
  64 + int length = pos.length;
  65 +
  66 + long[] svs = new long[250];
  67 +
  68 + int type=extractor.getType();
  69 +
  70 + while (true) {
  71 +
  72 + DSet set = get();
  73 + if (set ==null) break;
  74 +
  75 + int w1=set.w1;
  76 + int w2=set.w2;
  77 +
  78 + f.clear();
  79 + int n =extractor.basic(pos, forms, w1, w2,cluster, f);
  80 + d.pl[w1][w2]=f.getScoreF();
  81 +
  82 + short[] labels = Edges.get(pos[w1], pos[w2],false);
  83 + float[][] lab = d.lab[w1][w2];
  84 +
  85 + extractor.firstm(is, i, w1, w2, 0, cluster, svs);
  86 +
  87 + if (labels!=null) {
  88 +
  89 +
  90 + for (int l = labels.length - 1; l >= 0; l--) {
  91 +
  92 + short label = labels[l];
  93 +
  94 + f.clear();
  95 + for(int k=svs.length-1;k>=0;k--) if (svs[k]>0) f.add(li.l2i(svs[k]+label*type));
  96 + lab[label][0]=f.getScoreF();
  97 + }
  98 + }
  99 +
  100 + labels = Edges.get(pos[w1], pos[w2],true);
  101 +
  102 + if (labels!=null) {
  103 +
  104 + for (int l = labels.length - 1; l >= 0; l--) {
  105 +
  106 + int label = labels[l];
  107 + f.clear();
  108 + for(int k=svs.length-1;k>=0;k--) if (svs[k]>0) f.add(li.l2i(svs[k]+label*type));
  109 + lab[label][1]=f.getScoreF();
  110 + }
  111 + }
  112 +
  113 + int s = w1<w2 ? w1 : w2;
  114 + int e = w1<w2 ? w2 : w1;
  115 +
  116 + int sg = w1<w2 ? w1 : 0;
  117 + int eg = w1<w2 ? length : w1+1;
  118 +
  119 +
  120 + for(int m=s;m<e;m++) {
  121 + for(int dir=0;dir<2;dir++) {
  122 + labels = Edges.get(pos[w1], pos[w2],dir==1);
  123 + float lab2[]= new float[labels.length];
  124 +
  125 + int g = (m==s||e==m) ? -1 : m;
  126 +
  127 +
  128 + extractor.siblingm(is,i,pos,forms,lemmas,feats, w1, w2, g, 0, cluster, svs,n);
  129 +
  130 + for (int l = labels.length - 1; l >= 0; l--) {
  131 +
  132 + int label = labels[l];
  133 + f.clear();
  134 +
  135 + for(int k=svs.length-1;k>=0;k--) {
  136 + if (svs[k]>0) f.add(li.l2i(svs[k]+label*type));
  137 + }
  138 + lab2[l] = (float)f.score;//f.getScoreF();
  139 + }
  140 + d.sib[w1][w2][m][dir]=lab2;
  141 + }
  142 + }
  143 +
  144 + for(int m=sg;m<eg;m++) {
  145 + for(int dir=0;dir<2;dir++) {
  146 + labels = Edges.get(pos[w1], pos[w2],dir==1);
  147 + float[] lab2 = new float[labels.length];
  148 +
  149 + int g = (m==s||e==m) ? -1 : m;
  150 +
  151 + extractor.gcm(is, i, w1,w2,g, 0, cluster, svs);
  152 +
  153 + for (int l = labels.length - 1; l >= 0; l--) {
  154 +
  155 + int label = labels[l];
  156 +
  157 + f.clear();
  158 + for(int k=svs.length-1;k>=0;k--) {
  159 + if (svs[k]>0) f.add(li.l2i(svs[k]+label*type));
  160 + }
  161 + lab2[l] = f.getScoreF();
  162 + }
  163 + d.gra[w1][w2][m][dir] =lab2;
  164 + }
  165 + }
  166 +
  167 + }
  168 + } catch(Exception e ) {
  169 + e.printStackTrace();
  170 + }
  171 + return null;
  172 + }
  173 +
  174 +
  175 + static ArrayList<DSet> sets = new ArrayList<DSet>();
  176 +
  177 + private DSet get() {
  178 +
  179 + synchronized (sets) {
  180 + if (sets.size()==0) return null;
  181 + return sets.remove(sets.size()-1);
  182 + }
  183 + }
  184 + static public void add(int w1, int w2){
  185 + DSet ds =new DSet();
  186 + ds.w1=w1;
  187 + ds.w2=w2;
  188 + sets.add(ds);
  189 + }
  190 +
  191 +
  192 +
  193 +
  194 +}
... ...
dependencyParser/basic/mate-tools/src/is2/data/Closed.java 0 → 100755
  1 +package is2.data;
  2 +
  3 +
  4 +
  5 +final public class Closed {
  6 +
  7 + public double p;
  8 + short b,e,m;
  9 + byte dir;
  10 +
  11 + Closed d;
  12 + Open u;
  13 +
  14 + public Closed(short s, short t, int m, int dir,Open u, Closed d, float score) {
  15 + this.b = s;
  16 + this.e = t;
  17 + this.m = (short)m;
  18 + this.dir = (byte)dir;
  19 + this.u=u;
  20 + this.d =d;
  21 + p=score;
  22 + }
  23 +
  24 +
  25 + public void create(Parse parse) {
  26 + if (u != null) u.create(parse);
  27 + if (d != null) d.create(parse);
  28 + }
  29 +}
  30 +
  31 +
... ...
dependencyParser/basic/mate-tools/src/is2/data/Cluster.java 0 → 100644
  1 +/**
  2 + *
  3 + */
  4 +package is2.data;
  5 +
  6 +
  7 +
  8 +import is2.util.DB;
  9 +
  10 +import java.io.BufferedReader;
  11 +import java.io.DataInputStream;
  12 +import java.io.DataOutputStream;
  13 +import java.io.FileInputStream;
  14 +import java.io.IOException;
  15 +import java.io.InputStreamReader;
  16 +
  17 +/**
  18 + * @author Dr. Bernd Bohnet, 28.10.2010
  19 + *
  20 + *
  21 + */
  22 +final public class Cluster {
  23 +
  24 + public static final String LPATH = "LP";
  25 + public static final String SPATH = "SP";
  26 +
  27 + // [word][p] p = [0:long-path | 1:short-path]
  28 + final private short[][] word2path;
  29 +
  30 + public Cluster() {
  31 + word2path =new short[0][0];
  32 + }
  33 +
  34 + /**
  35 + * @param clusterFile
  36 + * @param mf
  37 + *
  38 + */
  39 + public Cluster(String clusterFile, IEncoderPlus mf, int ls) {
  40 +
  41 + final String REGEX = "\t";
  42 +
  43 + // register words
  44 + try {
  45 + BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768);
  46 +
  47 + int cnt=0;
  48 + String line;
  49 + while ((line =inputReader.readLine())!=null) {
  50 +
  51 + cnt++;
  52 + try {
  53 + String[] split = line.split(REGEX);
  54 + mf.register(SPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls));
  55 + mf.register(LPATH, split[0]);
  56 + mf.register(PipeGen.WORD, split[1]);
  57 + } catch(Exception e) {
  58 + System.out.println("Error in cluster line "+cnt+" error: "+e.getMessage());
  59 + }
  60 + }
  61 + System.out.println("read number of clusters "+cnt);
  62 + inputReader.close();
  63 +
  64 + } catch (Exception e) {
  65 + e.printStackTrace();
  66 + }
  67 +
  68 + word2path = new short[mf.getFeatureCounter().get(PipeGen.WORD)][2];
  69 +
  70 +
  71 + // insert words
  72 + try {
  73 + String line;
  74 + BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768);
  75 +
  76 + while ((line =inputReader.readLine())!=null) {
  77 +
  78 + String[] split = line.split(REGEX);
  79 + int wd = mf.getValue(PipeGen.WORD, split[1]);
  80 + word2path[wd][0] = (short)mf.getValue(SPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls));
  81 + word2path[wd][1] = (short)mf.getValue(LPATH, split[0]);
  82 + }
  83 + inputReader.close();
  84 + int fill=0;
  85 + for(int l = 0; l<word2path.length; l++ ){
  86 + if (word2path[l][0]!=0) fill++;
  87 + }
  88 + /*
  89 + for(int l = 0; l<word2path.length; l++ ){
  90 + if (word2path[l][1]!=0) fillL++;
  91 + if (word2path[l][1]<-1) System.out.println("lower "+word2path[l][1]);
  92 + }
  93 + */
  94 + System.out.println("filled "+fill+" of "+word2path.length);
  95 +
  96 + } catch (Exception e) {
  97 + e.printStackTrace();
  98 + }
  99 + }
  100 +
  101 + /**
  102 + * Read the cluster
  103 + * @param dos
  104 + * @throws IOException
  105 + */
  106 + public Cluster(DataInputStream dis) throws IOException {
  107 +
  108 + word2path = new short[dis.readInt()][2];
  109 + for(int i =0;i<word2path.length;i++) {
  110 + word2path[i][0]=dis.readShort();
  111 + word2path[i][1]=dis.readShort();
  112 + }
  113 + DB.println("Read cluster with "+word2path.length+" words ");
  114 + }
  115 +
  116 + /**
  117 + * Write the cluster
  118 + * @param dos
  119 + * @throws IOException
  120 + */
  121 + public void write(DataOutputStream dos) throws IOException {
  122 +
  123 + dos.writeInt(word2path.length);
  124 + for(short[] i : word2path) {
  125 + dos.writeShort(i[0]);
  126 + dos.writeShort(i[1]);
  127 + }
  128 +
  129 + }
  130 +
  131 + /**
  132 + * @param form the id of a word form
  133 + * @return the short path to the word form in the cluster
  134 +
  135 + final public int getSP(int form) {
  136 + if (word2path.length<form) return -1;
  137 + return word2path[form][0];
  138 + }
  139 + */
  140 + /**
  141 + * get the long path to a word form in the cluster
  142 + * @param form the id of a word form
  143 + * @return the long path to the word
  144 + */
  145 + final public int getLP(int form) {
  146 + if (word2path.length<=form || word2path[form].length<=0) return -1;
  147 + return word2path[form][0]==0?-1:word2path[form][0];
  148 + }
  149 +
  150 + final public int getLP(int form, int l) {
  151 + if (word2path.length<form) return -1;
  152 + return word2path[form][l]==0?-1:word2path[form][l];
  153 + }
  154 +
  155 + final public int size() {
  156 + return word2path.length;
  157 + }
  158 +}
... ...
dependencyParser/basic/mate-tools/src/is2/data/D4.java 0 → 100644
  1 +/**
  2 + *
  3 + */
  4 +package is2.data;
  5 +
  6 +import is2.util.DB;
  7 +
  8 +/**
  9 + * @author Dr. Bernd Bohnet, 30.10.2010
  10 + *
  11 + * This class computes the mapping of features to the weight vector.
  12 + */
  13 +final public class D4 extends DX {
  14 + private long shift;
  15 + private long h;
  16 +
  17 +
  18 + private final Long2IntInterface _li;
  19 + public D4(Long2IntInterface li) {
  20 + _li=li;
  21 + }
  22 +
  23 +
  24 + final public void clean() {
  25 + v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0;
  26 + shift=0;h=0;
  27 + }
  28 +
  29 + final public void cz3(){
  30 + if (v0<0||v1<0||v2<0) { h=-1;return;}
  31 +
  32 + h= v0+v1*(shift =a0)+(long)v2*(shift *=a1);
  33 + shift *=a2;
  34 + }
  35 +
  36 + final public long c3(){
  37 + if (v0<0||v1<0||v2<0) { h=-1;return h;}
  38 +
  39 + h= v0+v1*(shift =a0)+(long)v2*(shift *=a1);
  40 + shift *=a2;
  41 + return h;
  42 + }
  43 +
  44 + final public void cz4(){
  45 + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;}
  46 +
  47 + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
  48 + shift *=a3;
  49 + }
  50 +
  51 + final public long c4(){
  52 + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return h;}
  53 +
  54 + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
  55 + shift *=a3;
  56 + return h;
  57 + }
  58 +
  59 +
  60 + final public void cz5(){
  61 +
  62 + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return;}
  63 +
  64 + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift*=a2)+v4*(shift*=a3);
  65 + shift*=a4;
  66 +
  67 + }
  68 +
  69 + final public long c5(){
  70 +
  71 + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return h;}
  72 +
  73 + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2)+v4*(shift*=a3);
  74 + shift*=a4;
  75 + return h;
  76 + }
  77 +
  78 +
  79 + final public void cz6(){
  80 +
  81 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;}
  82 +
  83 + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
  84 + h +=v4*(shift*=a3)+v5*(shift*=a4);
  85 + shift*=a5;
  86 + }
  87 +
  88 + final public long c6(){
  89 +
  90 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return h;}
  91 +
  92 + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
  93 + h +=v4*(shift*=a3)+v5*(shift*=a4);
  94 + shift*=a5;
  95 + return h;
  96 + }
  97 +
  98 +
  99 + final public long cs(int b, int v) {
  100 + if (h<0) {h=-1; return h;}
  101 +
  102 + h += v*shift;
  103 + shift *=b;
  104 + return h;
  105 +
  106 + }
  107 +
  108 + final public void csa(int b, int v, IFV f) {
  109 + if (h<0) {h=-1; return;}
  110 +
  111 + h += v*shift;
  112 + shift *=b;
  113 + f.add(_li.l2i(h));
  114 + }
  115 +
  116 + final public long csa(int b, int v) {
  117 + if (h<0) {h=-1; return-1; }
  118 +
  119 + h += v*shift;
  120 + shift *=b;
  121 + return h;
  122 + }
  123 +
  124 + public final long getVal(){
  125 + return h;
  126 + }
  127 +
  128 + public final void map(IFV f, long l){
  129 + if (l>0) f.add(this._li.l2i(l));
  130 + }
  131 +
  132 + /**
  133 + * @param f
  134 + */
  135 + final public void add(IFV f) {
  136 + f.add(_li.l2i(h));
  137 + }
  138 +
  139 + final public void cz7() {
  140 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;}
  141 +
  142 + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
  143 + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5);
  144 + shift*=a6;
  145 +
  146 + }
  147 +
  148 + final public long c7() {
  149 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;}
  150 +
  151 + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
  152 + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5);
  153 + shift*=a6;
  154 + return h;
  155 + }
  156 +
  157 + /**
  158 + *
  159 + */
  160 + final public void cz8() {
  161 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;}
  162 +
  163 + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
  164 + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5)+v7*(shift*=a6);
  165 + shift*=a7;
  166 + }
  167 +
  168 + final public void cz9() {
  169 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0||v8<0) {h=-1; return;}
  170 +
  171 + h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
  172 + h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5)+v7*(shift*=a6)+v8*(shift*=a7);
  173 + shift*=a8;
  174 + }
  175 +
  176 +
  177 + /* (non-Javadoc)
  178 + * @see is2.data.DX#computeLabeValue(short, short)
  179 + */
  180 + @Override
  181 + public int computeLabeValue(int label, int shift) {
  182 + return label*shift;
  183 + }
  184 +
  185 +
  186 + public void fix() {
  187 +
  188 + }
  189 +
  190 +
  191 +}
0 192 \ No newline at end of file
... ...
dependencyParser/basic/mate-tools/src/is2/data/D6.java 0 → 100644
  1 +/**
  2 + *
  3 + */
  4 +package is2.data;
  5 +
  6 +import is2.util.DB;
  7 +
  8 +/**
  9 + * @author Dr. Bernd Bohnet, 30.10.2010
  10 + *
  11 + * This class computes the mapping of features to the weight vector.
  12 + */
  13 +final public class D6 extends DX {
  14 + private long shift;
  15 + private long h;
  16 +
  17 +
  18 + private final Long2IntInterface _li;
  19 + public D6(Long2IntInterface li) {
  20 + _li=li;
  21 + }
  22 +
  23 + boolean fixed =false;
  24 +
  25 + public void fix() {
  26 +
  27 + if (fixed) {
  28 + DB.println("warning: already fixed");
  29 + // return;
  30 + }
  31 +
  32 + long t0= 1, t1=a0, t2=t1*a1, t3=t2*a2,t4=t3*a3, t5=t4*a4,t6=t5*a5, t7=t6*a6, t8=t7*a7, t9=t8*a8;
  33 +
  34 +
  35 +
  36 +
  37 + a0=t0;a1=t1;a2=t2;a3=t3;a4=t4;a5=t5;a6=t6;a7=t7;a8=t8; a9=t9;
  38 +
  39 + fixed=true;
  40 + }
  41 +
  42 +
  43 +
  44 + final public void clean() {
  45 + v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0;
  46 + shift=0;h=0;
  47 + }
  48 +
  49 + final public void cz3(){
  50 + if (v0<0||v1<0||v2<0) { h=-1;return;}
  51 +
  52 + h= v0+v1*a1+v2*a2;
  53 + shift =a3;
  54 + }
  55 +
  56 + final public long c3(){
  57 + if (v0<0||v1<0||v2<0) { h=-1;return h;}
  58 +
  59 + h= v0+v1*a1+v2*a2;
  60 + shift =a3;
  61 + return h;
  62 + }
  63 +
  64 + final public void cz4(){
  65 + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;}
  66 +
  67 + h =v0+v1*a1+v2*a2+v3*a3;
  68 + shift =a4;
  69 + }
  70 +
  71 + final public long c4(){
  72 + if (v0<0||v1<0||v2<0||v3<0) {h=-1;return h;}
  73 +
  74 + h =v0+v1*a1+v2*a2+v3*a3;
  75 + shift =a4;
  76 + return h;
  77 + }
  78 +
  79 +
  80 + final public void cz5(){
  81 +
  82 + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return;}
  83 +
  84 + h =v0+v1*a1+v2*a2+v3*a3+v4*a4;
  85 + shift=a5;
  86 +
  87 + }
  88 +
  89 + final public long c5(){
  90 +
  91 + if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return h;}
  92 +
  93 + h =v0+v1*a1+v2*a2+v3*a3+v4*a4;
  94 + shift=a5;
  95 + return h;
  96 + }
  97 +
  98 +
  99 + final public void cz6(){
  100 +
  101 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;}
  102 +
  103 + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5;
  104 + shift=a6;
  105 + }
  106 +
  107 + final public long c6(){
  108 +
  109 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return h;}
  110 +
  111 + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5;
  112 + shift=a6;
  113 + return h;
  114 + }
  115 +
  116 +
  117 + final public long cs(int b, int v) {
  118 + if (h<0) {h=-1; return h;}
  119 +
  120 + h += v*shift;
  121 + shift *=b;
  122 + return h;
  123 +
  124 + }
  125 +
  126 + final public void csa(int b, int v, IFV f) {
  127 + if (h<0) {h=-1; return;}
  128 +
  129 + h += v*shift;
  130 + shift *=b;
  131 + f.add(_li.l2i(h));
  132 + }
  133 +
  134 + final public long csa(int b, int v) {
  135 + if (h<0) {h=-1; return-1; }
  136 +
  137 + h += v*shift;
  138 + shift *=b;
  139 + return h;
  140 + }
  141 +
  142 + public final long getVal(){
  143 + return h;
  144 + }
  145 +
  146 + public final void map(IFV f, long l){
  147 + if (l>0) f.add(this._li.l2i(l));
  148 + }
  149 +
  150 + /**
  151 + * @param f
  152 + */
  153 + final public void add(IFV f) {
  154 + f.add(_li.l2i(h));
  155 + }
  156 +
  157 + final public void cz7() {
  158 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;}
  159 +
  160 + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6;
  161 + shift=a7;
  162 +
  163 + }
  164 +
  165 + final public long c7() {
  166 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;}
  167 +
  168 + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6;
  169 + shift=a7;
  170 + return h;
  171 + }
  172 +
  173 + /**
  174 + *
  175 + */
  176 + final public void cz8() {
  177 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;}
  178 +
  179 + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7;
  180 + shift=a8;
  181 + }
  182 +
  183 +
  184 +
  185 + /* (non-Javadoc)
  186 + * @see is2.data.DX#computeLabeValue(short, short)
  187 + */
  188 + @Override
  189 + public int computeLabeValue(int label, int shift) {
  190 + return label*shift;
  191 + }
  192 +
  193 +
  194 +
  195 +
  196 +
  197 +}
0 198 \ No newline at end of file
... ...
dependencyParser/basic/mate-tools/src/is2/data/D7.java 0 → 100644
  1 +/**
  2 + *
  3 + */
  4 +package is2.data;
  5 +
  6 +
  7 +/**
  8 + * @author Dr. Bernd Bohnet, 30.10.2010
  9 + *
  10 + * This class computes the mapping of features to the weight vector.
  11 + */
  12 +final public class D7 extends DX {
  13 +
  14 + private long shift;
  15 + private long h;
  16 + private final Long2IntInterface _li;
  17 +
  18 + public D7(Long2IntInterface li) {
  19 + _li=li;
  20 + }
  21 +
  22 + boolean fixed =false;
  23 +
  24 + public void fix() {
  25 +
  26 + long t0= 1, t1=a0, t2=t1*a1, t3=t2*a2,t4=t3*a3, t5=t4*a4,t6=t5*a5, t7=t6*a6, t8=t7*a7, t9=t8*a8;
  27 +
  28 + a0=t0;a1=t1;a2=t2;a3=t3;a4=t4;a5=t5;a6=t6;a7=t7;a8=t8; a9=t9;
  29 +
  30 + }
  31 +
  32 +
  33 +
  34 + final public void clean() {
  35 + v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0;
  36 + shift=0;h=0;
  37 + }
  38 +
  39 + final public void cz3(){
  40 + if (v2<0) { h=-1;return;}
  41 +
  42 + h= v0+v1*a1+v2*a2;
  43 + shift =a3;
  44 + }
  45 +
  46 + final public long c3(){
  47 + if (v2<0) { h=-1;return h;}
  48 +
  49 + h= v0+v1*a1+v2*a2;
  50 + shift =a3;
  51 + return h;
  52 + }
  53 +
  54 + final public long d3(){
  55 + if (v2<0)return -1;
  56 + return v0+v2*a2;
  57 + }
  58 +
  59 + final public void cz4(){
  60 + // if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;}
  61 + if (v2<0||v3<0) {h=-1;return;}
  62 +
  63 + h =v0+v1*a1+v2*a2+v3*a3;
  64 + shift =a4;
  65 + }
  66 +
  67 + final public long c4(){
  68 + if (v2<0||v3<0) {h=-1;return h;}
  69 +
  70 + h =v0+v1*a1+v2*a2+v3*a3;
  71 + shift =a4;
  72 + return h;
  73 + }
  74 +
  75 +
  76 + final public long d4(){
  77 + if (v2<0||v3<0) return -1;
  78 + return v0+v2*a2+v3*a3;
  79 + }
  80 +
  81 +
  82 + final public void cz5(){
  83 +
  84 + if (v2<0||v3<0||v4<0) {h=-1;return;}
  85 +
  86 + h =v0+v1*a1+v2*a2+v3*a3+v4*a4;
  87 + shift=a5;
  88 +
  89 + }
  90 +
  91 + final public long c5(){
  92 +
  93 + if (v2<0||v3<0||v4<0) {h=-1;return h;}
  94 +
  95 + h =v0+v1*a1+v2*a2+v3*a3+v4*a4;
  96 + shift=a5;
  97 + return h;
  98 + }
  99 +
  100 + final public long d5(){
  101 + if (v2<0||v3<0||v4<0) return -1;
  102 + return v0+v2*a2+v3*a3+v4*a4;
  103 + }
  104 +
  105 +
  106 + final public void cz6(){
  107 +
  108 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;}
  109 +
  110 + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5;
  111 + shift=a6;
  112 + }
  113 +
  114 + final public long c6(){
  115 +
  116 + if (v2<0||v3<0||v4<0||v5<0) {h=-1; return h;}
  117 +
  118 + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5;
  119 + shift=a6;
  120 + return h;
  121 + }
  122 +
  123 + final public long d6(){
  124 + if (v2<0||v3<0||v4<0||v5<0) return -1;
  125 + return v0+v2*a2+v3*a3 +v4*a4+v5*a5;
  126 + }
  127 +
  128 +
  129 + final public long cs(int b, int v) {
  130 + if (h<0) {h=-1; return h;}
  131 +
  132 + h += v*shift;
  133 + shift *=b;
  134 + return h;
  135 +
  136 + }
  137 +
  138 + final public void csa(int b, int v, IFV f) {
  139 + if (h<0) {h=-1; return;}
  140 +
  141 + h += v*shift;
  142 + shift *=b;
  143 + f.add(_li.l2i(h));
  144 + }
  145 +
  146 + final public long csa(int b, int v) {
  147 + if (h<0) {h=-1; return-1; }
  148 +
  149 + h += v*shift;
  150 + shift *=b;
  151 + return h;
  152 + }
  153 +
  154 + public final long getVal(){
  155 + return h;
  156 + }
  157 +
  158 + public final void map(IFV f, long l){
  159 + if (l>0) f.add(this._li.l2i(l));
  160 + }
  161 +
  162 + /**
  163 + * @param f
  164 + */
  165 + final public void add(IFV f) {
  166 + f.add(_li.l2i(h));
  167 + }
  168 +
  169 + final public void cz7() {
  170 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;}
  171 +
  172 + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6;
  173 + shift=a7;
  174 +
  175 + }
  176 +
  177 +
  178 + final public long c7() {
  179 + if (v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;}
  180 +
  181 + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6;
  182 + shift=a7;
  183 + return h;
  184 + }
  185 +
  186 + final public long d7() {
  187 + if (v2<0||v3<0||v4<0||v5<0||v6<0) return -1;
  188 + return v0+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6;
  189 + }
  190 +
  191 + /**
  192 + *
  193 + */
  194 + final public void cz8() {
  195 + if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;}
  196 +
  197 + h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7;
  198 + shift=a8;
  199 + }
  200 +
  201 + final public long d8() {
  202 + if (v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {return-1;}
  203 + return v0+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7;
  204 + }
  205 +
  206 +
  207 +
  208 + /* (non-Javadoc)
  209 + * @see is2.data.DX#computeLabeValue(short, short)
  210 + */
  211 + @Override
  212 + public int computeLabeValue(int label, int shift) {
  213 + return label*shift;
  214 + }
  215 +
  216 +
  217 +
  218 +
  219 +
  220 +}
0 221 \ No newline at end of file
... ...
dependencyParser/basic/mate-tools/src/is2/data/DPSTree.java 0 → 100644
  1 +/**
  2 + *
  3 + */
  4 +package is2.data;
  5 +
  6 +import is2.util.DB;
  7 +
  8 +import java.util.ArrayList;
  9 +import java.util.Collections;
  10 +import java.util.Stack;
  11 +
/**
 * @author Dr. Bernd Bohnet, 17.01.2011
 *
 * Dynamic phrase structure tree.
 *
 * Nodes are identified by their index. For every node the parallel arrays
 * {@link #heads} and {@link #labels} hold the value passed to
 * {@link #createEdge(int, int)} and the phrase label passed to one of the
 * {@code create} methods. Only the indices {@code 0 .. size()-1} are in use;
 * the arrays may be longer.
 */
public class DPSTree {

    /** Number of node slots in use. */
    private int size = 0;

    /** heads[i] is the value stored for node i by {@link #createEdge(int, int)}. */
    public int[] heads;

    /** labels[i] is the phrase label stored for node i. */
    public int[] labels;

    /** Creates an empty tree with room for 30 nodes. */
    public DPSTree() {
        this(30);
    }

    /**
     * Creates an empty tree.
     *
     * @param initialCapacity number of node slots allocated up front
     */
    public DPSTree(int initialCapacity) {
        heads = new int[initialCapacity];
        labels = new int[initialCapacity];
    }

    /**
     * Increases the capacity of this <tt>DPSTree</tt> instance, if
     * necessary, to ensure that it can hold at least the number of nodes
     * specified by the minimum capacity argument.
     *
     * @param minCapacity the desired minimum capacity.
     */
    private void ensureCapacity(int minCapacity) {
        if (minCapacity <= heads.length) {
            return;
        }

        int newCapacity = minCapacity + 1;
        if (newCapacity < minCapacity) {
            // minCapacity + 1 overflowed; fall back to the requested value
            newCapacity = minCapacity;
        }

        heads = grow(heads, newCapacity);
        labels = grow(labels, newCapacity);
    }

    /** Returns a new array of the given length that starts with the content of {@code old}. */
    private static int[] grow(int[] old, int newCapacity) {
        int[] bigger = new int[newCapacity];
        System.arraycopy(old, 0, bigger, 0, old.length);
        return bigger;
    }

    /** @return the number of node slots in use */
    final public int size() {
        return size;
    }

    /** @return true if no node slot is in use */
    final public boolean isEmpty() {
        return size == 0;
    }

    /** Marks all node slots as unused; the array contents are not erased. */
    final public void clear() {
        size = 0;
    }

    /**
     * Reserves the slots {@code 0 .. terminals} and sets the size to
     * {@code terminals + 1}. The content of the reserved slots is not modified.
     *
     * @param terminals number of terminals
     */
    final public void createTerminals(int terminals) {
        ensureCapacity(terminals + 1);
        size = terminals + 1;
    }

    /**
     * Appends a new node.
     *
     * @param phrase label of the new node
     * @return index of the new node
     */
    final public int create(int phrase) {
        ensureCapacity(size + 1);
        labels[size] = phrase;
        size++;
        return size - 1;
    }

    /**
     * Stores a label at a given node index, extending the tree if needed.
     * A negative {@code nodeId} appends a new node instead.
     *
     * @param phrase label of the node
     * @param nodeId index of the node, or a negative value to append
     * @return index of the node that received the label
     */
    public int create(int phrase, int nodeId) {
        if (nodeId < 0) {
            return this.create(phrase);
        }

        ensureCapacity(nodeId + 1);
        labels[nodeId] = phrase;
        // "<=" rather than "<": a node stored exactly at index == size must be
        // counted too, otherwise the next create(phrase) overwrites it and
        // clone() does not copy it.
        if (size <= nodeId) {
            size = nodeId + 1;
        }
        return nodeId;
    }

    /**
     * Stores {@code j} as head of node {@code i}. The arrays are not enlarged
     * here, so {@code i} has to be an index that already fits into {@link #heads}.
     *
     * @param i index of the node
     * @param j value stored in {@code heads[i]}
     */
    public void createEdge(int i, int j) {
        heads[i] = j;
    }

    /**
     * Creates an independent copy holding the first {@code size()} entries of
     * both arrays.
     *
     * @return the copy
     */
    @Override
    public DPSTree clone() {
        DPSTree copy = new DPSTree(this.size + 1);
        System.arraycopy(heads, 0, copy.heads, 0, size);
        System.arraycopy(labels, 0, copy.labels, 0, size);
        copy.size = size;
        return copy;
    }

}
0 116 \ No newline at end of file
... ...
dependencyParser/basic/mate-tools/src/is2/data/DX.java 0 → 100644
  1 +/**
  2 + *
  3 + */
  4 +package is2.data;
  5 +
  6 +import is2.data.IFV;
  7 +
  8 +/**
  9 + * @author Dr. Bernd Bohnet, 30.08.2011
  10 + *
  11 + *
  12 + */
  13 +public abstract class DX {
  14 +
  15 + public long a0,a1,a2,a3,a4,a5,a6,a7,a8,a9;
  16 + public long v0,v1,v2,v3,v4,v5,v6,v7,v8,v9;
  17 +
  18 + public abstract void cz3();
  19 +
  20 + public abstract void cz4();
  21 +
  22 + public abstract void cz5();
  23 +
  24 + public abstract void cz6();
  25 +
  26 + public abstract void cz7();
  27 +
  28 + public abstract void cz8();
  29 +
  30 + public abstract void clean();
  31 +
  32 + public abstract long cs(int b, int v);
  33 +
  34 + public abstract long csa(int b, int v);
  35 +
  36 + public abstract void csa(int b, int v, IFV f);
  37 +
  38 + /**
  39 + * @return
  40 + */
  41 + public abstract long getVal();
  42 +
  43 + /**
  44 + * @param f
  45 + * @param l
  46 + */
  47 + public abstract void map(IFV f, long l);
  48 +
  49 + /**
  50 + * @param label
  51 + * @param s_type
  52 + * @return
  53 + */
  54 + public abstract int computeLabeValue(int label,int s_type) ;
  55 +
  56 + public abstract void fix();
  57 +
  58 +}
0 59 \ No newline at end of file
... ...
dependencyParser/basic/mate-tools/src/is2/data/DataF.java 0 → 100755
  1 +package is2.data;
  2 +
  3 +
  4 +
  5 +final public class DataF {
  6 +
  7 + final public short typesLen;
  8 + final public int len;
  9 +
  10 + // first order features
  11 + final public float[][] pl;
  12 +
  13 + // remove !!!!
  14 +// final public float[][] highestLab;
  15 +
  16 + //final public FV[][][] label;
  17 + final public float[][][][] lab;
  18 +
  19 +
  20 + public FV fv;
  21 +
  22 + final public float[][][][][] sib;
  23 +
  24 + final public float[][][][][] gra;
  25 +
  26 +
  27 + public DataF(int length, short types) {
  28 + typesLen=types;
  29 + len =length;
  30 +
  31 + pl = new float[length][length];
  32 + lab = new float[length][length][types][2];
  33 + // highestLab = new float[length][length];
  34 +
  35 + sib = new float[length][length][length][2][];
  36 + gra = new float[length][length][length][2][];
  37 +
  38 + }
  39 +}
... ...
dependencyParser/basic/mate-tools/src/is2/data/DataFES.java 0 → 100644
  1 +package is2.data;
  2 +
  3 +
  4 +
  5 +final public class DataFES {
  6 +
  7 + final public short typesLen;
  8 + final public int len;
  9 +
  10 + // first order features
  11 + final public float[][] pl;
  12 +
  13 + // remove !!!!
  14 +// final public float[][] highestLab;
  15 +
  16 + //final public FV[][][] label;
  17 + final public float[][][] lab;
  18 +
  19 +
  20 + public FV fv;
  21 +
  22 + final public float[][][][] sib;
  23 +
  24 + final public float[][][][] gra;
  25 +
  26 +
  27 + public DataFES(int length, short types) {
  28 + typesLen=types;
  29 + len =length;
  30 +
  31 + pl = new float[length][length];
  32 + lab = new float[length][length][types];
  33 +
  34 + sib = new float[length][length][length][];
  35 + gra = new float[length][length][length][];
  36 +
  37 + }
  38 +}
... ...
dependencyParser/basic/mate-tools/src/is2/data/DataT.java 0 → 100644
  1 +package is2.data;
  2 +
  3 +
  4 +
/**
 * Container of a single float table whose dimensions are derived from a
 * length and a number of types. The class only allocates the table.
 */
public final class DataT {

    /** Number of types the table was sized for. */
    public final short typesLen;

    /** Length the table was sized for. */
    public final int len;

    /** Indexed [a][b][lab][op] with sizes [length][length][types][4]. */
    public final float[][][][] lab;

    /**
     * Allocates the table.
     *
     * @param length size of the first two dimensions
     * @param types  size of the third dimension
     */
    public DataT(int length, short types) {
        this.len = length;
        this.typesLen = types;
        this.lab = new float[length][length][types][4];
    }
}
... ...