From b14f366d3736553d40e94d29cd005e7c919498d1 Mon Sep 17 00:00:00 2001
From: Jan Lupa <jl320820@students.mimuw.edu.pl>
Date: Mon, 8 Aug 2016 16:05:58 +0200
Subject: [PATCH] Added mate parser's original code
---
dependencyParser/mate-tools/.classpath | 8 ++++++++
dependencyParser/mate-tools/.externalToolBuilders/New_Builder.launch | 8 ++++++++
dependencyParser/mate-tools/.externalToolBuilders/ana.launch | 20 ++++++++++++++++++++
dependencyParser/mate-tools/.project | 17 +++++++++++++++++
dependencyParser/mate-tools/build.xml | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/lib/commons-math-2.2.jar | Bin 0 -> 988514 bytes
dependencyParser/mate-tools/lib/trove-2.0.4.jar | Bin 0 -> 746790 bytes
dependencyParser/mate-tools/src/decoder/ParallelDecoder.java | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest.java | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest2.java | 151 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/examples/DependencyParser.java | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/examples/FullPipelineSpanish.java | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/examples/FullPipelineTest.java | 110 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/examples/MorphTagger.java | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/examples/ParseOnly.java | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/examples/Pipeline.java | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/extractors/Extractor.java | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/extractors/ExtractorClusterStacked.java | 958 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/extractors/ExtractorClusterStackedR2.java | 937 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/extractors/ExtractorFactory.java | 44 ++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/extractors/ExtractorReranker.java | 621 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/extractors/ParallelExtract.java | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/Closed.java | 31 +++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/Cluster.java | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/D4.java | 191 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/D6.java | 197 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/D7.java | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/DPSTree.java | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/DX.java | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/DataF.java | 39 +++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/DataFES.java | 38 ++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/DataT.java | 25 +++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/Edges.java | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/F2S.java | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/F2SD.java | 44 ++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/F2SF.java | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/F2SP.java | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/F2ST.java | 47 +++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/FV.java | 551 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/FVR.java | 468 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/IEncoder.java | 26 ++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/IEncoderPlus.java | 28 ++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/IFV.java | 28 ++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/Instances.java | 392 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/InstancesTagger.java | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/IntIntHash.java | 270 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/Long2Int.java | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/Long2IntExact.java | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/Long2IntInterface.java | 15 +++++++++++++++
dependencyParser/mate-tools/src/is2/data/Long2IntQuick.java | 47 +++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/MFB.java | 256 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/MFC.java | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/MFO.java | 386 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/Open.java | 37 +++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/PSTree.java | 711 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/Parameter.java | 13 +++++++++++++
dependencyParser/mate-tools/src/is2/data/ParametersFloat.java | 183 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/Parse.java | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/ParseNBest.java | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/PipeGen.java | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/PrimeFinder.java | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/RandomIndex.java | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/SentenceData09.java | 530 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/data/Thesaurus.java | 194 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/io/CONLLReader04.java | 272 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/io/CONLLReader06.java | 275 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/io/CONLLReader08.java | 413 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/io/CONLLReader09.java | 411 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/io/CONLLWriter06.java | 193 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/io/CONLLWriter09.java | 307 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/io/IOGenerals.java | 33 +++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/io/PSReader.java | 23 +++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/io/TigerReader.java | 403 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/lemmatizer/Evaluator.java | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/lemmatizer/Lemmatizer.java | 535 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/lemmatizer/MFO.java | 257 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/lemmatizer/Options.java | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/lemmatizer/Pipe.java | 585 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/lemmatizer/StringEdit.java | 318 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/mtag/Convert.java | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/mtag/Evaluator.java | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/mtag/ExtractorM.java | 514 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/mtag/MFO.java | 540 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/mtag/Options.java | 45 +++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/mtag/Pipe.java | 508 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/mtag/Tagger.java | 371 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/Closed.java | 32 ++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/D5.java | 254 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/Decoder.java | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/Edges.java | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/Evaluator.java | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/Extractor.java | 973 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/MFO.java | 257 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/Open.java | 38 ++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/Options.java | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/ParallelDecoder.java | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/ParallelExtract.java | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/ParallelRearrange.java | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/Parameters.java | 38 ++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/ParametersFloat.java | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/Parser.java | 664 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/Pipe.java | 221 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parser/package.html | 11 +++++++++++
dependencyParser/mate-tools/src/is2/parserR2/Decoder.java | 377 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parserR2/Options.java | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parserR2/Parameters.java | 38 ++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parserR2/ParametersFloat.java | 181 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parserR2/Parser.java | 690 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parserR2/Pipe.java | 257 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parserR2/PipeReranker.java | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parserR2/Reranker.java | 1059 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/parserR2/package.html | 3 +++
dependencyParser/mate-tools/src/is2/tag/ExtractorT2.java | 523 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/tag/Lexicon.java | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/tag/MFO.java | 537 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/tag/Options.java | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/tag/POS.java | 29 +++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/tag/Tagger.java | 500 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/tag/package.html | 4 ++++
dependencyParser/mate-tools/src/is2/tools/IPipe.java | 30 ++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/tools/Retrainable.java | 25 +++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/tools/Tool.java | 25 +++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/tools/ToolIO.java | 17 +++++++++++++++++
dependencyParser/mate-tools/src/is2/tools/Train.java | 25 +++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/Convert.java | 455 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/Convert0409.java | 182 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/ConvertADJ.java | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/ConvertLowerCase0909.java | 89 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/ConvertTiger2CoNLL.java | 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/DB.java | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/Edges.java | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/Evaluator.java | 616 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/EvaluatorTagger.java | 736 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/ExtractParagraphs.java | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/IntStack.java | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/Long2Int.java | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/Options.java | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/OptionsSuper.java | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/ParserEvaluator.java | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/Split.java | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/Split2.java | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
dependencyParser/mate-tools/src/is2/util/Split3.java | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
142 files changed, 29145 insertions(+), 0 deletions(-)
create mode 100644 dependencyParser/mate-tools/.classpath
create mode 100644 dependencyParser/mate-tools/.externalToolBuilders/New_Builder.launch
create mode 100644 dependencyParser/mate-tools/.externalToolBuilders/ana.launch
create mode 100644 dependencyParser/mate-tools/.project
create mode 100644 dependencyParser/mate-tools/build.xml
create mode 100644 dependencyParser/mate-tools/lib/commons-math-2.2.jar
create mode 100644 dependencyParser/mate-tools/lib/trove-2.0.4.jar
create mode 100755 dependencyParser/mate-tools/src/decoder/ParallelDecoder.java
create mode 100755 dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest.java
create mode 100644 dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest2.java
create mode 100644 dependencyParser/mate-tools/src/examples/DependencyParser.java
create mode 100644 dependencyParser/mate-tools/src/examples/FullPipelineSpanish.java
create mode 100644 dependencyParser/mate-tools/src/examples/FullPipelineTest.java
create mode 100644 dependencyParser/mate-tools/src/examples/MorphTagger.java
create mode 100755 dependencyParser/mate-tools/src/examples/ParseOnly.java
create mode 100644 dependencyParser/mate-tools/src/examples/Pipeline.java
create mode 100644 dependencyParser/mate-tools/src/extractors/Extractor.java
create mode 100755 dependencyParser/mate-tools/src/extractors/ExtractorClusterStacked.java
create mode 100644 dependencyParser/mate-tools/src/extractors/ExtractorClusterStackedR2.java
create mode 100644 dependencyParser/mate-tools/src/extractors/ExtractorFactory.java
create mode 100644 dependencyParser/mate-tools/src/extractors/ExtractorReranker.java
create mode 100755 dependencyParser/mate-tools/src/extractors/ParallelExtract.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/Closed.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/Cluster.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/D4.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/D6.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/D7.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/DPSTree.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/DX.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/DataF.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/DataFES.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/DataT.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/Edges.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/F2S.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/F2SD.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/F2SF.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/F2SP.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/F2ST.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/FV.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/FVR.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/IEncoder.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/IEncoderPlus.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/IFV.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/Instances.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/InstancesTagger.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/IntIntHash.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/Long2Int.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/Long2IntExact.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/Long2IntInterface.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/Long2IntQuick.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/MFB.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/MFC.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/MFO.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/Open.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/PSTree.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/Parameter.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/ParametersFloat.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/Parse.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/ParseNBest.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/PipeGen.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/PrimeFinder.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/RandomIndex.java
create mode 100755 dependencyParser/mate-tools/src/is2/data/SentenceData09.java
create mode 100644 dependencyParser/mate-tools/src/is2/data/Thesaurus.java
create mode 100644 dependencyParser/mate-tools/src/is2/io/CONLLReader04.java
create mode 100755 dependencyParser/mate-tools/src/is2/io/CONLLReader06.java
create mode 100644 dependencyParser/mate-tools/src/is2/io/CONLLReader08.java
create mode 100755 dependencyParser/mate-tools/src/is2/io/CONLLReader09.java
create mode 100755 dependencyParser/mate-tools/src/is2/io/CONLLWriter06.java
create mode 100755 dependencyParser/mate-tools/src/is2/io/CONLLWriter09.java
create mode 100644 dependencyParser/mate-tools/src/is2/io/IOGenerals.java
create mode 100644 dependencyParser/mate-tools/src/is2/io/PSReader.java
create mode 100644 dependencyParser/mate-tools/src/is2/io/TigerReader.java
create mode 100755 dependencyParser/mate-tools/src/is2/lemmatizer/Evaluator.java
create mode 100755 dependencyParser/mate-tools/src/is2/lemmatizer/Lemmatizer.java
create mode 100755 dependencyParser/mate-tools/src/is2/lemmatizer/MFO.java
create mode 100755 dependencyParser/mate-tools/src/is2/lemmatizer/Options.java
create mode 100755 dependencyParser/mate-tools/src/is2/lemmatizer/Pipe.java
create mode 100755 dependencyParser/mate-tools/src/is2/lemmatizer/StringEdit.java
create mode 100755 dependencyParser/mate-tools/src/is2/mtag/Convert.java
create mode 100755 dependencyParser/mate-tools/src/is2/mtag/Evaluator.java
create mode 100644 dependencyParser/mate-tools/src/is2/mtag/ExtractorM.java
create mode 100755 dependencyParser/mate-tools/src/is2/mtag/MFO.java
create mode 100755 dependencyParser/mate-tools/src/is2/mtag/Options.java
create mode 100755 dependencyParser/mate-tools/src/is2/mtag/Pipe.java
create mode 100644 dependencyParser/mate-tools/src/is2/mtag/Tagger.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/Closed.java
create mode 100644 dependencyParser/mate-tools/src/is2/parser/D5.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/Decoder.java
create mode 100644 dependencyParser/mate-tools/src/is2/parser/Edges.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/Evaluator.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/Extractor.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/MFO.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/Open.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/Options.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/ParallelDecoder.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/ParallelExtract.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/ParallelRearrange.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/Parameters.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/ParametersFloat.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/Parser.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/Pipe.java
create mode 100755 dependencyParser/mate-tools/src/is2/parser/package.html
create mode 100755 dependencyParser/mate-tools/src/is2/parserR2/Decoder.java
create mode 100755 dependencyParser/mate-tools/src/is2/parserR2/Options.java
create mode 100755 dependencyParser/mate-tools/src/is2/parserR2/Parameters.java
create mode 100755 dependencyParser/mate-tools/src/is2/parserR2/ParametersFloat.java
create mode 100755 dependencyParser/mate-tools/src/is2/parserR2/Parser.java
create mode 100755 dependencyParser/mate-tools/src/is2/parserR2/Pipe.java
create mode 100644 dependencyParser/mate-tools/src/is2/parserR2/PipeReranker.java
create mode 100644 dependencyParser/mate-tools/src/is2/parserR2/Reranker.java
create mode 100755 dependencyParser/mate-tools/src/is2/parserR2/package.html
create mode 100644 dependencyParser/mate-tools/src/is2/tag/ExtractorT2.java
create mode 100644 dependencyParser/mate-tools/src/is2/tag/Lexicon.java
create mode 100644 dependencyParser/mate-tools/src/is2/tag/MFO.java
create mode 100644 dependencyParser/mate-tools/src/is2/tag/Options.java
create mode 100644 dependencyParser/mate-tools/src/is2/tag/POS.java
create mode 100644 dependencyParser/mate-tools/src/is2/tag/Tagger.java
create mode 100644 dependencyParser/mate-tools/src/is2/tag/package.html
create mode 100644 dependencyParser/mate-tools/src/is2/tools/IPipe.java
create mode 100644 dependencyParser/mate-tools/src/is2/tools/Retrainable.java
create mode 100644 dependencyParser/mate-tools/src/is2/tools/Tool.java
create mode 100644 dependencyParser/mate-tools/src/is2/tools/ToolIO.java
create mode 100644 dependencyParser/mate-tools/src/is2/tools/Train.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/Convert.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/Convert0409.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/ConvertADJ.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/ConvertLowerCase0909.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/ConvertTiger2CoNLL.java
create mode 100755 dependencyParser/mate-tools/src/is2/util/DB.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/Edges.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/Evaluator.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/EvaluatorTagger.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/ExtractParagraphs.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/IntStack.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/Long2Int.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/Options.java
create mode 100755 dependencyParser/mate-tools/src/is2/util/OptionsSuper.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/ParserEvaluator.java
create mode 100755 dependencyParser/mate-tools/src/is2/util/Split.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/Split2.java
create mode 100644 dependencyParser/mate-tools/src/is2/util/Split3.java
diff --git a/dependencyParser/mate-tools/.classpath b/dependencyParser/mate-tools/.classpath
new file mode 100644
index 0000000..8092159
--- /dev/null
+++ b/dependencyParser/mate-tools/.classpath
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" path="src"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
+ <classpathentry kind="lib" path="/mtt/lib/trove.jar"/>
+ <classpathentry kind="lib" path="lib/commons-math-2.2.jar"/>
+ <classpathentry kind="output" path="classes"/>
+</classpath>
diff --git a/dependencyParser/mate-tools/.externalToolBuilders/New_Builder.launch b/dependencyParser/mate-tools/.externalToolBuilders/New_Builder.launch
new file mode 100644
index 0000000..eca73f7
--- /dev/null
+++ b/dependencyParser/mate-tools/.externalToolBuilders/New_Builder.launch
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<launchConfiguration type="org.eclipse.ui.externaltools.ProgramBuilderLaunchConfigurationType">
+<booleanAttribute key="org.eclipse.debug.ui.ATTR_LAUNCH_IN_BACKGROUND" value="false"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/mate-tools/.project}"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_RUN_BUILD_KINDS" value="full,incremental,"/>
+<booleanAttribute key="org.eclipse.ui.externaltools.ATTR_TRIGGERS_CONFIGURED" value="true"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_WORKING_DIRECTORY" value="${workspace_loc:/mate-tools}"/>
+</launchConfiguration>
diff --git a/dependencyParser/mate-tools/.externalToolBuilders/ana.launch b/dependencyParser/mate-tools/.externalToolBuilders/ana.launch
new file mode 100644
index 0000000..09df90d
--- /dev/null
+++ b/dependencyParser/mate-tools/.externalToolBuilders/ana.launch
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<launchConfiguration type="org.eclipse.ant.AntBuilderLaunchConfigurationType">
+<booleanAttribute key="org.eclipse.ant.ui.ATTR_TARGETS_UPDATED" value="true"/>
+<booleanAttribute key="org.eclipse.ant.ui.DEFAULT_VM_INSTALL" value="false"/>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_PATHS">
+<listEntry value="/mate-tools/scripts/build.xml"/>
+</listAttribute>
+<listAttribute key="org.eclipse.debug.core.MAPPED_RESOURCE_TYPES">
+<listEntry value="1"/>
+</listAttribute>
+<booleanAttribute key="org.eclipse.debug.ui.ATTR_LAUNCH_IN_BACKGROUND" value="false"/>
+<stringAttribute key="org.eclipse.jdt.launching.CLASSPATH_PROVIDER" value="org.eclipse.ant.ui.AntClasspathProvider"/>
+<booleanAttribute key="org.eclipse.jdt.launching.DEFAULT_CLASSPATH" value="true"/>
+<stringAttribute key="org.eclipse.jdt.launching.PROJECT_ATTR" value="mate-tools"/>
+<booleanAttribute key="org.eclipse.ui.externaltools.ATTR_BUILDER_ENABLED" value="false"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/mate-tools/scripts/build.xml}"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_RUN_BUILD_KINDS" value="full,incremental,"/>
+<booleanAttribute key="org.eclipse.ui.externaltools.ATTR_TRIGGERS_CONFIGURED" value="true"/>
+<stringAttribute key="org.eclipse.ui.externaltools.ATTR_WORKING_DIRECTORY" value="${workspace_loc:/mate-tools}"/>
+</launchConfiguration>
diff --git a/dependencyParser/mate-tools/.project b/dependencyParser/mate-tools/.project
new file mode 100644
index 0000000..f813b9e
--- /dev/null
+++ b/dependencyParser/mate-tools/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>mate-tools</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+</projectDescription>
diff --git a/dependencyParser/mate-tools/build.xml b/dependencyParser/mate-tools/build.xml
new file mode 100644
index 0000000..c558279
--- /dev/null
+++ b/dependencyParser/mate-tools/build.xml
@@ -0,0 +1,64 @@
+<project name="analyse" default="compile" basedir=".">
+ <description>
+ The base ant build file.
+ </description>
+
+ <!-- set global properties for this build -->
+ <property name="src" location="src"/>
+ <property name="classes" location="classes"/>
+ <property name="dist" location="dist"/>
+ <property name="include" location="include"/>
+
+
+ <target name="init" description="Clears the /class directory">
+ <!-- Create the time stamp -->
+ <tstamp/>
+ <mkdir dir="dist"/>
+ <mkdir dir="javadoc"/>
+ <mkdir dir="classes"/>
+ </target>
+
+ <target name="compile" depends="init" description="Compile the source" >
+ <!-- Compile the java code from ${src} into ${build} executable="javac" -->
+
+ <javac srcdir="${src}"
+ destdir="${classes}"
+ includeantruntime="false"
+ executable="javac.exe"
+ optimize="true"
+ debug="off"
+ classpath=""/>
+
+ </target>
+
+ <target name="build" description="Build the distribution .jar file" >
+ <!-- Create the temporary distribution directory -->
+ <delete includeEmptyDirs="true"><fileset dir="dist" includes="**/*" excludes="gtc*.jar"/></delete>
+ <mkdir dir="${dist}/temp-${DSTAMP}"/>
+ <copy todir="${dist}/temp-${DSTAMP}"><fileset dir="${classes}" /></copy>
+ <copy todir="${dist}/temp-${DSTAMP}"><fileset dir="${include}" /></copy>
+ <!-- copy everything from /include/others to dist
+ <copy todir="${dist}/temp-${DSTAMP}"><fileset dir="${include}/others" /></copy>-->
+ <!-- copy everything from /include/classes to dist
+ <copy todir="${dist}/temp-${DSTAMP}"><fileset dir="${include}/classes" /></copy>-->
+ <!-- pack everything into a .jar file -->
+ <jar jarfile="${dist}/anna-3.5.jar"
+ basedir="${dist}/temp-${DSTAMP}"/>
+ <delete dir="${dist}/temp-{DSTAMP}" />
+ </target>
+
+ <target name="javadoc" depends="init" description="Create the javadoc API documentation" >
+ <delete includeEmptyDirs="true"><fileset dir="javadoc" includes="**/*"/></delete>
+ <!-- TODO: you might add new packages to packagenames -->
+ <javadoc destdir="javadoc" access="package" source="1.4"
+ use="false" notree="false" nonavbar="false" noindex="true"
+ splitindex="false" author="true" version="true"
+ nodeprecatedlist="true" nodeprecated="false"
+ packagenames="gtc.*.*"
+ sourcepath="src" classpath="class"/>
+ </target>
+
+
+ <target name="all" depends="init,compile,javadoc" description="Make all" />
+</project>
+
diff --git a/dependencyParser/mate-tools/lib/commons-math-2.2.jar b/dependencyParser/mate-tools/lib/commons-math-2.2.jar
new file mode 100644
index 0000000..b29a39c
Binary files /dev/null and b/dependencyParser/mate-tools/lib/commons-math-2.2.jar differ
diff --git a/dependencyParser/mate-tools/lib/trove-2.0.4.jar b/dependencyParser/mate-tools/lib/trove-2.0.4.jar
new file mode 100644
index 0000000..cb1c8f1
Binary files /dev/null and b/dependencyParser/mate-tools/lib/trove-2.0.4.jar differ
diff --git a/dependencyParser/mate-tools/src/decoder/ParallelDecoder.java b/dependencyParser/mate-tools/src/decoder/ParallelDecoder.java
new file mode 100755
index 0000000..0dd1c18
--- /dev/null
+++ b/dependencyParser/mate-tools/src/decoder/ParallelDecoder.java
@@ -0,0 +1,155 @@
+package decoder;
+
+import is2.data.Closed;
+import is2.data.DataF;
+import is2.data.Edges;
+import is2.data.Open;
+
+import java.util.ArrayList;
+import java.util.concurrent.Callable;
+
+/**
+ * @author Bernd Bohnet, 30.08.2009
+ *
+ * This class implements a parallel feature extractor.
+ */
+final public class ParallelDecoder implements Callable<Object>
+{
+ // some constants
+ private static final float INIT_BEST = (-1.0F / 0.0F);
+ private static final boolean[] DIR ={false,true};
+
+ // the data space of the weights for a dependency tree
+ final private DataF x;
+
+ private short[] pos;
+
+ private Open O[][][][];
+ private Closed C[][][][] ;
+
+ private int n;
+
+ boolean done=false;
+ public boolean waiting =false;
+
+ /**
+ * Initialize the parallel decoder.
+ *
+ * @param pos part-of-speech
+ * @param d data
+ * @param edges part-of-speech edge mapping
+ * @param o open spans
+ * @param c closed spans
+ * @param n number of words
+ */
+ public ParallelDecoder(short[] pos, DataF d, Open o[][][][], Closed c[][][][], int n) {
+
+ this.pos =pos;
+ this.x =d;
+
+ this.O=o;
+ this.C=c;
+ this.n=n;
+ }
+
+
+ private static class DSet { short w1,w2;}
+
+ @Override
+ public Object call() {
+
+ while (true){
+
+ DSet set = get();
+ if (done && set==null) break;
+
+ if (set ==null) return null;
+
+ short s=set.w1, t=set.w2;
+
+ for(short dir =1;dir>=0;dir--) {
+
+ short[] labs = (dir==1) ? Edges.get(pos[s],pos[t], false):Edges.get(pos[t],pos[s], true);
+
+ O[s][t][dir] = new Open[labs.length];
+ for (int l = O[s][t][dir].length - 1; l >= 0; l--) {
+
+ double tRP = INIT_BEST;
+
+ Closed tL = null, tR = null;
+
+ for (int r = s; r < t; r++) {
+
+ if (s == 0 && r != 0) continue;
+
+ double tLPr = INIT_BEST,tRPr = INIT_BEST;
+ Closed tLCld = null, tRCld = null;
+
+ if (r == s) tLPr = dir==1 ? x.sib[s][t][s][0][l] : x.gra[t][s][s][1 ][l];
+ else
+ for (int i = s + 1; i <= r; i++)
+ if (((dir==1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + C[s][r][1][i].p) > tLPr) {
+ tLPr = ((dir==1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + C[s][r][1][i].p);tLCld = C[s][r][1][i];}
+
+ if (r == t-1) tRPr = dir==1 ? x.gra[s][t][s][0][l] : x.sib[t][s][s][1][l];
+ else
+ for (int i = r + 1; i < t; i++)
+ if (((dir == 1 ? x.gra[s][t][i][0][l] : x.sib[t][s][i][1][l]) + C[r+1][t][0][i].p) > tRPr) {
+ tRPr = ((dir==1?x.gra[s][t][i][0][l]:x.sib[t][s][i][1][l]) + C[r+1][t][0][i].p); tRCld=C[r + 1][t][0][i];}
+
+ if (tLPr + tRPr > tRP) {tRP = tLPr + tRPr; tL = tLCld;tR = tRCld;}
+ }
+ O[s][t][dir][l] = new Open(s, t, dir, labs[l],tL, tR,
+ (float) ( tRP+((dir==1)?x.pl[s][t]: x.pl[t][s]) + ((dir==1)? x.lab[s][t][labs[l]][0]:x.lab[t][s][labs[l]][1])));
+ }
+ }
+ C[s][t][1] = new Closed[n]; C[s][t][0] = new Closed[n];
+
+ for (int m = s ; m <= t; m++) {
+ for(boolean d : DIR) {
+ if ((d && m!=s)||!d && (m!=t && s!=0)) {
+
+ // create closed structure
+
+ double top = INIT_BEST;
+
+ Open tU = null; Closed tL = null;
+ int numLabels =O[(d ? s : m)][(d ? m : t)][d?1:0].length;
+
+ //for (int l = numLabels-1; l >=0; l--) {
+ for (int l = 0; l < numLabels; l++) {
+
+ Open hi = O[(d ? s : m)][(d ? m : t)][d?1:0][l];
+ for (int amb = m + (d?1:-1); amb != (d?t:s) + (d?1:-1); amb += (d?1:-1)) {
+
+ if ((hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][d?0:1][l]) > top) {
+ top = (hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][(d?0:1)][l]); tU = hi; tL=C[d?m:s][d?t:m][d?1:0][amb];}
+ }
+
+ if ((m == (d ? t : s)) && (hi.p + x.gra[d?s:t][m][d?s:t][(d ? 0 :1)][l]) > top) {
+ top = (hi.p + x.gra[(d ? s : t)][m][d?s:t][d?0:1][l]); tU = hi; tL = null;}
+ }
+ C[s][t][d?1:0][m] = new Closed(s, t, m, d?1:0,tU,tL,(float) top);
+ }
+ }
+ }
+ }
+ return null;
+ }
+
+ public static ArrayList<DSet> sets = new ArrayList<DSet>();
+
+ static synchronized private DSet get() {
+ synchronized (sets) {
+ if (sets.size()==0) return null;
+ return sets.remove(sets.size()-1);
+ }
+ }
+
+ public static void add(short w1, short w2){
+ DSet ds =new DSet();
+ ds.w1=w1;
+ ds.w2=w2;
+ sets.add(ds);
+ }
+}
diff --git a/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest.java b/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest.java
new file mode 100755
index 0000000..493917b
--- /dev/null
+++ b/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest.java
@@ -0,0 +1,136 @@
+package decoder;
+
+import is2.data.DataF;
+import is2.data.Edges;
+import is2.data.Parse;
+import is2.data.ParseNBest;
+
+import java.util.ArrayList;
+import java.util.concurrent.Callable;
+
+import extractors.Extractor;
+
+/**
+ * @author Dr. Bernd Bohnet, 30.08.2009
+ *
+ * This class implements a parallel edge rearrangement for non-projective parsing;
+ * The linear method was first suggest by Rayn McDonald et. al. 2005.
+ */
+final public class ParallelRearrangeNBest implements Callable<Object> {
+
+ // new parent child combination to explore
+ final static class PA {
+ final float p;
+ final short ch, pa;
+
+ float best;
+
+
+
+ public PA(float p2, short ch2, short pa2) { p=p2; ch=ch2;pa=pa2;}
+ }
+
+ // list of parent child combinations
+ private static ArrayList<PA> parents = new ArrayList<PA>();
+
+ // some data from the dependency tree
+ private short[] pos;
+ private DataF x;
+ private boolean[][] isChild ;
+ public short[] heads,types;
+ private float lastNBest;
+ private float best; // best so far
+ private float threshold;
+ private Extractor extractor;
+
+
+ /**
+ * Initialize the parallel rearrange thread
+ *
+ * @param isChild2 is a child
+ * @param edgesC the part-of-speech edge mapping
+ * @param pos the part-of-speech
+ * @param x the data
+ * @param lastNBest
+ * @param s the heads
+ * @param ts the types
+ */
+ public ParallelRearrangeNBest(short[] pos , DataF x, Parse p, float lastNBest, Extractor extractor, float best, float threshold) {
+
+
+ heads=p.heads;
+
+ types= p.labels;
+
+ isChild = new boolean[heads.length][heads.length];
+
+ for(int i = 1, l1=1; i < heads.length; i++,l1=i)
+ while((l1= heads[l1]) != -1) isChild[l1][i] = true;
+
+
+ this.lastNBest =lastNBest;
+ this.pos =pos;
+ this.x=x;
+
+ this.extractor = extractor;
+ this.best=best;
+ this.threshold = threshold;
+ }
+
+ public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>();
+
+ @Override
+ public Object call() {
+
+ // check the list of new possible parents and children for a better combination
+ for(int ch = 1; ch < heads.length; ch++) {
+ for(short pa = 0; pa < heads.length; pa++) {
+ if(ch == pa || pa == heads[ch] || isChild[ch][pa]) continue;
+
+ short oldP = heads[ch], oldT = types[ch];
+ heads[ch]=pa;
+
+ short[] labels = Edges.get(pos[pa], pos[ch],ch<pa);
+
+ for(int l=0;l<labels.length;l++) {
+
+ types[ch]=labels[l];
+ float p_new = extractor.encode3(pos, heads, types, x);
+
+ if (p_new<lastNBest || ((best+this.threshold)>p_new)) continue;
+
+ ParseNBest p = new ParseNBest();
+ p.signature(heads, types);
+ p.f1=p_new;
+ parses.add(p);
+ }
+
+ // change back
+ heads[ch]= oldP; types[ch]=oldT;
+
+ // consider changes to labels only
+ labels = Edges.get(pos[oldP], pos[ch],ch<oldP);
+
+ for(int l=0;l<labels.length;l++) {
+
+ types[ch]=labels[l];
+ float p_new = (float) extractor.encode3(pos, heads, types, x);
+
+ // optimization: add only if larger than smallest of n-best
+ if (p_new<lastNBest || ((best+this.threshold)>p_new)) continue;
+
+ ParseNBest p = new ParseNBest();
+ p.signature(heads, types);
+ p.f1=p_new;
+ parses.add(p);
+ }
+
+ heads[ch]= oldP; types[ch]=oldT;
+ }
+ }
+ return parses;
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest2.java b/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest2.java
new file mode 100644
index 0000000..a25b392
--- /dev/null
+++ b/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest2.java
@@ -0,0 +1,151 @@
+package decoder;
+
+import is2.data.DataF;
+import is2.data.Edges;
+import is2.data.Parse;
+import is2.data.ParseNBest;
+
+import java.util.ArrayList;
+import java.util.concurrent.Callable;
+
+import decoder.ParallelRearrangeNBest.PA;
+
+import extractors.Extractor;
+
+/**
+ * @author Dr. Bernd Bohnet, 30.08.2009
+ *
+ * This class implements a parallel edge rearrangement for non-projective parsing;
+ * The linear method was first suggested by Ryan McDonald et al., 2005.
+ */
+final public class ParallelRearrangeNBest2 implements Callable<Object> {
+
+ // new parent child combination to explore
+ final static class PA {
+ final float p;
+ final short ch, pa;
+
+
+ public short[] heads,types;
+
+ public PA(Parse p, short ch2, short pa2) {
+ this.p =(float)p.f1;
+ heads =p.heads;
+ types=p.labels;
+ ch=ch2;pa=pa2;
+
+ }
+ }
+
+ // list of parent child combinations
+ private static ArrayList<PA> parents = new ArrayList<PA>();
+
+ // some data from the dependency tree
+ private short[] pos;
+ private DataF x;
+ private float lastNBest;
+ private float threshold;
+ private Extractor extractor;
+
+
+ /**
+ * Initialize the parallel rearrange thread
+ * @param pos the part-of-speech
+ * @param x the data
+ * @param lastNBest
+ * @param isChild2 is a child
+ * @param edgesC the part-of-speech edge mapping
+ * @param s the heads
+ * @param ts the types
+ */
+ public ParallelRearrangeNBest2(short[] pos , DataF x, float lastNBest, Extractor extractor, float threshold) {
+
+
+
+ this.lastNBest =lastNBest;
+ this.pos =pos;
+ this.x=x;
+
+ this.extractor = extractor;
+ this.threshold = threshold;
+ }
+
+ public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>();
+
+ @Override
+ public Object call() {
+
+ try {
+
+ while(true) {
+ PA p = getPA();
+
+ if (p==null) return parses;
+
+ short oldP = p.heads[p.ch], oldT = p.types[p.ch];
+ p.heads[p.ch]=p.pa;
+
+ short[] labels = Edges.get(pos[p.pa], pos[p.ch],p.ch<p.pa);
+
+ for(int l=0;l<labels.length;l++) {
+
+ p.types[p.ch]=labels[l];
+ float p_new = extractor.encode3(pos, p.heads, p.types, x);
+
+ if (p_new<lastNBest || ((p.p+this.threshold)>p_new)) continue;
+
+ ParseNBest x = new ParseNBest();
+ x.signature(p.heads, p.types);
+ x.f1=p_new;
+ parses.add(x);
+ }
+
+ // change back
+ p.heads[p.ch]= oldP; p.types[p.ch]=oldT;
+
+ // consider changes to labels only
+ labels = Edges.get(pos[oldP], pos[p.ch],p.ch<oldP);
+
+ for(int l=0;l<labels.length;l++) {
+
+ p.types[p.ch]=labels[l];
+ float p_new = (float) extractor.encode3(pos, p.heads, p.types, x);
+
+ // optimization: add only if larger than smallest of n-best
+ if (p_new<lastNBest || ((p.p+this.threshold)>p_new)) continue;
+
+ ParseNBest x = new ParseNBest();
+ x.signature(p.heads, p.types);
+ x.f1=p_new;
+ parses.add(x);
+ }
+
+ p.heads[p.ch]= oldP; p.types[p.ch]=oldT;
+ }
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ return parses;
+ }
+
+ /**
+ * Add a child-parent combination which is later explored for rearrangement
+ *
+ * @param p2
+ * @param ch2
+ * @param pa
+ */
+ public static void add(Parse p, short ch2, short pa) {
+ parents.add(new PA(p,ch2,pa));
+ }
+
+ public static PA getPA() {
+ synchronized(parents) {
+ if (parents.size()==0) return null;
+ return parents.remove(parents.size()-1);
+ }
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/examples/DependencyParser.java b/dependencyParser/mate-tools/src/examples/DependencyParser.java
new file mode 100644
index 0000000..c41a101
--- /dev/null
+++ b/dependencyParser/mate-tools/src/examples/DependencyParser.java
@@ -0,0 +1,92 @@
+package examples;
+
+
+import is2.data.InstancesTagger;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.lemmatizer.Lemmatizer;
+import is2.lemmatizer.MFO;
+import is2.parser.Parser;
+import is2.tag.Tagger;
+//import org.apache.log4j.Logger;
+
+import java.io.File;
+import java.util.Arrays;
+
+/**
+ * Dependency parsing
+ *
+ * @author B. Piwowarski <benjamin@bpiwowar.net>
+ * @date 10/10/12
+ */
+//@TaskDescription(name = "dependency-parser", project = "mate-tools")
+public class DependencyParser {
+ // final static private Logger LOGGER = Logger.getLogger(DependencyParser.class);
+ //@Argument(name = "lemmatizer", required = true, checkers = IOChecker.Readable.class)
+ File lemmatizerFile;
+
+ //@Argument(name = "tagger", required = true)
+ File taggerFile;
+
+ //@Argument(name = "parser", required = true)
+ File parserFile;
+
+ //@Override
+ public int execute() throws Throwable {
+
+ // Load lemmatizer
+ //LOGGER.info("Loading lemmatizer");
+ // true = do uppercase lemmatization
+ Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath());
+
+ // Load tagger
+ //LOGGER.info("Loading tagger");
+ Tagger tagger = new Tagger(taggerFile.getAbsolutePath());
+
+ // Load parser
+ //LOGGER.info("Loading parser");
+ Parser parser = new Parser(parserFile.getAbsolutePath());
+
+
+ // Sentences to parse
+ String sentences[] = new String[]{
+ "Airfields have been constructed on a number of the islands .",
+ "Private investment has even made an increasingly modern ferry fleet possible .",
+ "Politically , the 1990s have been relatively quite times for the islands ."
+ };
+
+ CONLLReader09 reader = new CONLLReader09(CONLLReader09.NO_NORMALIZE);
+
+ for (String sentence : sentences) {
+ // Prepare the sentence
+ InstancesTagger instanceTagger = new InstancesTagger();
+ instanceTagger.init(1, new MFO());
+
+ String[] split = sentence.split("\\s+");
+ String[] splitRoot = new String[split.length+1];
+ System.arraycopy(split, 0, splitRoot, 1, split.length);
+ splitRoot[0] = CONLLReader09.ROOT;
+
+ SentenceData09 instance = new SentenceData09();
+ instance.init(splitRoot);
+
+ reader.insert(instanceTagger, instance);
+
+ SentenceData09 result = lemmatizer.apply(instance);
+ tagger.apply(result);
+ result = parser.parse(result, parser.params, false, parser.options);
+
+
+ // Output
+ System.out.println(Arrays.toString(result.forms));
+ System.out.println(Arrays.toString(result.plemmas));
+ System.out.println(Arrays.toString(result.ppos));
+ System.out.println(Arrays.toString(result.pheads));
+ System.out.println(Arrays.toString(result.plabels));
+ System.out.println();
+
+ }
+
+ return 0;
+ }
+}
diff --git a/dependencyParser/mate-tools/src/examples/FullPipelineSpanish.java b/dependencyParser/mate-tools/src/examples/FullPipelineSpanish.java
new file mode 100644
index 0000000..a255595
--- /dev/null
+++ b/dependencyParser/mate-tools/src/examples/FullPipelineSpanish.java
@@ -0,0 +1,98 @@
+package examples;
+
+import is2.data.SentenceData09;
+import is2.io.CONLLWriter09;
+import is2.lemmatizer.Lemmatizer;
+
+import is2.parser.Parser;
+import is2.tag.Tagger;
+import is2.tools.Tool;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.StringTokenizer;
+
+/**
+ * @author Bernd Bohnet, 13.09.2010
+ *
+ * Illustrates the application of the full pipeline: lemmatizer, morphologic tagger, part-of-speech tagger, and parser
+ */
+public class FullPipelineSpanish {
+
+
+ // shows how to parse a sentence and call the tools
+ public static void main(String[] args) throws IOException {
+
+ // Create a data container for a sentence
+ SentenceData09 i = new SentenceData09();
+
+ if (args.length==1) { // input might be a sentence: "This is another test ."
+ StringTokenizer st = new StringTokenizer(args[0]);
+ ArrayList<String> forms = new ArrayList<String>();
+
+ forms.add("<root>");
+ while(st.hasMoreTokens()) forms.add(st.nextToken());
+
+ i.init(forms.toArray(new String[0]));
+
+ } else {
+ // provide a default sentence in Spanish
+ i.init(new String[] {"<root>","También","estuve","emocionado","pero","no","pude","imaginar","mi","vida","sin","la",
+ "gente","tan","intima","a","mí","."});
+
+ }
+
+ // lemmatizing
+
+ System.out.println("\nReading the model of the lemmatizer");
+ Tool lemmatizer = new Lemmatizer("models/lemma-spa.model"); // create a lemmatizer
+
+ System.out.println("Applying the lemmatizer");
+ lemmatizer.apply(i);
+
+ System.out.print(i.toString());
+ System.out.print("Lemmata: "); for (String l : i.plemmas) System.out.print(l+" "); System.out.println();
+
+ // morphologic tagging
+
+ System.out.println("\nReading the model of the morphologic tagger");
+ is2.mtag.Tagger morphTagger = new is2.mtag.Tagger("models/mtag-spa.model");
+
+ System.out.println("\nApplying the morpholoigc tagger");
+ morphTagger.apply(i);
+
+ System.out.print(i.toString());
+ System.out.print("Morph: "); for (String f : i.pfeats) System.out.print(f+" "); System.out.println();
+
+ // part-of-speech tagging
+
+ System.out.println("\nReading the model of the part-of-speech tagger");
+ Tool tagger = new Tagger("models/tag-spa.model");
+
+ System.out.println("\nApplying the part-of-speech tagger");
+ tagger.apply(i);
+
+ System.out.print(i.toString());
+ System.out.print("Part-of-Speech tags: "); for (String p : i.ppos) System.out.print(p+" "); System.out.println();
+
+ // parsing
+
+ System.out.println("\nReading the model of the dependency parser");
+ Tool parser = new Parser("models/prs-spa.model");
+
+ System.out.println("\nApplying the parser");
+ parser.apply(i);
+
+ System.out.println(i.toString());
+
+ // write the result to a file
+
+ CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt");
+
+ writer.write(i, CONLLWriter09.NO_ROOT);
+ writer.finishWriting();
+
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/examples/FullPipelineTest.java b/dependencyParser/mate-tools/src/examples/FullPipelineTest.java
new file mode 100644
index 0000000..c8f992a
--- /dev/null
+++ b/dependencyParser/mate-tools/src/examples/FullPipelineTest.java
@@ -0,0 +1,110 @@
+package examples;
+
+
+import is2.data.InstancesTagger;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter09;
+import is2.lemmatizer.Lemmatizer;
+import is2.lemmatizer.MFO;
+import is2.parser.Parser;
+import is2.tag.Tagger;
+//import org.apache.log4j.Logger;
+
+import java.io.File;
+import java.util.Arrays;
+
+/**
+ * Dependency parsing
+ *
+ * @author B. Piwowarski <benjamin@bpiwowar.net>
+ * @date 10/10/12
+ */
+//@TaskDescription(name = "dependency-parser", project = "mate-tools")
+public class FullPipelineTest {
+ // final static private Logger LOGGER = Logger.getLogger(DependencyParser.class);
+ //@Argument(name = "lemmatizer", required = true, checkers = IOChecker.Readable.class)
+ public File lemmatizerFile;
+
+ //@Argument(name = "tagger", required = true)
+ public File taggerFile;
+
+ public File mtaggerFile;
+
+ //@Argument(name = "parser", required = true)
+ public File parserFile;
+
+ //@Override
+ public int execute(String source, String target) throws Throwable {
+
+ // Load lemmatizer
+ //LOGGER.info("Loading lemmatizer");
+ // true = do uppercase lemmatization
+ Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath());
+
+ // Load tagger
+ //LOGGER.info("Loading tagger");
+ Tagger tagger = new Tagger(taggerFile.getAbsolutePath());
+
+ is2.mtag.Tagger mtagger = new is2.mtag.Tagger(mtaggerFile.getAbsolutePath());
+
+ // Load parser
+ //LOGGER.info("Loading parser");
+ Parser parser = new Parser(parserFile.getAbsolutePath());
+
+
+ CONLLReader09 reader = new CONLLReader09(source);
+ CONLLWriter09 writer = new CONLLWriter09(target);
+
+ int count=0;
+ while (true) {
+ // Prepare the sentence
+ InstancesTagger is = new InstancesTagger();
+ is.init(1, new MFO());
+
+ SentenceData09 instance= reader.getNext(is);
+ if (instance ==null) break;
+ SentenceData09 result = null;
+try {
+
+ System.out.print("\b\b\b\b"+count);
+ result= lemmatizer.apply(instance);
+
+ result = tagger.apply(result);
+ result= mtagger.apply(result);
+ result = parser.apply(result);
+
+ count++;
+} catch(Exception e) {
+
+ System.out.println("error"+result);
+ System.out.println("error"+instance);
+ e.printStackTrace();
+ break;
+}
+
+ // Output
+ writer.write(result);
+
+ }
+ writer.finishWriting();
+ return 0;
+ }
+
+ public static void main(String args[]) throws Throwable {
+
+ if (args.length<3) {
+ System.out.println("lemmatizer-model tagger-model parser-model source target");
+ System.exit(0);
+ }
+ FullPipelineTest p = new FullPipelineTest();
+ p.lemmatizerFile = new File(args[0]);
+ p.taggerFile = new File(args[1]);
+ p.mtaggerFile = new File(args[2]);
+ p.parserFile = new File(args[3]);
+
+ p.execute(args[4], args[5]);
+
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/examples/MorphTagger.java b/dependencyParser/mate-tools/src/examples/MorphTagger.java
new file mode 100644
index 0000000..0088426
--- /dev/null
+++ b/dependencyParser/mate-tools/src/examples/MorphTagger.java
@@ -0,0 +1,79 @@
+package examples;
+
+import is2.data.SentenceData09;
+import is2.lemmatizer.Lemmatizer;
+import is2.lemmatizer.Options;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.StringTokenizer;
+
+/**
+ * @author Bernd Bohnet, 13.09.2010
+ *
+ * Illustrates the application of some components: lemmatizer and morphologic tagger
+ */
+public class MorphTagger {
+
+
+ /**
+ * How to lemmatize a sentence?
+ */
+ public static void main(String[] args) throws IOException {
+
+
+ // Create a data container for a sentence
+ SentenceData09 i = new SentenceData09();
+
+ if (args.length==1) { // input might be a sentence: "This is another test ."
+ StringTokenizer st = new StringTokenizer(args[0]);
+ ArrayList<String> forms = new ArrayList<String>();
+
+ forms.add("<root>");
+ while(st.hasMoreTokens()) forms.add(st.nextToken());
+
+ i.init(forms.toArray(new String[0]));
+
+ } else {
+ // provide a default sentence
+ i.init(new String[] {"<root>","Häuser","hat","ein","Umlaut","."});
+ }
+
+ //print the forms
+ for (String l : i.forms) System.out.println("forms : "+l);
+
+ // tell the lemmatizer the location of the model
+ is2.lemmatizer.Options optsLemmatizer = new Options(new String[] {"-model","models/lemma-ger.model"});
+
+ // create a lemmatizer
+ Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName);
+
+ // lemmatize a sentence; the result is stored in the SentenceData09 i
+ lemmatizer.apply(i);
+
+
+ // output the lemmata
+ for (String l : i.plemmas) System.out.println("lemma : "+l);
+
+
+ is2.mtag.Options morphologicTaggerOptions = new is2.mtag.Options(new String[] {"-model","models/mtag-ger.model"});
+
+ is2.mtag.Tagger mt = new is2.mtag.Tagger(morphologicTaggerOptions);
+
+ try {
+
+
+ // SentenceData09 snt = is2.mtag.Main.out(i.forms, lemmata);
+
+ SentenceData09 snt = mt.apply(i);
+ for(String f : snt.pfeats) System.out.println("feats "+f);
+
+ } catch(Exception e){
+ e.printStackTrace();
+ }
+
+
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/examples/ParseOnly.java b/dependencyParser/mate-tools/src/examples/ParseOnly.java
new file mode 100755
index 0000000..cec31dd
--- /dev/null
+++ b/dependencyParser/mate-tools/src/examples/ParseOnly.java
@@ -0,0 +1,50 @@
+package examples;
+
+import is2.data.SentenceData09;
+import is2.parser.Options;
+import is2.parser.Parser;
+
+
+public class ParseOnly {
+
+ public static void main(String[] args) {
+
+ if (args.length ==0) {
+ plain();
+ }
+
+ }
+
+ /**
+ * This example shows how to parse a sentence.
+ */
+ public static void plain() {
+
+ // initialize the options
+ String[] opts ={"-model","models/prs-eng-x.model"};
+ Options options = new Options(opts);
+
+ // create a parser
+ Parser parser = new Parser(options);
+
+ // Create a data container for a sentence
+ SentenceData09 i = new SentenceData09();
+
+ // Provide the sentence
+ i.init(new String[] {"<root>","This","is","a","test","."});
+ i.setPPos(new String[]{"<root-POS>","DT","VBZ","DT","NN","."});
+
+ // parse the sentence
+ SentenceData09 out = parser.apply(i);
+
+ // output the sentence and dependency tree
+ System.out.println(out.toString());
+
+ // Get the parsing results
+ out.getLabels();
+ out.getParents();
+
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/examples/Pipeline.java b/dependencyParser/mate-tools/src/examples/Pipeline.java
new file mode 100644
index 0000000..e55869d
--- /dev/null
+++ b/dependencyParser/mate-tools/src/examples/Pipeline.java
@@ -0,0 +1,95 @@
+package examples;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.StringTokenizer;
+
+import is2.data.SentenceData09;
+import is2.lemmatizer.Lemmatizer;
+import is2.parser.Options;
+import is2.parser.Parser;
+import is2.tag.Tagger;
+
+/**
+ * @author Bernd Bohnet, 13.09.2010
+ *
+ * Illustrates the application of some components: lemmatizer, tagger, and parser
+ */
+public class Pipeline {
+
+
+ // how to parse a sentence and call the tools
+ public static void main(String[] args) throws IOException {
+
+
+ // Create a data container for a sentence
+ SentenceData09 i = new SentenceData09();
+
+ if (args.length==1) { // input might be a sentence: "This is another test ."
+ StringTokenizer st = new StringTokenizer(args[0]);
+ ArrayList<String> forms = new ArrayList<String>();
+
+ forms.add("<root>");
+ while(st.hasMoreTokens()) forms.add(st.nextToken());
+
+ i.init(forms.toArray(new String[0]));
+
+ } else {
+ // provide a default sentence
+ i.init(new String[] {"<root>","This","is","a","test","."});
+ }
+
+ //print the forms
+ for (String l : i.forms) System.out.println("form : "+l);
+
+ // tell the lemmatizer the location of the model
+ is2.lemmatizer.Options optsLemmatizer = new is2.lemmatizer.Options(new String[] {"-model","models/lemma-eng.model"});
+
+ // create a lemmatizer
+ Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName);
+
+ // lemmatize a sentence; the result is stored in the SentenceData09 i
+ i = lemmatizer.apply(i);
+
+
+ // output the lemmata
+ for (String l : i.plemmas) System.out.println("lemma : "+l);
+
+ // tell the tagger the location of the model
+ is2.tag.Options optsTagger = new is2.tag.Options(new String[]{"-model","models/tag-eng.model"});
+ Tagger tagger = new Tagger(optsTagger);
+
+
+
+// String pos[] =tagger.tag(i.forms, i.lemmas);
+// i.setPPos(pos);
+
+
+ SentenceData09 tagged = tagger.tag(i);
+ for (String p : tagged.ppos) System.out.println("pos "+p);
+
+
+
+ // initialize the options
+ Options optsParser = new Options(new String[]{"-model","models/prs-eng-x.model"});
+
+ // create a parser
+ Parser parser = new Parser(optsParser);
+
+ // parse the sentence (you get a copy of the input i)
+ SentenceData09 parse = parser.apply(tagged);
+
+ System.out.println(parse.toString());
+
+ // create some trash on the hard drive :-)
+ is2.io.CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt");
+
+ writer.write(i);
+ writer.finishWriting();
+ }
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/extractors/Extractor.java b/dependencyParser/mate-tools/src/extractors/Extractor.java
new file mode 100644
index 0000000..327895d
--- /dev/null
+++ b/dependencyParser/mate-tools/src/extractors/Extractor.java
@@ -0,0 +1,59 @@
+/**
+ *
+ */
+package extractors;
+
+import is2.data.Cluster;
+import is2.data.DataF;
+import is2.data.FV;
+import is2.data.IFV;
+import is2.data.Instances;
+
+/**
+ * @author Dr. Bernd Bohnet, 29.04.2011
+ *
+ *
+ */
+public interface Extractor {
+
+
+ /**
+ * Initializes the Extractor general parts
+ */
+ public void initStat();
+
+ /**
+ * Initializes the Extractor specific parts
+ */
+ public void init();
+
+ public int basic(short[] pos, int[] forms, int w1, int w2, Cluster cluster, IFV f);
+
+ public void firstm(Instances is, int i, int w1, int w2, int j, Cluster cluster, long[] svs);
+
+ public void siblingm(Instances is, int i, short[] pos, int[] forms,
+ int[] lemmas, short[][] feats, int w1, int w2, int g, int j,
+ Cluster cluster, long[] svs, int n);
+
+ public void gcm(Instances is, int i, int w1, int w2, int g, int j, Cluster cluster, long[] svs);
+
+ public int getType();
+
+ public FV encodeCat(Instances is, int n, short[] pos, int[] is2,
+ int[] is3, short[] heads, short[] labels, short[][] s, Cluster cl,
+ FV pred);
+
+ public void setMaxForm(int integer);
+
+ /**
+ * @return
+ */
+ public int getMaxForm();
+
+
+ public float encode3(short[] pos, short[] heads, short[] labs, DataF x);
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/extractors/ExtractorClusterStacked.java b/dependencyParser/mate-tools/src/extractors/ExtractorClusterStacked.java
new file mode 100755
index 0000000..79a44ca
--- /dev/null
+++ b/dependencyParser/mate-tools/src/extractors/ExtractorClusterStacked.java
@@ -0,0 +1,958 @@
+package extractors;
+
+
+import is2.data.Cluster;
+import is2.data.D4;
+import is2.data.DataF;
+import is2.data.Edges;
+import is2.data.FV;
+import is2.data.IFV;
+import is2.data.Instances;
+import is2.data.Long2IntInterface;
+import is2.data.MFB;
+import is2.util.DB;
+
+
+
+final public class ExtractorClusterStacked implements Extractor {
+
+ public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos;
+
+
+ final D4 d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ;
+
+ public final Long2IntInterface li;
+
+ public ExtractorClusterStacked(Long2IntInterface li) {
+
+ this.initFeatures();
+ this.li=li;
+ d0 = new D4(li);dl1 = new D4(li);dl2 = new D4(li);
+ dwr = new D4(li);
+ dr = new D4(li);
+ dwwp = new D4(li);
+
+ dw = new D4(li);
+ dwp = new D4(li);
+
+ dlf = new D4(li);
+ d3lp = new D4(li); d2lp = new D4(li); d2pw = new D4(li); d2pp = new D4(li);
+
+ }
+
+ public void initStat() {
+
+
+ MFB mf = new MFB();
+ s_rel = mf.getFeatureCounter().get(REL).intValue();
+ s_pos = mf.getFeatureCounter().get(POS).intValue();
+ s_word = mf.getFeatureCounter().get(WORD).intValue();
+ s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits();
+ s_dir = mf.getFeatureCounter().get(DIR);
+ la = mf.getValue(DIR, LA);
+ ra = mf.getValue(DIR, RA);
+ s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST);
+ s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT);
+ s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH);
+ s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH);
+ }
+
+ public void init(){
+ // DB.println("init");
+ d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos;
+ dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos;
+ dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos;
+ dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word;
+ dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word;
+ dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos;
+ d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath;
+ d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
+ d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
+ d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
+ }
+
+
+ public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f)
+ {
+
+ d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean();
+
+ d3lp.clean(); d2lp.clean();d2pw.clean(); d2pp.clean();
+
+ int n=1;
+ int dir= (p < d)? ra:la;
+ d0.v0= n++; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4;
+ int end= (p >= d ? p : d);
+ int start = (p >= d ? d : p) + 1;
+
+ for(int i = start ; i <end ; i++) {
+ d0.v3=pposs[i];
+ d0.cz4();
+ d0.csa(s_dir,dir,f);
+ }
+ return n;
+ }
+
+
+ public void firstm(Instances is, int i,
+ int prnt, int dpnt, int label, Cluster cluster, long[] f)
+ {
+
+
+ //short[] pposs, int[] form, int[] lemmas, short[][] feats
+ for(int k=0;k<f.length;k++) f[k]=0;
+
+ short[] pposs = is.pposs[i];
+ int[] form =is.forms[i];
+ short[][] feats = is.feats[i];
+
+
+ int pF = form[prnt],dF = form[dpnt];
+ int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt];
+ int pP = pposs[prnt],dP = pposs[dpnt];
+
+ int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF);
+
+ final int dir= (prnt < dpnt)? ra:la;
+
+ if (pF>maxForm) pF=-1;
+ if (pL>maxForm) pL=-1;
+
+ if (dF>maxForm) dF=-1;
+ if (dL>maxForm) dL=-1;
+
+
+ int n=3,c=0;
+
+ dl2.v1=label;
+ dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
+
+
+ dwwp.v1=label;
+ dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir);
+
+ dl1.v1=label;
+ dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.csa(s_dir,dir);
+
+ int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str;
+ int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end;
+
+ int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str;
+ int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end;
+
+ int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd;
+ int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd;
+
+
+
+ dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v5=dPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=pPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+
+
+ dl1.v0= n++; dl1.v3=pPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=dPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=dPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=pPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+
+ dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v5=dPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=pPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+
+ dl1.v0= n++; dl1.v3=pPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=dPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=dPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=pPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+
+
+
+ dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.cz5(); f[n++]=dl2.getVal();
+
+
+ dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.cz5(); f[n++]=dl2.getVal();
+
+
+ dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir);
+ dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir);
+ dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[n++]=dwwp.csa(s_dir,dir);
+
+
+
+ // lemmas
+
+ dl2.v1=label;
+ dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
+
+
+ dwwp.v1=label;
+ dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir);
+
+ dwp.v1= label;
+ dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; dwp.cz6(); f[c++]=dwp.csa(s_dir,dir);
+ dwp.v0=n++;dwp.cz5(); f[c++]=dwp.csa(s_dir,dir);
+
+ dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.csa(s_dir,dir);
+ dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir);
+ dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir);
+
+
+ // cluster
+
+ d2pw.v1=label;
+ d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.csa(s_dir,dir);
+ d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir);
+ d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir);
+ d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.csa(s_dir,dir);
+
+
+ d2pp.v1=label;
+ d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.csa(s_dir,dir);
+ d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
+ d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
+ d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.csa(s_dir,dir);
+
+
+ short[] prel = is.plabels[i];
+ short[] phead = is.pheads[i];
+
+
+ //take those in for stacking
+ // dl2.v1=label;
+ // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir);
+ // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.csa(s_dir,dir);
+
+
+
+ if (feats==null) return;
+
+ short[] featsP =feats[prnt], featsD =feats[dpnt];
+ dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP;
+ extractFeat(f, c, dir, featsP, featsD);
+
+ return;
+ }
+
+
+
+ public void gcm(Instances is , int i, int p, int d, int gc, int label,Cluster cluster, long[] f) { // grandchild features: scores (parent p, dependent d, grandchild gc) triples of sentence i into f; feature ids must stay bit-identical to trained models
+
+ for(int k=0;k<f.length;k++) f[k]=0; // clear the caller-supplied feature buffer
+
+ short[] pos= is.pposs[i]; // predicted part-of-speech tags
+ int[] forms=is.forms[i]; // word forms
+ int[] lemmas=is.plemmas[i]; // predicted lemmas
+ short[][] feats=is.feats[i]; // morphological features, may be null
+
+ int pP = pos[p], dP = pos[d]; // parent / dependent POS
+ int prntF = forms[p], chldF = forms[d]; // parent / dependent form ids
+ int prntL = lemmas[p], chldL = lemmas[d]; // parent / dependent lemma ids
+ int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); // cluster (long path) ids; -1 for unknown forms
+
+ int gP = gc != -1 ? pos[gc] : s_str; // grandchild POS, sentence-start dummy when there is no grandchild
+ int gcF = gc != -1 ? forms[gc] : s_stwrd; // grandchild form, dummy-word value when absent
+ int gcL = gc != -1 ? lemmas[gc] : s_stwrd;
+ int gcLS = (gc != -1) && (gcF!=-1) ? cluster.getLP(gcF) : s_stwrd;
+
+ if (prntF>maxForm) prntF=-1; // forms/lemmas beyond maxForm are mapped to unknown (-1)
+ if (prntL>maxForm) prntL=-1;
+
+ if (chldF>maxForm) chldF=-1;
+ if (chldL>maxForm) chldL=-1;
+
+ if (gcF>maxForm) gcF=-1;
+ if (gcL>maxForm) gcL=-1;
+
+
+ int dir= (p < d)? ra:la, dir_gra =(d < gc)? ra:la; // attachment directions: parent->dependent and dependent->grandchild
+
+ int n=84,c=0; // n: running feature-template id (84.. reserved for grandchild templates); c: next free slot in f
+
+ //dl1.v023();
+ dl1.v1=label;
+ dl1.v0= n++; dl1.v2=pP; dl1.v3=dP;dl1.v4=gP; dl1.cz5(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); // POS triple, conditioned on both directions
+ dl1.v0= n++; dl1.v2=pP; dl1.v3=gP; dl1.cz4();dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
+ dl1.v0= n++; dl1.v2=dP; dl1.cz4(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
+
+ dwwp.v1=label;
+ dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=gcF; // form-form pairs
+ dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
+
+ dwwp.v0= n++; dwwp.v2=chldF; dwwp.v3=gcF;
+ dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
+
+ dwp.v1=label;
+ dwp.v0= n++; dwp.v2=gcF; dwp.v3=pP; // form-POS pairs
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=gcF; dwp.v3=dP;
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=prntF; dwp.v3=gP;
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=chldF; dwp.v3=gP;
+ dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra);
+
+
+ // lemma
+
+ dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=gcL; // same templates as above, lemma-based
+ dwwp.cz4();dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
+
+ dwwp.v0= n++; dwwp.v2=chldL; dwwp.v3=gcL;
+ dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=gcL; dwp.v3=pP;
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=gcL; dwp.v3=dP;
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=prntL; dwp.v3=gP;
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=chldL; dwp.v3=gP;
+ dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra);
+
+
+ // clusters
+
+ d2lp.v1= label;
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);// f.add(li.l2i(l));
+ d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);
+ d3lp.v0= n++; d3lp.v1= label; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=gcLS; d3lp.cz5(); d3lp.cs(s_dir,dir);f[c++]=d3lp.csa(s_dir,dir_gra);
+
+ //_f83;
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=gcF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); // cluster pair + one surface form; only parent->dep direction
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.v4=chldF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
+ d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.v4=prntF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
+
+ d2pp.v1= label;
+ d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=gP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); // cluster pair + one POS
+ d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=gcLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
+ d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=gcLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
+
+
+
+ // linear features
+
+ int prntPm1 = p != 0 ? pos[p - 1] : s_str; // parent-pos-minus1
+ int chldPm1 = d - 1 >=0 ? pos[d - 1] : s_str; // child-pos-minus1
+ int prntPp1 = p != pos.length - 1 ? pos[p + 1] : s_end; // parent-pos-plus1
+ int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end; // child-pos-plus1
+
+ int gcPm1 = gc > 0 ? pos[gc - 1] : s_str;
+ int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end; // NOTE(review): when gc==-1 this reads pos[0] instead of a dummy — quirk kept from original mate-tools; confirm before changing
+
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); // POS-context templates around gc, d and p
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP;dl1.v5=chldPp1; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); // NOTE(review): cz6 with only v0..v4 set — v5 keeps its previous value (chldPp1); original behavior kept
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=prntPp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+
+
+ int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend; // cluster of following word
+ int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend;
+ int gcLSp1 = gc < pos.length -1 ? forms[gc + 1] ==-1?-1:cluster.getLP(forms[gc + 1]) : s_end; // NOTE(review): uses s_end where the others use _cend, and reads forms[0] when gc==-1 — inconsistency kept from original mate-tools
+
+ int pLSm1 = p != 0 ? lemmas[p - 1]==-1?-1:cluster.getLP(lemmas[p - 1]): _cstr; // NOTE(review): minus-1 variants look up clusters by lemma while plus-1 variants use forms — kept as in original
+ int cLSm1 = d - 1 >=0 ? lemmas[d - 1] ==-1?-1:cluster.getLP(lemmas[d - 1]):_cstr;
+ int gcLSm1 = gc > 0 ? lemmas[gc - 1] ==-1?-1:cluster.getLP(lemmas[gc - 1]) : _cstr;
+
+
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); // same context templates, cluster-based
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=dP; dl1.cz5();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=cLSm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); // NOTE(review): cz6 with stale v5 (cLSp1) — kept as in original
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=pLSp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+
+
+
+ short[] prel = is.plabels[i],phead=is.pheads[i]; // predicted labels/heads from a previous stage (stacking)
+
+ int g = p==phead[d]?1:2 ; // agreement flags with the stacked prediction
+ if (gc>=0) g += d==phead[gc]?4:8;
+
+ int gr = gc==-1?s_relend:prel[gc]; // g and gr feed only the commented-out stacking templates below
+
+ // take those in for stacking
+ /*
+ dl2.v1=label;
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
+
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
+
+*/
+ if (feats==null) return; // no morphological features for this corpus
+
+ short[] featsP =feats[d]; // dependent's morph features (despite the P in the name)
+ short[] featsD =gc!=-1?feats[gc]:null; // grandchild's morph features
+
+ dlf.v0= n++; dlf.v1=label; dlf.v2=gP; dlf.v3=dP; // template header for the morph cross-product below
+ extractFeat(f, c, dir, featsP, featsD); // return value (new count) deliberately unused — last write into f
+ return;
+ }
+
+
+ public void siblingm(Instances is , int i,short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, int sblng, int label, Cluster cluster, long[] f, int v) // sibling features for (parent prnt, dependent d, sibling sblng); param v is currently unused; feature ids must stay bit-identical to trained models
+ {
+
+ for(int k=0;k<f.length;k++) f[k]=0; // clear the caller-supplied feature buffer
+
+ int pP = pos[prnt], dP = pos[d]; // parent / dependent POS
+ int prntF = forms[prnt],chldF = forms[d]; // parent / dependent form ids
+ int prntL = lemmas[prnt], chldL = lemmas[d]; // parent / dependent lemma ids
+ int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); // cluster (long path) ids
+
+ int sP = sblng!=-1 ? pos[sblng] : s_str, sblF = sblng!=-1 ? forms[sblng] : s_stwrd, sblL = sblng!=-1 ? lemmas[sblng] : s_stwrd; // sibling POS/form/lemma, dummies when absent
+
+ int sblLS = (sblng != -1)&&(sblF!=-1) ? cluster.getLP(sblF) : s_stwrd;
+
+
+ int dir= (prnt < d)? ra:la; // attachment direction
+
+ int abs = Math.abs(prnt-d); // surface distance between parent and dependent
+
+ final int dist;
+ if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2; // bucketed distance: 1,2,3,4,5,6-10,>10
+ else if (abs==2)dist=d1; else dist=di0;
+
+ int n=147; // template ids 147.. reserved for sibling features
+
+ if (prntF>maxForm) prntF=-1; // map rare forms/lemmas to unknown
+ if (prntL>maxForm) prntL=-1;
+
+ if (chldF>maxForm) chldF=-1;
+ if (chldL>maxForm) chldL=-1;
+
+ if (sblF>maxForm) sblF=-1;
+ if (sblL>maxForm) sblL=-1;
+
+
+ dl1.v0= n++; dl1.v1=label;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[0]=dl1.csa(s_dir,dir);f[1]=dl1.csa(s_dist,dist); // fixed slots 0..60, each template stored with dir and dist variants
+ dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[2]=dl1.csa(s_dir,dir); f[3]=dl1.csa(s_dist,dist);
+ dl1.v0= n++; dl1.v2=dP;dl1.cz4(); f[4]=dl1.csa(s_dir,dir); f[5]=dl1.csa(s_dist,dist);
+
+ // sibling only could be tried
+ dwwp.v1=label;
+ dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=sblF; dwwp.cz4(); f[6]=dwwp.csa(s_dir,dir); f[7]=dwwp.csa(s_dist,dist);
+ dwwp.v0= n++; dwwp.v2=chldF; dwwp.cz4(); f[8]=dwwp.csa(s_dir,dir); f[9]=dwwp.csa(s_dist,dist);
+ dwp.v0= n++; dwp.v1=label; dwp.v2=sblF; dwp.v3=pP; dwp.cz4(); f[10]=dwp.csa(s_dir,dir); f[11]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[12]=dwp.csa(s_dir,dir); f[13]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntF; dwp.v3=sP; dwp.cz4(); f[14]=dwp.csa(s_dir,dir); f[15]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldF; dwp.cz4(); f[16]=dwp.csa(s_dir,dir); f[17]=dwp.csa(s_dist,dist);
+
+ //lemmas
+ dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=sblL; dwwp.cz4(); f[18]=dwwp.csa(s_dir,dir); // note: this one has no dist variant (slot layout is fixed)
+ dwwp.v0= n++; dwwp.v2=chldL; dwwp.cz4(); f[19]=dwwp.csa(s_dir,dir); f[20]=dwwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=sblL; dwp.v3=pP; dwp.cz4(); f[21]=dwp.csa(s_dir,dir); f[22]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label; */ dwp.v3=dP; dwp.cz4(); f[23]=dwp.csa(s_dir,dir);f[24]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntL; dwp.v3=sP; dwp.cz4(); f[25]=dwp.csa(s_dir,dir); f[26]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldL; dwp.cz4(); f[27]=dwp.csa(s_dir,dir);f[28]=dwp.csa(s_dist,dist);
+
+
+ // clusters
+
+ d2lp.v1=label;
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[29]=d2lp.csa(s_dir,dir);
+ d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[30]=d2lp.csa(s_dir,dir); f[31]=d2lp.csa(s_dist,dist);
+
+ d3lp.v1= label;
+ d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[32]=d3lp.csa(s_dir,dir);
+
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sblF; d2lp.cz5(); f[33]=d2lp.csa(s_dir,dir); f[34]=d2lp.csa(s_dist,dist); // cluster pair + one surface form
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=chldF; d2lp.cz5(); f[35]=d2lp.csa(s_dir,dir); f[36]=d2lp.csa(s_dist,dist);
+ d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=prntF; d2lp.cz5(); f[37]=d2lp.csa(s_dir,dir); f[38]=d2lp.csa(s_dist,dist);
+
+ d2pp.v1=label;
+ d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[39]=d2pp.csa(s_dir,dir); f[40]=d2pp.csa(s_dist,dist); // cluster pair + one POS
+ d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[41]=d2pp.csa(s_dir,dir); f[42]=d2pp.csa(s_dist,dist);
+ d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[43]=d2pp.csa(s_dir,dir); f[44]=d2pp.csa(s_dist,dist);
+
+
+ int prntPm1 = prnt!=0 ? pos[prnt-1] : s_str; // POS context of parent and dependent
+ int chldPm1 = d-1>=0 ? pos[d-1] : s_str;
+ int prntPp1 = prnt!=pos.length-1 ? pos[prnt+1] : s_end;
+ int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end;
+
+ // sibling part of speech minus and plus 1
+ int sblPm1 = sblng>0 ? pos[sblng-1]:s_str;
+ int sblPp1 = sblng<pos.length-1 ? pos[sblng + 1]:s_end; // NOTE(review): when sblng==-1 this reads pos[0] instead of a dummy — quirk kept from original mate-tools
+
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP; dl1.cz5(); f[45]=dl1.csa(s_dir,dir); // POS-context templates, fixed slots 45..60
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=pP; dl1.cz5(); f[46]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.cz5(); f[47]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[48]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[49]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=prntPm1;dl1.v5=pP;dl1.cz6(); f[50]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[51]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[52]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP; dl1.cz5(); f[53]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=dP; dl1.cz5(); f[54]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.cz5(); f[55]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[56]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6(); f[57]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[58]=dl1.csa(s_dir,dir);
+ dl1.v0= n++;dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6();f[59]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0= n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6(); f[60]=dl1.csa(s_dir,dir);
+
+ int c=61; // switch from fixed slots to a running index
+
+ int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1]==-1?-1:cluster.getLP(forms[prnt + 1]): _cend; // clusters of neighbouring words (all form-based here, unlike gcm)
+ int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend;
+ int sLSp1 = sblng < pos.length -1 ? forms[sblng + 1] ==-1?-1:cluster.getLP(forms[sblng + 1]) : _cend; // NOTE(review): reads forms[0] when sblng==-1 — kept as in original
+
+ int pLSm1 = prnt!=0 ? forms[prnt - 1]==-1?-1:cluster.getLP(forms[prnt - 1]): _cstr;
+ int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr;
+ int sLSm1 = sblng>0 ? forms[sblng - 1] ==-1?-1:cluster.getLP(forms[sblng - 1]):_cstr;
+
+ //int c=61;
+
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); // cluster-context templates, mirroring the POS-context block above
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+
+
+
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); // NOTE(review): this 16-line block duplicates the one above with fresh template ids — kept as in original mate-tools (removing it would change the feature space)
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+
+ // take those in for stacking
+
+ /*
+ short[] prel = is.plabels[i],phead=is.pheads[i];
+
+ int g = prnt==phead[d]?1:2 ;
+ if (sblng>=0) g += prnt==phead[sblng]?4:8;
+
+ int gr = sblng==-1?s_relend:prel[sblng];
+
+
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
+
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
+*/
+
+ if (feats==null) return; // no morphological features for this corpus
+
+ int cnt=c;
+
+ short[] featsP =feats[d]; // dependent's morph features
+ short[] featsSbl =sblng!=-1?feats[sblng]:null; // sibling's morph features
+
+ dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=dP; // morph cross-product: sibling-POS x dependent-POS header
+
+
+ cnt = extractFeat(f, cnt ,dir, featsP, featsSbl);
+
+ featsP =feats[prnt]; // second cross-product: parent vs sibling
+ featsSbl =sblng!=-1?feats[sblng]:null;
+
+ dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=sP;
+ if (featsP!=null && featsSbl!=null) { // inline variant of extractFeat, but direction here encodes parent<sibling order
+ for(short i1=0;i1<featsP.length;i1++) {
+ for(short i2=0;i2<featsSbl.length;i2++) {
+ dlf.v4=featsP[i1]; dlf.v5=featsSbl[i2];
+ dlf.cz6(); f[cnt++]=dlf.csa(s_dir,prnt<sblng?1:2);
+ }
+ }
+ } else if (featsP==null && featsSbl!=null) {
+
+ for(short i2=0;i2<featsSbl.length;i2++) {
+ dlf.v4=nofeat; dlf.v5=featsSbl[i2]; // placeholder on the missing side
+ dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
+ }
+
+ } else if (featsP!=null && featsSbl==null) {
+
+ for(short i1=0;i1<featsP.length;i1++) {
+ dlf.v4=featsP[i1]; dlf.v5=nofeat;
+ dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
+ }
+ }
+
+ return;
+ }
+
+ private int extractFeat(long[] f, int cnt, int dir, short[] featsP, short[] featsD) { // writes the cross-product of two morph-feature sets into f starting at cnt, using the dlf template prepared by the caller; returns the next free slot
+ if (featsP!=null && featsD!=null) {
+ for(short i1=0;i1<featsP.length;i1++) { // full cross-product when both sides have features
+ for(short i2=0;i2<featsD.length;i2++) {
+ dlf.v4=featsP[i1]; dlf.v5=featsD[i2];
+ dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
+ }
+ }
+ } else if (featsP==null && featsD!=null) {
+
+ for(short i2=0;i2<featsD.length;i2++) { // one-sided: pair each feature with the NOFEAT placeholder
+ dlf.v4=nofeat; dlf.v5=featsD[i2];
+ dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
+
+ }
+ } else if (featsP!=null && featsD==null) {
+
+ for(short i1=0;i1<featsP.length;i1++) {
+ dlf.v4=featsP[i1]; dlf.v5=nofeat;
+ dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
+
+ }
+ } // both null: nothing written, cnt returned unchanged
+ return cnt;
+ }
+
+ public IFV encodeCat2(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][],
+ Cluster cluster, IFV f, Long2IntInterface li) { // scores a full tree: accumulates first-order, sibling and two grandchild feature sets for every edge of sentence ic into f
+
+
+ long[] svs = new long[250]; // reusable feature buffer; unused tail slots stay 0 and are still mapped below — presumably li.l2i(0) maps to a sink index; TODO confirm
+
+ for (int i = 1; i < heads.length; i++) { // token 0 is the artificial root, skip it
+
+
+ int n =basic(pposs, forms, heads[i], i, cluster, f); // first-order features go directly into f
+ firstm(is, ic, heads[i], i, types[i], cluster,svs);
+ for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k]));
+
+ int ch,cmi,cmo; // closest sibling, innermost and outermost grandchild (-1 when absent)
+ if (heads[i] < i) { // dependent is right of its head
+ ch = rightmostRight(heads, heads[i], i);
+ cmi = leftmostLeft(heads, i, heads[i]);
+ cmo = rightmostRight(heads, i, heads.length);
+
+ } else { // dependent is left of its head
+ ch = leftmostLeft(heads, heads[i], i);
+ cmi = rightmostRight(heads, i, heads[i]);
+ cmo = leftmostLeft(heads, i, 0);
+ }
+
+ siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n);
+ for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k]));
+
+
+ gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs); // grandchild features toward the head
+ for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k]));
+
+ gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs); // grandchild features away from the head
+ for(int k=0;k<svs.length;k++)f.add(li.l2i(svs[k]));
+ }
+
+ return f; // same feature-vector instance that was passed in
+ }
+
+ public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) { // FV variant of encodeCat2: identical traversal, but features are folded in via dl1.map instead of li.l2i
+
+
+ long[] svs = new long[250]; // reusable feature buffer, zeroed by the extractors
+
+ for (int i = 1; i < heads.length; i++) { // token 0 is the artificial root, skip it
+
+
+ int n =basic(pposs, forms, heads[i], i, cluster, f);
+ firstm(is, ic, heads[i], i, types[i], cluster,svs);
+ for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
+
+ int ch,cmi,cmo; // closest sibling, innermost and outermost grandchild (-1 when absent)
+ if (heads[i] < i) {
+ ch = rightmostRight(heads, heads[i], i);
+ cmi = leftmostLeft(heads, i, heads[i]);
+ cmo = rightmostRight(heads, i, heads.length);
+
+ } else {
+ ch = leftmostLeft(heads, heads[i], i);
+ cmi = rightmostRight(heads, i, heads[i]);
+ cmo = leftmostLeft(heads, i, 0);
+ }
+
+ siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n);
+ for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
+
+
+ gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs); // grandchild features toward the head
+ for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
+
+ gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs); // grandchild features away from the head
+ for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
+ }
+
+ return f; // same feature-vector instance that was passed in
+ }
+
+
+ public float encode3(short[] pos, short heads[] , short[] types, DataF d2) { // sums the precomputed edge, label, sibling and grandchild scores of a whole tree
+
+ double v = 0; // accumulate in double, return as float
+ for (int i = 1; i < heads.length; i++) { // skip artificial root at 0
+
+ int dir= (heads[i] < i)? 0:1; // 0 = head left of dependent, 1 = head right
+
+ v += d2.pl[heads[i]][i]; // unlabeled edge score
+ v += d2.lab[heads[i]][i][types[i]][dir]; // label score
+
+ boolean left = i<heads[i];
+ short[] labels = Edges.get(pos[heads[i]], pos[i], left); // labels licensed for this POS pair/direction
+ int lid=-1;
+ for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} // index of types[i] within the licensed labels
+
+ int ch,cmi,cmo; // closest sibling, innermost and outermost grandchild
+ if (heads[i] < i) {
+ ch = rightmostRight(heads, heads[i], i);
+ cmi = leftmostLeft(heads, i, heads[i]);
+ cmo = rightmostRight(heads, i, heads.length);
+
+ if (ch==-1) ch=heads[i]; // -1 (absent) falls back to the head itself ...
+ if (cmi==-1) cmi=heads[i];
+ if (cmo==-1) cmo=heads[i];
+
+ } else {
+ ch = leftmostLeft(heads, heads[i], i);
+ cmi = rightmostRight(heads, i, heads[i]);
+ cmo = leftmostLeft(heads, i, 0);
+
+ if (ch==-1) ch=i; // ... or to the dependent on this side
+ if (cmi==-1) cmi=i;
+ if (cmo==-1) cmo=i;
+ }
+ v += d2.sib[heads[i]][i][ch][dir][lid]; // NOTE(review): lid can stay -1 if types[i] is not among the licensed labels — would throw AIOOBE; presumably callers guarantee it is licensed
+ v += d2.gra[heads[i]][i][cmi][dir][lid];
+ v += d2.gra[heads[i]][i][cmo][dir][lid];
+ }
+ return (float)v;
+ }
+
+ /**
+ * Sums the precomputed scores of a whole tree, like {@link #encode3(short[], short[], short[], DataF)},
+ * and additionally stores each edge's individual score.
+ * @param pos predicted part-of-speech tags of the sentence
+ * @param heads head index per token (index 0 is the artificial root)
+ * @param types dependency-label id per token
+ * @param edgesScores output: per-edge score, written at the dependent's index
+ * @param d2 precomputed first/second-order score tables
+ * @return the total tree score (sum of edgesScores[1..])
+ */
+ public static float encode3(short[] pos, short heads[] , short[] types, float[] edgesScores, DataF d2) {
+
+ double v = 0; // running total over all edges
+ for (int i = 1; i < heads.length; i++) { // skip artificial root at 0
+
+ int dir= (heads[i] < i)? 0:1; // 0 = head left of dependent, 1 = head right
+
+ edgesScores[i] = d2.pl[heads[i]][i]; // unlabeled edge score
+ edgesScores[i] += d2.lab[heads[i]][i][types[i]][dir]; // label score
+
+ boolean left = i<heads[i];
+ short[] labels = Edges.get(pos[heads[i]], pos[i], left); // labels licensed for this POS pair/direction
+ int lid=-1;
+ for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} // index of types[i] within the licensed labels
+
+ int ch,cmi,cmo; // closest sibling, innermost and outermost grandchild
+ if (heads[i] < i) {
+ ch = rightmostRight(heads, heads[i], i);
+ cmi = leftmostLeft(heads, i, heads[i]);
+ cmo = rightmostRight(heads, i, heads.length);
+
+ if (ch==-1) ch=heads[i]; // -1 (absent) falls back to the head itself ...
+ if (cmi==-1) cmi=heads[i];
+ if (cmo==-1) cmo=heads[i];
+
+ } else {
+ ch = leftmostLeft(heads, heads[i], i);
+ cmi = rightmostRight(heads, i, heads[i]);
+ cmo = leftmostLeft(heads, i, 0);
+
+ if (ch==-1) ch=i; // ... or to the dependent on this side
+ if (cmi==-1) cmi=i;
+ if (cmo==-1) cmo=i;
+ }
+ edgesScores[i] += d2.sib[heads[i]][i][ch][dir][lid]; // NOTE(review): lid may be -1 if the label is not licensed — see instance encode3
+ edgesScores[i] += d2.gra[heads[i]][i][cmi][dir][lid];
+ edgesScores[i] += d2.gra[heads[i]][i][cmo][dir][lid];
+ v+=edgesScores[i];
+ }
+ return (float)v;
+ }
+
+
+ private static int rightmostRight(short[] heads, int head, int max) { // rightmost dependent of head in (head, max); -1 if none
+ int rightmost = -1;
+ for (int i = head + 1; i < max; i++) if (heads[i] == head) rightmost = i; // last match wins
+
+ return rightmost;
+ }
+
+ private static int leftmostLeft(short[] heads, int head, int min) { // leftmost dependent of head in (min, head), exclusive of min; -1 if none
+ int leftmost = -1;
+ for (int i = head - 1; i > min; i--) if (heads[i] == head) leftmost = i; // NOTE(review): bound is exclusive (i > min) while rightmostRight uses i < max — with min=0 the root position is never considered; kept as in original
+ return leftmost;
+ }
+
+ public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA"; // symbol-table category names and direction value names
+
+ private static int ra,la; // registered ids for right/left attachment direction
+ private static int s_str; // id of the sentence-start dummy POS
+ private static int s_end, _cend,_cstr, s_stwrd,s_relend; // ids of end-POS, cluster start/end dummies, dummy word, end relation
+
+ protected static final String TYPE = "TYPE",DIR = "D"; // more symbol-table category names
+ public static final String POS = "POS";
+ protected static final String DIST = "DIST",MID = "MID", FEAT="F";
+
+ private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10"; // distance bucket value names
+
+ private static int di0, d4,d3,d2,d1,d5,d10; // registered ids of the distance buckets (see initFeatures)
+
+
+ private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS";
+
+
+
+ private static int nofeat; // placeholder id used when one side of a morph cross-product has no features
+
+
+ public static int maxForm; // form/lemma ids above this are treated as unknown (-1) by the extractors
+
+
+ /**
+ * Initialize the features: registers all POS/REL/DIR/DIST/FEAT dummy values
+ * in the MFB symbol tables and caches their ids in the static fields.
+ */
+ static public void initFeatures() {
+
+
+ MFB mf = new MFB(); // MFB is a shared (static-backed) symbol table
+ mf.register(POS, MID);
+ s_str = mf.register(POS, STR); // sentence-start dummy POS
+ s_end = mf.register(POS, END); // sentence-end dummy POS
+
+ s_relend = mf.register(REL, END); // dummy relation for absent grandchild/sibling
+
+ _cstr= mf.register(Cluster.SPATH,STR); // cluster-path start/end dummies
+ _cend=mf.register(Cluster.SPATH,END);
+
+
+ mf.register(TYPE, POS);
+
+ s_stwrd=mf.register(WORD,STWRD); // dummy word for absent tokens
+ mf.register(POS,STPOS);
+
+ la = mf.register(DIR, LA); // left / right attachment direction ids
+ ra = mf.register(DIR, RA);
+
+ // mf.register(TYPE, CHAR);
+
+ mf.register(TYPE, FEAT);
+ nofeat=mf.register(FEAT, "NOFEAT"); // placeholder for a missing morph-feature side
+
+ for(int k=0;k<215;k++) mf.register(TYPE, "F"+k); // pre-register all feature-template type names
+
+
+ di0=mf.register(DIST, _0); // distance buckets used by siblingm
+ d1=mf.register(DIST, _1);
+ d2=mf.register(DIST, _2);
+ d3=mf.register(DIST, _3);
+ d4=mf.register(DIST, _4);
+ d5=mf.register(DIST, _5);
+ // d5l=mf.register(DIST, _5l);
+ d10=mf.register(DIST, _10);
+
+
+ }
+
+ /* (non-Javadoc)
+ * @see extractors.Extractor#getType()
+ */
+ @Override
+ public int getType() {
+ // id of the TYPE feature category, cached by initStat()
+ return s_type;
+ }
+
+ /* (non-Javadoc)
+ * @see extractors.Extractor#setMaxForm(int)
+ */
+ @Override
+ public void setMaxForm(int max) {
+ maxForm = max; // static: the unknown-form cutoff is shared by all extractor instances
+ }
+
+ /* (non-Javadoc)
+ * @see extractors.Extractor#getMaxForm()
+ */
+ @Override
+ public int getMaxForm() {
+ return maxForm; // static: shared unknown-form cutoff
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/extractors/ExtractorClusterStackedR2.java b/dependencyParser/mate-tools/src/extractors/ExtractorClusterStackedR2.java
new file mode 100644
index 0000000..de82f42
--- /dev/null
+++ b/dependencyParser/mate-tools/src/extractors/ExtractorClusterStackedR2.java
@@ -0,0 +1,937 @@
+package extractors;
+
+
+import java.util.Arrays;
+
+import is2.data.Cluster;
+import is2.data.D4;
+import is2.data.DataF;
+import is2.data.Edges;
+import is2.data.FV;
+import is2.data.IFV;
+import is2.data.Instances;
+import is2.data.Long2IntInterface;
+import is2.data.MFB;
+import is2.util.DB;
+
+
+
+final public class ExtractorClusterStackedR2 implements Extractor {
+
+ // Sizes (value counts) of the registered feature categories; filled by initStat().
+ public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos;
+
+ // Shared feature-name-to-id mapping; (re)created in initStat().
+ MFB mf;
+
+ // Reusable feature encoders, one per feature-template family (configured in init()).
+ final D4 d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ;
+
+ // Long-to-int mapping used to compress 64-bit feature codes into vector indices.
+ public final Long2IntInterface li;
+
+ /**
+ * Creates the extractor: registers the static features and allocates one
+ * D4 encoder per feature-template family.
+ *
+ * @param li long-to-int mapping used to compress 64-bit feature codes
+ */
+ public ExtractorClusterStackedR2(Long2IntInterface li) {
+
+ initFeatures();
+ this.li=li;
+ d0 = new D4(li);dl1 = new D4(li);dl2 = new D4(li);
+ dwr = new D4(li);
+ dr = new D4(li);
+ dwwp = new D4(li);
+
+ dw = new D4(li);
+ dwp = new D4(li);
+
+ dlf = new D4(li);
+ d3lp = new D4(li); d2lp = new D4(li); d2pw = new D4(li); d2pp = new D4(li);
+
+ }
+
+ /**
+ * Caches the sizes of the feature categories registered in the shared MFB
+ * mapping, plus the left/right direction ids. Must run after all features
+ * are registered: the counters are read via auto-unboxing, so a missing
+ * category throws a NullPointerException — only SPATH/LPATH are guarded.
+ */
+ public void initStat() {
+
+
+ mf = new MFB();
+ s_rel = mf.getFeatureCounter().get(REL).intValue();
+ s_pos = mf.getFeatureCounter().get(POS).intValue();
+ s_word = mf.getFeatureCounter().get(WORD).intValue();
+ s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits();
+ s_dir = mf.getFeatureCounter().get(DIR);
+ la = mf.getValue(DIR, LA);
+ ra = mf.getValue(DIR, RA);
+ s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST);
+ s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT);
+ s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH);
+ s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH);
+ }
+
+ /**
+ * Configures the per-position value-range slots (a0..a7) of each D4
+ * encoder with the category sizes cached by initStat(). The slots appear
+ * to act as the radix of each encoded position — confirm against
+ * is2.data.D4.
+ */
+ public void init(){
+ // DB.println("init");
+ d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos;
+ dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos;
+ dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos;
+ dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word;
+ dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word;
+ dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos;
+ d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath;
+ d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
+ d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
+ d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
+ }
+
+
+ /**
+ * Extracts the "POS-tag set between head and dependent" feature: collects
+ * the part-of-speech ids strictly between p and d, sorts and de-duplicates
+ * them, registers the resulting string under category "px" and emits one
+ * direction-annotated feature into f. Also resets all encoders as a side
+ * effect (clean()).
+ *
+ * @return the next free feature-template number
+ */
+ public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f)
+ {
+
+ d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean();
+
+ d3lp.clean(); d2lp.clean();d2pw.clean(); d2pp.clean();
+
+ int n=1;
+ int dir= (p < d)? ra:la;
+ // d0.v0= n; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4;
+ int end= (p >= d ? p : d);
+ int start = (p >= d ? d : p) + 1;
+
+ StringBuilder s = new StringBuilder(end-start);
+ int[] x = new int[end-start];
+ int c=0;
+ for(int i = start ; i <end ; i++) {
+ //d0.v3=pposs[i];
+ //d0.cz4();
+ //d0.csa(s_dir,dir,f);
+// s.append((char)pposs[i]);
+ x[c++] =pposs[i];
+ }
+
+ // sort and de-duplicate the POS ids, then build the key string
+ Arrays.sort(x);
+ for(int i = 0;i<x.length ; i++) {
+ // NOTE(review): append(int) adds the decimal representation of the id;
+ // the commented-out line above used a (char) cast instead — confirm intended.
+ if (i==0 || x[i]!=x[i-1] ) s.append(x[i]);
+ }
+ int v = mf.register("px", s.toString());
+
+ dwp.v0 = n++; dwp.v1 = 1;dwp.v2 = v; dwp.v3 = pposs[p]; dwp.v4 = pposs[d]; dwp.cz5(); dwp.csa(s_dir,dir,f);
+
+ return n;
+ }
+
+
+ /**
+ * Extracts the first-order (head/dependent edge) features for the edge
+ * prnt -> dpnt with the given label into f (f is zeroed first). Covers
+ * form/lemma/POS combinations, POS context windows, cluster features and
+ * the cross product of the morphologic features of both tokens.
+ */
+ public void firstm(Instances is, int i,
+ int prnt, int dpnt, int label, Cluster cluster, long[] f)
+ {
+
+
+ //short[] pposs, int[] form, int[] lemmas, short[][] feats
+ for(int k=0;k<f.length;k++) f[k]=0;
+
+ short[] pposs = is.pposs[i];
+ int[] form =is.forms[i];
+ short[][] feats = is.feats[i];
+
+
+ // head/dependent form, predicted lemma and POS ids
+ int pF = form[prnt],dF = form[dpnt];
+ int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt];
+ int pP = pposs[prnt],dP = pposs[dpnt];
+
+ int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF);
+
+ final int dir= (prnt < dpnt)? ra:la;
+
+ // clamp rare forms/lemmas to "unknown"
+ if (pF>maxForm) pF=-1;
+ if (pL>maxForm) pL=-1;
+
+ if (dF>maxForm) dF=-1;
+ if (dL>maxForm) dL=-1;
+
+
+ // n numbers the feature templates, c indexes the output slots in f
+ int n=3,c=0;
+
+ dl2.v1=label;
+ dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
+
+
+ dwwp.v1=label;
+ dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir);
+
+ dl1.v1=label;
+ dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.csa(s_dir,dir);
+
+ // POS context: +/-1 and +/-2 neighbours of head and dependent
+ int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str;
+ int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end;
+
+ int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str;
+ int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end;
+
+ int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd;
+ int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd;
+
+
+
+ // NOTE(review): from here through the dwwp block below the code writes
+ // f[n++] although n is also used as the template id (dl1.v0 = n++), so
+ // each line consumes two template numbers and leaves gaps between the
+ // c-indexed slots. Kept exactly as in the original mate-tools source.
+ dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v5=dPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=pPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+
+
+ dl1.v0= n++; dl1.v3=pPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=dPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=dPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=pPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+
+ dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v5=dPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=pPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir);
+
+ dl1.v0= n++; dl1.v3=pPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=dPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=dPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v3=pPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir);
+
+
+
+ // NOTE(review): dl2.v3 is assigned twice on each of the next eight lines
+ // (the first value is immediately overwritten) — as in the original source.
+ dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.cz5(); f[n++]=dl2.getVal();
+
+
+ dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.cz5(); f[n++]=dl2.getVal();
+
+
+ dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir);
+ dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir);
+ // NOTE(review): dwwp.v4 is assigned twice here (pP then dP) — as in the original source.
+ dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[n++]=dwwp.csa(s_dir,dir);
+
+
+
+ // lemmas
+
+ dl2.v1=label;
+ dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir);
+
+
+ dwwp.v1=label;
+ dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir);
+
+ dwp.v1= label;
+ dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; dwp.cz6(); f[c++]=dwp.csa(s_dir,dir);
+ dwp.v0=n++;dwp.cz5(); f[c++]=dwp.csa(s_dir,dir);
+
+ dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.csa(s_dir,dir);
+ dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir);
+ dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir);
+
+
+ // cluster
+
+ d2pw.v1=label;
+ d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.csa(s_dir,dir);
+ d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir);
+ d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir);
+ d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.csa(s_dir,dir);
+
+
+ d2pp.v1=label;
+ d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.csa(s_dir,dir);
+ d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
+ d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
+ d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.csa(s_dir,dir);
+
+
+ // NOTE(review): prel/phead are only used by the commented-out stacking
+ // features below — as in the original source.
+ short[] prel = is.plabels[i];
+ short[] phead = is.pheads[i];
+
+
+ //take those in for stacking
+ // dl2.v1=label;
+ // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir);
+ // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.csa(s_dir,dir);
+
+
+
+ if (feats==null) return;
+
+ // cross product of head and dependent morphologic features
+ short[] featsP =feats[prnt], featsD =feats[dpnt];
+ dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP;
+ extractFeat(f, c, dir, featsP, featsD);
+
+ return;
+ }
+
+
+
+ /**
+ * Extracts second-order grandchild features for the chain p -> d -> gc
+ * (gc == -1 means "no grandchild": boundary markers are used instead).
+ * Features combine POS/form/lemma/cluster information of all three tokens
+ * plus linear POS/cluster context, and end with the morphologic-feature
+ * cross product of d and gc. f is zeroed first.
+ */
+ public void gcm(Instances is , int i, int p, int d, int gc, int label,Cluster cluster, long[] f) {
+
+ for(int k=0;k<f.length;k++) f[k]=0;
+
+ short[] pos= is.pposs[i];
+ int[] forms=is.forms[i];
+ int[] lemmas=is.plemmas[i];
+ short[][] feats=is.feats[i];
+
+ int pP = pos[p], dP = pos[d];
+ int prntF = forms[p], chldF = forms[d];
+ int prntL = lemmas[p], chldL = lemmas[d];
+ int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF);
+
+ // grandchild values, or boundary markers when gc == -1
+ int gP = gc != -1 ? pos[gc] : s_str;
+ int gcF = gc != -1 ? forms[gc] : s_stwrd;
+ int gcL = gc != -1 ? lemmas[gc] : s_stwrd;
+ int gcLS = (gc != -1) && (gcF!=-1) ? cluster.getLP(gcF) : s_stwrd;
+
+ // clamp rare forms/lemmas to "unknown"
+ if (prntF>maxForm) prntF=-1;
+ if (prntL>maxForm) prntL=-1;
+
+ if (chldF>maxForm) chldF=-1;
+ if (chldL>maxForm) chldL=-1;
+
+ if (gcF>maxForm) gcF=-1;
+ if (gcL>maxForm) gcL=-1;
+
+
+ int dir= (p < d)? ra:la, dir_gra =(d < gc)? ra:la;
+
+ // template numbering continues after the first-order templates
+ int n=84,c=0;
+
+ //dl1.v023();
+ dl1.v1=label;
+ dl1.v0= n++; dl1.v2=pP; dl1.v3=dP;dl1.v4=gP; dl1.cz5(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
+ dl1.v0= n++; dl1.v2=pP; dl1.v3=gP; dl1.cz4();dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
+ dl1.v0= n++; dl1.v2=dP; dl1.cz4(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra);
+
+ dwwp.v1=label;
+ dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=gcF;
+ dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
+
+ dwwp.v0= n++; dwwp.v2=chldF; dwwp.v3=gcF;
+ dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
+
+ dwp.v1=label;
+ dwp.v0= n++; dwp.v2=gcF; dwp.v3=pP;
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=gcF; dwp.v3=dP;
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=prntF; dwp.v3=gP;
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=chldF; dwp.v3=gP;
+ dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra);
+
+
+ // lemma
+
+ dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=gcL;
+ dwwp.cz4();dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
+
+ dwwp.v0= n++; dwwp.v2=chldL; dwwp.v3=gcL;
+ dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=gcL; dwp.v3=pP;
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=gcL; dwp.v3=dP;
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=prntL; dwp.v3=gP;
+ dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra);
+
+ dwp.v0= n++; dwp.v2=chldL; dwp.v3=gP;
+ dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra);
+
+
+ // clusters
+
+ d2lp.v1= label;
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);// f.add(li.l2i(l));
+ d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);
+ d3lp.v0= n++; d3lp.v1= label; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=gcLS; d3lp.cz5(); d3lp.cs(s_dir,dir);f[c++]=d3lp.csa(s_dir,dir_gra);
+
+ //_f83;
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=gcF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.v4=chldF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
+ d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.v4=prntF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir);
+
+ d2pp.v1= label;
+ d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=gP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
+ d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=gcLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
+ d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=gcLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir);
+
+
+
+ // linear features
+
+ int prntPm1 = p != 0 ? pos[p - 1] : s_str; // parent-pos-minus1
+ int chldPm1 = d - 1 >=0 ? pos[d - 1] : s_str; // child-pos-minus1
+ int prntPp1 = p != pos.length - 1 ? pos[p + 1] : s_end;
+ int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end;
+
+ int gcPm1 = gc > 0 ? pos[gc - 1] : s_str;
+ int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end;
+
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP;dl1.v5=chldPp1; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=prntPp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+
+
+ // NOTE(review): the +1 neighbours below read forms[] while the -1
+ // neighbours read lemmas[], and gcLSp1 falls back to s_end instead of
+ // _cend — kept as in the original source.
+ int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend;
+ int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend;
+ int gcLSp1 = gc < pos.length -1 ? forms[gc + 1] ==-1?-1:cluster.getLP(forms[gc + 1]) : s_end;
+
+ int pLSm1 = p != 0 ? lemmas[p - 1]==-1?-1:cluster.getLP(lemmas[p - 1]): _cstr;
+ int cLSm1 = d - 1 >=0 ? lemmas[d - 1] ==-1?-1:cluster.getLP(lemmas[d - 1]):_cstr;
+ int gcLSm1 = gc > 0 ? lemmas[gc - 1] ==-1?-1:cluster.getLP(lemmas[gc - 1]) : _cstr;
+
+
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=dP; dl1.cz5();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=cLSm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=pLSp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+
+
+
+ // NOTE(review): prel/g/gr are only used by the commented-out stacking
+ // features below — as in the original source.
+ short[] prel = is.plabels[i],phead=is.pheads[i];
+
+ int g = p==phead[d]?1:2 ;
+ if (gc>=0) g += d==phead[gc]?4:8;
+
+ int gr = gc==-1?s_relend:prel[gc];
+
+ // take those in for stacking
+ /*
+ dl2.v1=label;
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
+
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
+
+*/
+ if (feats==null) return;
+
+ // morphologic cross product of dependent and grandchild
+ short[] featsP =feats[d];
+ short[] featsD =gc!=-1?feats[gc]:null;
+
+ dlf.v0= n++; dlf.v1=label; dlf.v2=gP; dlf.v3=dP;
+ extractFeat(f, c, dir, featsP, featsD);
+ return;
+ }
+
+
+ /**
+ * Extracts second-order sibling features for head prnt, dependent d and
+ * sibling sblng (sblng == -1 means "no sibling": boundary markers are
+ * used). Emits direction- and distance-bucketed POS/form/lemma/cluster
+ * combinations at fixed slots f[0..60], then context and morphologic
+ * features at running indices. f is zeroed first.
+ * NOTE(review): the last parameter v is never read — kept for interface
+ * compatibility with the original source.
+ */
+ public void siblingm(Instances is , int i,short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, int sblng, int label, Cluster cluster, long[] f, int v)
+ {
+
+ for(int k=0;k<f.length;k++) f[k]=0;
+
+ int pP = pos[prnt], dP = pos[d];
+ int prntF = forms[prnt],chldF = forms[d];
+ int prntL = lemmas[prnt], chldL = lemmas[d];
+ int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF);
+
+ // sibling values, or boundary markers when sblng == -1
+ int sP = sblng!=-1 ? pos[sblng] : s_str, sblF = sblng!=-1 ? forms[sblng] : s_stwrd, sblL = sblng!=-1 ? lemmas[sblng] : s_stwrd;
+
+ int sblLS = (sblng != -1)&&(sblF!=-1) ? cluster.getLP(sblF) : s_stwrd;
+
+
+ int dir= (prnt < d)? ra:la;
+
+ int abs = Math.abs(prnt-d);
+
+ // bucket the head-dependent distance: >10, >5, 5, 4, 3, 2, otherwise adjacent
+ final int dist;
+ if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2;
+ else if (abs==2)dist=d1; else dist=di0;
+
+ // template numbering continues after the grandchild templates
+ int n=147;
+
+ // clamp rare forms/lemmas to "unknown"
+ if (prntF>maxForm) prntF=-1;
+ if (prntL>maxForm) prntL=-1;
+
+ if (chldF>maxForm) chldF=-1;
+ if (chldL>maxForm) chldL=-1;
+
+ if (sblF>maxForm) sblF=-1;
+ if (sblL>maxForm) sblL=-1;
+
+
+ dl1.v0= n++; dl1.v1=label;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[0]=dl1.csa(s_dir,dir);f[1]=dl1.csa(s_dist,dist);
+ dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[2]=dl1.csa(s_dir,dir); f[3]=dl1.csa(s_dist,dist);
+ dl1.v0= n++; dl1.v2=dP;dl1.cz4(); f[4]=dl1.csa(s_dir,dir); f[5]=dl1.csa(s_dist,dist);
+
+ // sibling only could be tried
+ dwwp.v1=label;
+ dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=sblF; dwwp.cz4(); f[6]=dwwp.csa(s_dir,dir); f[7]=dwwp.csa(s_dist,dist);
+ dwwp.v0= n++; dwwp.v2=chldF; dwwp.cz4(); f[8]=dwwp.csa(s_dir,dir); f[9]=dwwp.csa(s_dist,dist);
+ dwp.v0= n++; dwp.v1=label; dwp.v2=sblF; dwp.v3=pP; dwp.cz4(); f[10]=dwp.csa(s_dir,dir); f[11]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[12]=dwp.csa(s_dir,dir); f[13]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntF; dwp.v3=sP; dwp.cz4(); f[14]=dwp.csa(s_dir,dir); f[15]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldF; dwp.cz4(); f[16]=dwp.csa(s_dir,dir); f[17]=dwp.csa(s_dist,dist);
+
+ //lemmas
+ dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=sblL; dwwp.cz4(); f[18]=dwwp.csa(s_dir,dir);
+ dwwp.v0= n++; dwwp.v2=chldL; dwwp.cz4(); f[19]=dwwp.csa(s_dir,dir); f[20]=dwwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=sblL; dwp.v3=pP; dwp.cz4(); f[21]=dwp.csa(s_dir,dir); f[22]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label; */ dwp.v3=dP; dwp.cz4(); f[23]=dwp.csa(s_dir,dir);f[24]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntL; dwp.v3=sP; dwp.cz4(); f[25]=dwp.csa(s_dir,dir); f[26]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldL; dwp.cz4(); f[27]=dwp.csa(s_dir,dir);f[28]=dwp.csa(s_dist,dist);
+
+
+ // clusters
+
+ d2lp.v1=label;
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[29]=d2lp.csa(s_dir,dir);
+ d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[30]=d2lp.csa(s_dir,dir); f[31]=d2lp.csa(s_dist,dist);
+
+ d3lp.v1= label;
+ d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[32]=d3lp.csa(s_dir,dir);
+
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sblF; d2lp.cz5(); f[33]=d2lp.csa(s_dir,dir); f[34]=d2lp.csa(s_dist,dist);
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=chldF; d2lp.cz5(); f[35]=d2lp.csa(s_dir,dir); f[36]=d2lp.csa(s_dist,dist);
+ d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=prntF; d2lp.cz5(); f[37]=d2lp.csa(s_dir,dir); f[38]=d2lp.csa(s_dist,dist);
+
+ d2pp.v1=label;
+ d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[39]=d2pp.csa(s_dir,dir); f[40]=d2pp.csa(s_dist,dist);
+ d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[41]=d2pp.csa(s_dir,dir); f[42]=d2pp.csa(s_dist,dist);
+ d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[43]=d2pp.csa(s_dir,dir); f[44]=d2pp.csa(s_dist,dist);
+
+
+ // linear POS context of head, dependent and sibling
+ int prntPm1 = prnt!=0 ? pos[prnt-1] : s_str;
+ int chldPm1 = d-1>=0 ? pos[d-1] : s_str;
+ int prntPp1 = prnt!=pos.length-1 ? pos[prnt+1] : s_end;
+ int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end;
+
+ // sibling part of speech minus and plus 1
+ int sblPm1 = sblng>0 ? pos[sblng-1]:s_str;
+ int sblPp1 = sblng<pos.length-1 ? pos[sblng + 1]:s_end;
+
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP; dl1.cz5(); f[45]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=pP; dl1.cz5(); f[46]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.cz5(); f[47]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[48]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[49]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=prntPm1;dl1.v5=pP;dl1.cz6(); f[50]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[51]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[52]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP; dl1.cz5(); f[53]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=dP; dl1.cz5(); f[54]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.cz5(); f[55]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[56]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6(); f[57]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[58]=dl1.csa(s_dir,dir);
+ dl1.v0= n++;dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6();f[59]=dl1.csa(s_dir,dir);// f.add(li.l2i(l));
+ dl1.v0= n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6(); f[60]=dl1.csa(s_dir,dir);
+
+ int c=61;
+
+ // cluster context (+/-1 neighbours); note +1 uses forms[], -1 uses forms[] too here
+ int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1]==-1?-1:cluster.getLP(forms[prnt + 1]): _cend;
+ int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend;
+ int sLSp1 = sblng < pos.length -1 ? forms[sblng + 1] ==-1?-1:cluster.getLP(forms[sblng + 1]) : _cend;
+
+ int pLSm1 = prnt!=0 ? forms[prnt - 1]==-1?-1:cluster.getLP(forms[prnt - 1]): _cstr;
+ int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr;
+ int sLSm1 = sblng>0 ? forms[sblng - 1] ==-1?-1:cluster.getLP(forms[sblng - 1]):_cstr;
+
+ //int c=61;
+
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+
+
+
+ // NOTE(review): the following 16 features repeat the previous block with
+ // identical values but fresh template numbers — kept as in the original source.
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir);
+ dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir);
+
+ // take those in for stacking
+
+ /*
+ short[] prel = is.plabels[i],phead=is.pheads[i];
+
+ int g = prnt==phead[d]?1:2 ;
+ if (sblng>=0) g += prnt==phead[sblng]?4:8;
+
+ int gr = sblng==-1?s_relend:prel[sblng];
+
+
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
+
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir);
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir);
+*/
+
+ if (feats==null) return;
+
+ int cnt=c;
+
+ // morphologic cross product of dependent and sibling
+ short[] featsP =feats[d];
+ short[] featsSbl =sblng!=-1?feats[sblng]:null;
+
+ dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=dP;
+
+
+ cnt = extractFeat(f, cnt ,dir, featsP, featsSbl);
+
+ // and of head and sibling; note the direction here is prnt<sblng, not dir
+ featsP =feats[prnt];
+ featsSbl =sblng!=-1?feats[sblng]:null;
+
+ dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=sP;
+ if (featsP!=null && featsSbl!=null) {
+ for(short i1=0;i1<featsP.length;i1++) {
+ for(short i2=0;i2<featsSbl.length;i2++) {
+ dlf.v4=featsP[i1]; dlf.v5=featsSbl[i2];
+ dlf.cz6(); f[cnt++]=dlf.csa(s_dir,prnt<sblng?1:2);
+ }
+ }
+ } else if (featsP==null && featsSbl!=null) {
+
+ for(short i2=0;i2<featsSbl.length;i2++) {
+ dlf.v4=nofeat; dlf.v5=featsSbl[i2];
+ dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
+ }
+
+ } else if (featsP!=null && featsSbl==null) {
+
+ for(short i1=0;i1<featsP.length;i1++) {
+ dlf.v4=featsP[i1]; dlf.v5=nofeat;
+ dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir);
+ }
+ }
+
+ return;
+ }
+
+ /**
+ * Emits the cross product of two morphologic feature lists (head side x
+ * dependent side) through the dlf encoder, substituting the NOFEAT
+ * placeholder when exactly one side has no features. Writes into f
+ * starting at cnt and returns the next free index (cnt unchanged when
+ * both sides are null).
+ */
+ private int extractFeat(long[] f, int cnt, int dir, short[] featsP, short[] featsD) {
+ if (featsP != null && featsD != null) {
+ // both sides present: full cross product, head-major order
+ for (short fp : featsP) {
+ for (short fd : featsD) {
+ dlf.v4 = fp;
+ dlf.v5 = fd;
+ dlf.cz6();
+ f[cnt++] = dlf.csa(s_dir, dir);
+ }
+ }
+ } else if (featsD != null) {
+ // head side empty: pair each dependent feature with the placeholder
+ for (short fd : featsD) {
+ dlf.v4 = nofeat;
+ dlf.v5 = fd;
+ dlf.cz6();
+ f[cnt++] = dlf.csa(s_dir, dir);
+ }
+ } else if (featsP != null) {
+ // dependent side empty: pair each head feature with the placeholder
+ for (short fp : featsP) {
+ dlf.v4 = fp;
+ dlf.v5 = nofeat;
+ dlf.cz6();
+ f[cnt++] = dlf.csa(s_dir, dir);
+ }
+ }
+ return cnt;
+ }
+
+
+ /**
+ * Builds the full feature vector for a (partial) parse: for every token
+ * (skipping the artificial root at index 0) extracts the basic, first-order,
+ * sibling and two grandchild feature sets for the edge heads[i] -> i and
+ * maps each 64-bit feature code into f via dl1.map.
+ *
+ * @return f, with all features of the tree added
+ */
+ public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) {
+
+
+ // reusable buffer for the per-edge feature codes (sized for the largest set)
+ long[] svs = new long[250];
+
+ for (int i = 1; i < heads.length; i++) {
+
+
+ int n =basic(pposs, forms, heads[i], i, cluster, f);
+
+ firstm(is, ic, heads[i], i, types[i], cluster,svs);
+ for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
+
+ // closest sibling (ch) and inner/outer grandchildren (cmi/cmo),
+ // looked up on the side determined by the attachment direction
+ int ch,cmi,cmo;
+ if (heads[i] < i) {
+ ch = rightmostRight(heads, heads[i], i);
+ cmi = leftmostLeft(heads, i, heads[i]);
+ cmo = rightmostRight(heads, i, heads.length);
+
+ } else {
+ ch = leftmostLeft(heads, heads[i], i);
+ cmi = rightmostRight(heads, i, heads[i]);
+ cmo = leftmostLeft(heads, i, 0);
+ }
+
+ siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n);
+ for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
+
+
+ gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs);
+ for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
+
+ gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs);
+ for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]);
+ }
+
+ return f;
+ }
+
+
+ /**
+ * Scores a complete dependency tree by summing, for every token, the
+ * precomputed edge, label, sibling and grandchild scores from the score
+ * matrices in d2.
+ *
+ * @return the total score of the tree described by heads/types
+ */
+ public float encode3(short[] pos, short heads[] , short[] types, DataF d2) {
+
+ double v = 0;
+ for (int i = 1; i < heads.length; i++) {
+
+ int dir= (heads[i] < i)? 0:1;
+
+ v += d2.pl[heads[i]][i];
+ v += d2.lab[heads[i]][i][types[i]][dir];
+
+ // position of types[i] in the label list allowed for this POS pair
+ // NOTE(review): lid stays -1 when the label is not in the list, which
+ // would index the sib/gra arrays at -1 — assumes it is always present.
+ boolean left = i<heads[i];
+ short[] labels = Edges.get(pos[heads[i]], pos[i], left);
+ int lid=-1;
+ for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;}
+
+ // closest sibling and inner/outer grandchildren; -1 (none) falls back
+ // to the head resp. the dependent itself
+ int ch,cmi,cmo;
+ if (heads[i] < i) {
+ ch = rightmostRight(heads, heads[i], i);
+ cmi = leftmostLeft(heads, i, heads[i]);
+ cmo = rightmostRight(heads, i, heads.length);
+
+ if (ch==-1) ch=heads[i];
+ if (cmi==-1) cmi=heads[i];
+ if (cmo==-1) cmo=heads[i];
+
+ } else {
+ ch = leftmostLeft(heads, heads[i], i);
+ cmi = rightmostRight(heads, i, heads[i]);
+ cmo = leftmostLeft(heads, i, 0);
+
+ if (ch==-1) ch=i;
+ if (cmi==-1) cmi=i;
+ if (cmo==-1) cmo=i;
+ }
+ v += d2.sib[heads[i]][i][ch][dir][lid];
+ v += d2.gra[heads[i]][i][cmi][dir][lid];
+ v += d2.gra[heads[i]][i][cmo][dir][lid];
+ }
+ return (float)v;
+ }
+
+ /**
+ * Provide the scores of the edges
+ * @param pos
+ * @param heads
+ * @param types
+ * @param edgesScores
+ * @param d2
+ * @return
+ */
+ public static float encode3(short[] pos, short heads[] , short[] types, float[] edgesScores, DataF d2) {
+
+ double v = 0;
+ for (int i = 1; i < heads.length; i++) {
+
+ int dir= (heads[i] < i)? 0:1;
+
+ edgesScores[i] = d2.pl[heads[i]][i];
+ edgesScores[i] += d2.lab[heads[i]][i][types[i]][dir];
+
+ boolean left = i<heads[i];
+ short[] labels = Edges.get(pos[heads[i]], pos[i], left);
+ int lid=-1;
+ for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;}
+
+ int ch,cmi,cmo;
+ if (heads[i] < i) {
+ ch = rightmostRight(heads, heads[i], i);
+ cmi = leftmostLeft(heads, i, heads[i]);
+ cmo = rightmostRight(heads, i, heads.length);
+
+ if (ch==-1) ch=heads[i];
+ if (cmi==-1) cmi=heads[i];
+ if (cmo==-1) cmo=heads[i];
+
+ } else {
+ ch = leftmostLeft(heads, heads[i], i);
+ cmi = rightmostRight(heads, i, heads[i]);
+ cmo = leftmostLeft(heads, i, 0);
+
+ if (ch==-1) ch=i;
+ if (cmi==-1) cmi=i;
+ if (cmo==-1) cmo=i;
+ }
+ edgesScores[i] += d2.sib[heads[i]][i][ch][dir][lid];
+ edgesScores[i] += d2.gra[heads[i]][i][cmi][dir][lid];
+ edgesScores[i] += d2.gra[heads[i]][i][cmo][dir][lid];
+ v+=edgesScores[i];
+ }
+ return (float)v;
+ }
+
+
+ private static int rightmostRight(short[] heads, int head, int max) {
+ int rightmost = -1;
+ for (int i = head + 1; i < max; i++) if (heads[i] == head) rightmost = i;
+
+ return rightmost;
+ }
+
+ private static int leftmostLeft(short[] heads, int head, int min) {
+ int leftmost = -1;
+ for (int i = head - 1; i > min; i--) if (heads[i] == head) leftmost = i;
+ return leftmost;
+ }
+
+ public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA";
+
+ private static int ra,la;
+ private static int s_str;
+ private static int s_end, _cend,_cstr, s_stwrd,s_relend;
+
+ protected static final String TYPE = "TYPE",DIR = "D", FEAT="F";
+ public static final String POS = "POS";
+ protected static final String DIST = "DIST",MID = "MID";
+
+ private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10";
+
+ private static int di0, d4,d3,d2,d1,d5,d10;
+
+
+ private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS";
+
+
+
+ private static int nofeat;
+
+
+ private static int maxForm;
+
+
+ /**
+ * Initialize the features.
+	 * (no parameters; registers the feature names with the global MFB)
+ */
+ static public void initFeatures() {
+
+
+ MFB mf = new MFB();
+ mf.register(POS, MID);
+ s_str = mf.register(POS, STR);
+ s_end = mf.register(POS, END);
+
+ s_relend = mf.register(REL, END);
+
+ _cstr= mf.register(Cluster.SPATH,STR);
+ _cend=mf.register(Cluster.SPATH,END);
+
+
+ mf.register(TYPE, POS);
+
+ s_stwrd=mf.register(WORD,STWRD);
+ mf.register(POS,STPOS);
+
+ la = mf.register(DIR, LA);
+ ra = mf.register(DIR, RA);
+
+ // mf.register(TYPE, CHAR);
+
+ mf.register(TYPE, FEAT);
+ nofeat=mf.register(FEAT, "NOFEAT");
+
+ for(int k=0;k<215;k++) mf.register(TYPE, "F"+k);
+
+
+ di0=mf.register(DIST, _0);
+ d1=mf.register(DIST, _1);
+ d2=mf.register(DIST, _2);
+ d3=mf.register(DIST, _3);
+ d4=mf.register(DIST, _4);
+ d5=mf.register(DIST, _5);
+ // d5l=mf.register(DIST, _5l);
+ d10=mf.register(DIST, _10);
+
+
+ }
+
+ /* (non-Javadoc)
+ * @see extractors.Extractor#getType()
+ */
+ @Override
+ public int getType() {
+ return s_type;
+ }
+
+ /* (non-Javadoc)
+ * @see extractors.Extractor#setMaxForm(java.lang.Integer)
+ */
+ @Override
+ public void setMaxForm(int max) {
+ maxForm = max;
+ }
+
+ /* (non-Javadoc)
+ * @see extractors.Extractor#getMaxForm()
+ */
+ @Override
+ public int getMaxForm() {
+ return maxForm;
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/extractors/ExtractorFactory.java b/dependencyParser/mate-tools/src/extractors/ExtractorFactory.java
new file mode 100644
index 0000000..9543111
--- /dev/null
+++ b/dependencyParser/mate-tools/src/extractors/ExtractorFactory.java
@@ -0,0 +1,44 @@
+/**
+ *
+ */
+package extractors;
+
+import is2.data.Long2IntInterface;
+
+/**
+ * @author Dr. Bernd Bohnet, 29.04.2011
+ *
+ *
+ */
+public class ExtractorFactory {
+
+ public static final int StackedClustered = 4;
+ public static final int StackedClusteredR2 = 5;
+
+
+ private int type=-1;
+
+ /**
+	 * @param t the extractor type (StackedClustered or StackedClusteredR2)
+ */
+ public ExtractorFactory(int t) {
+ type=t;
+ }
+
+ /**
+	 * Creates the extractor matching the configured type.
+	 * @param l2i the long-to-int feature-hash mapping used by the extractor
+	 * @return a new extractor, or null if the configured type is unknown
+ */
+ public Extractor getExtractor(Long2IntInterface l2i) {
+ switch(type)
+ {
+ case StackedClustered:
+ return new ExtractorClusterStacked(l2i);
+ case StackedClusteredR2:
+ return new ExtractorClusterStackedR2(l2i);
+ }
+ return null;
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/extractors/ExtractorReranker.java b/dependencyParser/mate-tools/src/extractors/ExtractorReranker.java
new file mode 100644
index 0000000..2761f26
--- /dev/null
+++ b/dependencyParser/mate-tools/src/extractors/ExtractorReranker.java
@@ -0,0 +1,621 @@
+package extractors;
+
+
+import is2.data.Cluster;
+import is2.data.D4;
+import is2.data.Instances;
+import is2.data.Long2IntInterface;
+import is2.data.MFB;
+import is2.data.ParseNBest;
+import is2.util.DB;
+
+import java.util.Arrays;
+
+
+
+final public class ExtractorReranker {
+
+ public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos;
+ public static int d0,d1,d2,d3,d4,d5,d10;
+
+ MFB mf;
+
+ final D4 dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ;
+
+ public final Long2IntInterface li;
+
+ public ExtractorReranker(Long2IntInterface li) {
+ this.li=li;
+ dl1 = new D4(li);dl2 = new D4(li);
+ dwr = new D4(li);
+ dr = new D4(li);
+ dwwp = new D4(li);
+
+ dw = new D4(li);
+ dwp = new D4(li);
+
+ dlf = new D4(li);
+ d3lp = new D4(li); d2lp = new D4(li); d2pw = new D4(li); d2pp = new D4(li);
+
+ }
+
+ public static void initStat() {
+ DB.println("init called ");
+ MFB mf = new MFB();
+ s_rel = mf.getFeatureCounter().get(REL).intValue();;
+ s_pos = mf.getFeatureCounter().get(POS).intValue();
+ s_word = mf.getFeatureCounter().get(WORD).intValue();
+ s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits();
+ s_dir = mf.getFeatureCounter().get(DIR);
+ la = mf.getValue(DIR, LA);
+ ra = mf.getValue(DIR, RA);
+ s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST);
+ s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT);
+ s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH);
+ s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH);
+ }
+
+ public void init(){
+ mf = new MFB();
+
+ dl1.a0 = s_type;dl1.a1 = 3; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos;
+ dl2.a0 = s_type;dl2.a1 = 3;dl2.a2 = s_rel;dl2.a3 = s_rel;dl2.a4 = s_rel;dl2.a5 = s_rel;dl2.a6 = s_rel;dl2.a7 = s_rel;dl2.a8 = s_rel; dl2.a9 = s_rel;
+ dwp.a0 = s_type; dwp.a1 = 3; dwp.a2 = s_word; dwp.a3 = s_rel; dwp.a4 = s_rel; dwp.a5 = s_rel;dwp.a6 = s_rel;dwp.a7 = s_rel;
+ dwwp.a0 = s_type; dwwp.a1 = 3; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word;dwwp.a6 = s_pos;dwwp.a7 = s_pos;
+ }
+
+
+
+
+
+
+ public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA", FEAT="F";
+
+ private static int ra,la;
+ private static int s_str;
+ private static int s_end, _cend,_cstr, s_stwrd,s_relend;
+
+ protected static final String TYPE = "TYPE",DIR = "D";
+ public static final String POS = "POS";
+ protected static final String DIST = "DIST",MID = "MID";
+
+ private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10";
+
+
+
+ private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS";
+
+
+
+ private static int nofeat;
+
+
+ public static int maxForm;
+
+
+ final public static int _FC =60;
+
+
+ /**
+ * Initialize the features.
+	 * (no parameters; registers the feature names with the global MFB)
+ */
+ static public void initFeatures() {
+
+
+ MFB mf = new MFB();
+ mf.register(POS, MID);
+ s_str = mf.register(POS, STR);
+ s_end = mf.register(POS, END);
+
+ s_relend = mf.register(REL, END);
+
+ _cstr= mf.register(Cluster.SPATH,STR);
+ _cend=mf.register(Cluster.SPATH,END);
+
+
+ mf.register(TYPE, POS);
+
+ s_stwrd=mf.register(WORD,STWRD);
+ mf.register(POS,STPOS);
+
+ la = mf.register(DIR, LA);
+ ra = mf.register(DIR, RA);
+
+ // mf.register(TYPE, CHAR);
+
+ mf.register(TYPE, FEAT);
+ nofeat=mf.register(FEAT, "NOFEAT");
+
+ for(int k=0;k<60;k++) mf.register(TYPE, "F"+k);
+
+
+ d0 =mf.register(DIST, _0);
+ d1= mf.register(DIST, _1);
+ d2 =mf.register(DIST, _2);
+ d3= mf.register(DIST, _3);
+ d4= mf.register(DIST, _4);
+ d5= mf.register(DIST, _5);
+ // d5l=mf.register(DIST, _5l);
+ d10= mf.register(DIST, _10);
+
+
+ }
+
+ /**
+ * @param is
+	 * @param i the sentence index into the instances
+	 * @param parse the n-best parse to extract features for
+	 * @param v output feature-value array, terminated with Integer.MIN_VALUE
+ */
+ public void extractFeatures3(Instances is, int i, ParseNBest parse, int rank, long[] v) {
+
+ int f=1,n=0;
+
+ for(short k= 0; k<is.length(i)-1;k++) {
+
+ short[] chld = children(parse.heads,k);
+
+ f=2;
+
+ int fm = is.forms[i][k];
+ int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end;
+ int h = is.pposs[i][k];
+ int hrel = parse.labels[k];
+ int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend;
+ int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd;
+
+
+
+ int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend;
+
+ int [] rels = new int[chld.length];
+ int [] pss = new int[chld.length];
+ for(int j=0;j<chld.length;j++) {
+ rels[j] = parse.labels[chld[j]];
+ pss[j] = is.pposs[i][chld[j]];
+ }
+
+ StringBuilder rl = new StringBuilder(chld.length);
+ StringBuilder psl = new StringBuilder(chld.length);
+ for(int j=0;j<chld.length;j++) {
+ rl.append((char)rels[j]);
+ psl.append((char)pss[j]);
+ }
+
+ int rli = mf.register("rli", rl.toString());
+ int pli = mf.register("pli", psl.toString());
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+
+
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+
+ dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
+
+ Arrays.sort(rels);
+ Arrays.sort(pss);
+
+ rl = new StringBuilder(chld.length);
+ psl = new StringBuilder(chld.length);
+ for(int j=0;j<chld.length;j++) {
+ rl.append((char)rels[j]);
+ psl.append((char)pss[j]);
+ }
+ rli = mf.register("rli", rl.toString());
+ pli = mf.register("pli", psl.toString());
+
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+
+ dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal();
+ dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal();
+ dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel; dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hh; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hhf; dwwp.v4=h; dwwp.v5=hh; dwwp.cz6(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hh; dwwp.cz6(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hh; dwwp.v4=h; dwwp.v5=hrel; dwwp.cz6(); v[n++]=dwwp.getVal();
+
+
+ // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); v[n++]=dl1.getVal();
+// dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); v[n++]=dl1.getVal();
+ // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=dir;dl1.v5=hh; dl1.v6=hhh;dl1.v7=rlast; dl1.v8=r1; dl1.cz9(); v[n++]=dl1.getVal();
+ // dl1.v0= f++; dl1.v2=h;dl1.v3=hh; dl1.v4=hhh;dl1.v5=hrel; dl1.cz6(); v[n++]=dl1.getVal();
+
+
+ short hp = parse.heads[k];
+ short[] hchld = hp==-1?new short[0]:children(parse.heads,hp);
+
+ int [] hrels = new int[hchld.length];
+ int [] hpss = new int[hchld.length];
+ for(int j=0;j<hchld.length;j++) {
+ hrels[j] = parse.labels[hchld[j]];
+ hpss[j] = is.pposs[i][hchld[j]];
+ }
+
+
+ StringBuilder hrl = new StringBuilder(hchld.length);
+ StringBuilder hpsl = new StringBuilder(hchld.length);
+ for(int j=0;j<hchld.length;j++) {
+ hrl.append((char)hrels[j]);
+ hpsl.append((char)hpss[j]);
+ }
+ int hrli = mf.register("rli", hrl.toString());
+ int hpli = mf.register("pli", hpsl.toString());
+
+ dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=hrli; dwwp.cz4(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hrli; dwwp.cz4(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=fm; dwwp.cz4(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=rli; dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hrli;dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=hpli;dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+
+
+
+ }
+
+ v[n]=Integer.MIN_VALUE;
+ }
+
+ /**
+	 * This seems to work well with n-best n=8 (88.858074) , n=10 (88.836884), n=12 (88.858)
+ * n=14 (88.913417) n=16 (88.79546) n=20 (88.80621) n 50 (88.729364)
+ * 1-best: 88.749605
+ *
+ * @param is
+ * @param i
+ * @param parse
+ * @param rank
+ * @param v
+ * @param cluster
+ */
+ public void extractFeatures(Instances is, int i, ParseNBest parse, int rank, long[] v, Cluster cluster) {
+
+ // mf.getValue(REL, "SB");
+
+ int f=1,n=0;
+
+ for(short k= 0; k<is.length(i)-1;k++) {
+
+ short[] chld = children(parse.heads,k);
+
+ int abs = Math.abs(parse.heads[k]-k);
+ final int dist;
+ if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2;
+ else if (abs==2)dist=d1; else dist=d0;
+
+
+ f=2;
+
+ int fm = is.forms[i][k];
+ int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end;
+ int h = is.pposs[i][k];
+ int hrel = parse.labels[k];//is.labels[i][k];
+ int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend;
+ int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd;
+
+ int r1 = chld.length>0?parse.labels[chld[0]]:s_relend;
+ int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend;
+
+ int [] rels = new int[chld.length];
+ int [] pss = new int[chld.length];
+ int [] cls = new int[chld.length];
+
+ int[] rc = new int[30]; // 20 was a good length
+
+ for(int j=0;j<chld.length;j++) {
+ rels[j] = parse.labels[chld[j]];
+ if (rels[j]<rc.length) rc[rels[j]]++;
+ pss[j] = is.pposs[i][chld[j]];
+// cls[j] = is.forms[i][chld[j]]==-1?0:cluster.getLP(is.forms[i][chld[j]]);
+// cls[j] = cls[j]==-1?0:cls[j];
+ }
+
+ StringBuilder rl = new StringBuilder(chld.length);
+ StringBuilder psl = new StringBuilder(chld.length);
+ StringBuilder csl = new StringBuilder(chld.length);
+ for(int j=0;j<chld.length;j++) {
+ rl.append((char)rels[j]);
+ psl.append((char)pss[j]);
+// csl.append((char)cls[j]);
+ }
+
+ int rli = mf.register("rli", rl.toString());
+ int pli = mf.register("pli", psl.toString());
+// int cli = mf.register("cli", csl.toString());
+
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ // dwwp.v0=f++; dwwp.v2=cli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal();
+ //dwwp.v0=f++; dwwp.v2=cli; dwwp.cz3(); v[n++]=dwwp.getVal();
+
+ // dwwp.v0=f++; dwwp.v2=cli;dwwp.v3=h; dwwp.cz4(); v[n++]=dwwp.getVal();
+
+ for(int j=1;j<rc.length;j++) {
+ dwwp.v0=f++; dwwp.v2=rc[j]==0?1:rc[j]==1?2:3; dwwp.v3=j; dwwp.cz4(); v[n++]=dwwp.getVal();//
+ }
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+
+ dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
+
+ //dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hh; dwwp.v4=dist; dwwp.cz5(); v[n++]=dwwp.getVal();
+
+ Arrays.sort(rels);
+ Arrays.sort(pss);
+
+ rl = new StringBuilder(chld.length);
+ psl = new StringBuilder(chld.length);
+ for(int j=0;j<chld.length;j++) {
+ rl.append((char)rels[j]);
+ psl.append((char)pss[j]);
+ }
+ rli = mf.register("rli", rl.toString());
+ pli = mf.register("pli", psl.toString());
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+
+ dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal();
+ dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal();
+ dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
+ }
+
+ v[n]=Integer.MIN_VALUE;
+ }
+
+ /**
+
+ * Works well!
+ * @param is
+ * @param i
+ * @param parse
+ * @param rank
+ * @param v
+ */
+ public void extractFeatures6(Instances is, int i, ParseNBest parse, int rank, long[] v) {
+
+ // mf.getValue(REL, "SB");
+
+ int f=1,n=0;
+
+ for(short k= 0; k<is.length(i)-1;k++) {
+
+ short[] chld = children(parse.heads,k);
+
+ f=2;
+
+ int fm = is.forms[i][k];
+ int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end;
+ int h = is.pposs[i][k];
+ int hrel = parse.labels[k];//is.labels[i][k];
+ int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend;
+ int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd;
+
+ int r1 = chld.length>0?parse.labels[chld[0]]:s_relend;
+ int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend;
+
+ int [] rels = new int[chld.length];
+ int [] pss = new int[chld.length];
+
+ int[] rc = new int[30]; // 20 was a good length
+
+ for(int j=0;j<chld.length;j++) {
+ rels[j] = parse.labels[chld[j]];
+ if (rels[j]<rc.length) rc[rels[j]]++;
+ // if (rels[j]==sb) numSB++;
+ pss[j] = is.pposs[i][chld[j]];
+ }
+
+ StringBuilder rl = new StringBuilder(chld.length);
+ StringBuilder psl = new StringBuilder(chld.length);
+ for(int j=0;j<chld.length;j++) {
+ rl.append((char)rels[j]);
+ psl.append((char)pss[j]);
+ }
+
+ int rli = mf.register("rli", rl.toString());
+ int pli = mf.register("pli", psl.toString());
+
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal();
+
+ for(int j=1;j<rc.length;j++) {
+ dwwp.v0=f++; dwwp.v2=rc[j]==0?1:rc[j]==1?2:3; dwwp.v3=j; dwwp.cz4(); v[n++]=dwwp.getVal();//
+ }
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+
+ dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
+
+
+ Arrays.sort(rels);
+ Arrays.sort(pss);
+
+ rl = new StringBuilder(chld.length);
+ psl = new StringBuilder(chld.length);
+ for(int j=0;j<chld.length;j++) {
+ rl.append((char)rels[j]);
+ psl.append((char)pss[j]);
+ }
+ rli = mf.register("rli", rl.toString());
+ pli = mf.register("pli", psl.toString());
+
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+
+ dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal();
+ dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal();
+ dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
+
+ }
+
+ v[n]=Integer.MIN_VALUE;
+ }
+
+
+
+ public void extractFeatures2(Instances is, int i, ParseNBest parse, int rank, long[] v) {
+
+
+
+ int f=1,n=0;
+
+ for(short k= 0; k<is.length(i)-1;k++) {
+
+ short[] chld = children(parse.heads,k);
+
+ f=2;
+
+ int fm = is.forms[i][k];
+ int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end;
+ int h = is.pposs[i][k];
+ int hrel = parse.labels[k];//is.labels[i][k];
+ int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend;
+ int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd;
+
+ int r1 = chld.length>0?parse.labels[chld[0]]:s_relend;
+ int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend;
+
+ int [] rels = new int[chld.length];
+ int [] pss = new int[chld.length];
+
+
+
+ for(int j=0;j<chld.length;j++) {
+ rels[j] = parse.labels[chld[j]];
+ pss[j] = is.pposs[i][chld[j]];
+ }
+
+ StringBuilder rl = new StringBuilder(chld.length);
+ StringBuilder psl = new StringBuilder(chld.length);
+ for(int j=0;j<chld.length;j++) {
+ rl.append((char)rels[j]);
+ psl.append((char)pss[j]);
+ }
+
+ int rli = mf.register("rli", rl.toString());
+ int pli = mf.register("pli", psl.toString());
+
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+
+ dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
+
+
+ Arrays.sort(rels);
+ Arrays.sort(pss);
+
+ rl = new StringBuilder(chld.length);
+ psl = new StringBuilder(chld.length);
+ for(int j=0;j<chld.length;j++) {
+ rl.append((char)rels[j]);
+ psl.append((char)pss[j]);
+ }
+ rli = mf.register("rli", rl.toString());
+ pli = mf.register("pli", psl.toString());
+
+
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+ dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal();
+
+ dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal();
+
+ dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal();
+ dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal();
+ dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal();
+
+ }
+
+ v[n]=Integer.MIN_VALUE;
+ }
+
+
+
+ /**
+	 * @param heads the head index for each token of the parse
+	 * @param h the head whose children are collected
+	 * @return token indices whose head is h, in ascending order
+ */
+ private short[] children(short[] heads, short h) {
+
+ int c=0;
+ for(int k=0;k<heads.length;k++) if (heads[k] ==h ) c++;
+
+ short[] clds = new short[c];
+ c=0;
+ for(int k=0;k<heads.length;k++) if (heads[k] ==h ) clds[c++]=(short)k;
+ return clds;
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/extractors/ParallelExtract.java b/dependencyParser/mate-tools/src/extractors/ParallelExtract.java
new file mode 100755
index 0000000..a2ef72c
--- /dev/null
+++ b/dependencyParser/mate-tools/src/extractors/ParallelExtract.java
@@ -0,0 +1,194 @@
+package extractors;
+
+import is2.data.Cluster;
+import is2.data.DataF;
+import is2.data.Edges;
+import is2.data.F2SF;
+import is2.data.FV;
+import is2.data.Instances;
+import is2.data.Long2IntInterface;
+
+import java.util.ArrayList;
+import java.util.concurrent.Callable;
+
+
+/**
+ * @author Bernd Bohnet, 30.08.2009
+ *
+ * This class implements a parallel feature extractor.
+ */
+final public class ParallelExtract implements Callable<Object>
+{
+ // the data space of the weights for a dependency tree
+ final DataF d;
+
+ // the data extractor does the actual work
+ final Extractor extractor;
+
+ private Instances is;
+ private int i;
+
+ private F2SF para;
+
+ private Cluster cluster;
+
+ private Long2IntInterface li;
+
+ public ParallelExtract(Extractor e, Instances is, int i, DataF d, F2SF para,Cluster cluster, Long2IntInterface li) {
+
+ this.is =is;
+ extractor=e;
+ this.d =d;
+ this.i=i;
+ this.para=para;
+ this.cluster = cluster;
+ this.li=li;
+ }
+
+
+ public static class DSet {
+ int w1,w2;
+ }
+
+ public Object call() {
+
+ try {
+
+ F2SF f= para;
+
+
+ short[] pos=is.pposs[i];
+ int[] forms=is.forms[i];
+ int[] lemmas=is.plemmas[i];
+ short[][] feats=is.feats[i];
+ int length = pos.length;
+
+ long[] svs = new long[250];
+
+ int type=extractor.getType();
+
+ while (true) {
+
+ DSet set = get();
+ if (set ==null) break;
+
+ int w1=set.w1;
+ int w2=set.w2;
+
+ f.clear();
+ int n =extractor.basic(pos, forms, w1, w2,cluster, f);
+ d.pl[w1][w2]=f.getScoreF();
+
+ short[] labels = Edges.get(pos[w1], pos[w2],false);
+ float[][] lab = d.lab[w1][w2];
+
+ extractor.firstm(is, i, w1, w2, 0, cluster, svs);
+
+ if (labels!=null) {
+
+
+ for (int l = labels.length - 1; l >= 0; l--) {
+
+ short label = labels[l];
+
+ f.clear();
+ for(int k=svs.length-1;k>=0;k--) if (svs[k]>0) f.add(li.l2i(svs[k]+label*type));
+ lab[label][0]=f.getScoreF();
+ }
+ }
+
+ labels = Edges.get(pos[w1], pos[w2],true);
+
+ if (labels!=null) {
+
+ for (int l = labels.length - 1; l >= 0; l--) {
+
+ int label = labels[l];
+ f.clear();
+ for(int k=svs.length-1;k>=0;k--) if (svs[k]>0) f.add(li.l2i(svs[k]+label*type));
+ lab[label][1]=f.getScoreF();
+ }
+ }
+
+ int s = w1<w2 ? w1 : w2;
+ int e = w1<w2 ? w2 : w1;
+
+ int sg = w1<w2 ? w1 : 0;
+ int eg = w1<w2 ? length : w1+1;
+
+
+ for(int m=s;m<e;m++) {
+ for(int dir=0;dir<2;dir++) {
+ labels = Edges.get(pos[w1], pos[w2],dir==1);
+ float lab2[]= new float[labels.length];
+
+ int g = (m==s||e==m) ? -1 : m;
+
+
+ extractor.siblingm(is,i,pos,forms,lemmas,feats, w1, w2, g, 0, cluster, svs,n);
+
+ for (int l = labels.length - 1; l >= 0; l--) {
+
+ int label = labels[l];
+ f.clear();
+
+ for(int k=svs.length-1;k>=0;k--) {
+ if (svs[k]>0) f.add(li.l2i(svs[k]+label*type));
+ }
+ lab2[l] = (float)f.score;//f.getScoreF();
+ }
+ d.sib[w1][w2][m][dir]=lab2;
+ }
+ }
+
+ for(int m=sg;m<eg;m++) {
+ for(int dir=0;dir<2;dir++) {
+ labels = Edges.get(pos[w1], pos[w2],dir==1);
+ float[] lab2 = new float[labels.length];
+
+ int g = (m==s||e==m) ? -1 : m;
+
+ extractor.gcm(is, i, w1,w2,g, 0, cluster, svs);
+
+ for (int l = labels.length - 1; l >= 0; l--) {
+
+ int label = labels[l];
+
+ f.clear();
+ for(int k=svs.length-1;k>=0;k--) {
+ if (svs[k]>0) f.add(li.l2i(svs[k]+label*type));
+ }
+ lab2[l] = f.getScoreF();
+ }
+ d.gra[w1][w2][m][dir] =lab2;
+ }
+ }
+
+ }
+ } catch(Exception e ) {
+ e.printStackTrace();
+ }
+ return null;
+ }
+
+
+ static ArrayList<DSet> sets = new ArrayList<DSet>();
+
+ private DSet get() {
+
+ synchronized (sets) {
+ if (sets.size()==0) return null;
+ return sets.remove(sets.size()-1);
+ }
+ }
+ static public void add(int w1, int w2){
+ DSet ds =new DSet();
+ ds.w1=w1;
+ ds.w2=w2;
+ sets.add(ds);
+ }
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/Closed.java b/dependencyParser/mate-tools/src/is2/data/Closed.java
new file mode 100755
index 0000000..378d0c6
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Closed.java
@@ -0,0 +1,31 @@
+package is2.data;
+
+
+
+final public class Closed {
+
+ public double p;
+ short b,e,m;
+ byte dir;
+
+ Closed d;
+ Open u;
+
+ public Closed(short s, short t, int m, int dir,Open u, Closed d, float score) {
+ this.b = s;
+ this.e = t;
+ this.m = (short)m;
+ this.dir = (byte)dir;
+ this.u=u;
+ this.d =d;
+ p=score;
+ }
+
+
+ public void create(Parse parse) {
+ if (u != null) u.create(parse);
+ if (d != null) d.create(parse);
+ }
+}
+
+
diff --git a/dependencyParser/mate-tools/src/is2/data/Cluster.java b/dependencyParser/mate-tools/src/is2/data/Cluster.java
new file mode 100644
index 0000000..485713d
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Cluster.java
@@ -0,0 +1,158 @@
+/**
+ *
+ */
+package is2.data;
+
+
+
+import is2.util.DB;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+
+/**
+ * @author Dr. Bernd Bohnet, 28.10.2010
+ *
+ *
+ */
+final public class Cluster {
+
+ // Feature-map categories under which cluster paths are registered.
+ public static final String LPATH = "LP";
+ public static final String SPATH = "SP";
+
+ // [word][p] p = [0:long-path | 1:short-path]
+ // NOTE(review): the constructor below stores the SHORT path at index 0 and
+ // the LONG path at index 1, contradicting this legacy comment; getLP(int)
+ // also reads index 0. Confirm intended layout against callers.
+ final private short[][] word2path;
+
+ // Empty cluster: no words, lookups on it are out of range.
+ public Cluster() {
+ word2path =new short[0][0];
+ }
+
+ /**
+ * Reads a word-cluster file (one "path&lt;TAB&gt;word" entry per line),
+ * registers paths and words with the encoder, then builds the
+ * word-id to path-id table.
+ *
+ * @param clusterFile name of the tab-separated cluster file (UTF-8)
+ * @param mf encoder mapping strings to integer feature ids
+ * @param ls maximum length of the short-path prefix
+ */
+ public Cluster(String clusterFile, IEncoderPlus mf, int ls) {
+
+ final String REGEX = "\t";
+
+ // register words
+ try {
+ BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768);
+
+ int cnt=0;
+ String line;
+ while ((line =inputReader.readLine())!=null) {
+
+ cnt++;
+ try {
+ String[] split = line.split(REGEX);
+ // short path = prefix of the full path, truncated to ls chars
+ mf.register(SPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls));
+ mf.register(LPATH, split[0]);
+ mf.register(PipeGen.WORD, split[1]);
+ } catch(Exception e) {
+ // malformed line: report and keep reading (best effort)
+ System.out.println("Error in cluster line "+cnt+" error: "+e.getMessage());
+ }
+ }
+ System.out.println("read number of clusters "+cnt);
+ inputReader.close();
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ // one row per registered word id, two path slots per word
+ word2path = new short[mf.getFeatureCounter().get(PipeGen.WORD)][2];
+
+
+ // insert words
+ try {
+ String line;
+ BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768);
+
+ while ((line =inputReader.readLine())!=null) {
+
+ String[] split = line.split(REGEX);
+ int wd = mf.getValue(PipeGen.WORD, split[1]);
+ word2path[wd][0] = (short)mf.getValue(SPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls));
+ word2path[wd][1] = (short)mf.getValue(LPATH, split[0]);
+ }
+ inputReader.close();
+ // count how many word slots actually received a path (diagnostics only)
+ int fill=0;
+ for(int l = 0; l<word2path.length; l++ ){
+ if (word2path[l][0]!=0) fill++;
+ }
+ /*
+ for(int l = 0; l<word2path.length; l++ ){
+ if (word2path[l][1]!=0) fillL++;
+ if (word2path[l][1]<-1) System.out.println("lower "+word2path[l][1]);
+ }
+ */
+ System.out.println("filled "+fill+" of "+word2path.length);
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Deserializes the cluster table.
+ * @param dis stream to read from
+ * @throws IOException
+ */
+ public Cluster(DataInputStream dis) throws IOException {
+
+ word2path = new short[dis.readInt()][2];
+ for(int i =0;i<word2path.length;i++) {
+ word2path[i][0]=dis.readShort();
+ word2path[i][1]=dis.readShort();
+ }
+ DB.println("Read cluster with "+word2path.length+" words ");
+ }
+
+ /**
+ * Serializes the cluster table (size, then two shorts per word).
+ * @param dos stream to write to
+ * @throws IOException
+ */
+ public void write(DataOutputStream dos) throws IOException {
+
+ dos.writeInt(word2path.length);
+ for(short[] i : word2path) {
+ dos.writeShort(i[0]);
+ dos.writeShort(i[1]);
+ }
+
+ }
+
+ /**
+ * @param form the id of a word form
+ * @return the short path to the word form in the cluster
+
+ final public int getSP(int form) {
+ if (word2path.length<form) return -1;
+ return word2path[form][0];
+ }
+ */
+ /**
+ * get the long path to a word form in the cluster
+ * @param form the id of a word form
+ * @return the path id, or -1 when unknown/out of range
+ * NOTE(review): reads slot 0, which the file-reading constructor fills
+ * with the SHORT path id — verify this is the intended slot.
+ */
+ final public int getLP(int form) {
+ if (word2path.length<=form || word2path[form].length<=0) return -1;
+ return word2path[form][0]==0?-1:word2path[form][0];
+ }
+
+ // Path lookup with explicit slot l (0 or 1); 0 is mapped to -1 ("no path").
+ final public int getLP(int form, int l) {
+ if (word2path.length<form) return -1;
+ return word2path[form][l]==0?-1:word2path[form][l];
+ }
+
+ // Number of word entries in the table.
+ final public int size() {
+ return word2path.length;
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/D4.java b/dependencyParser/mate-tools/src/is2/data/D4.java
new file mode 100644
index 0000000..8be3df2
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/D4.java
@@ -0,0 +1,191 @@
+/**
+ *
+ */
+package is2.data;
+
+import is2.util.DB;
+
+/**
+ * @author Dr. Bernd Bohnet, 30.10.2010
+ *
+ * This class computes the mapping of features to the weight vector.
+ */
+final public class D4 extends DX {
+ // Encoder that packs feature components v0..vN into one long code h
+ // using the radices a0..aN (inherited from DX). Unlike D6/D7, the place
+ // value is built incrementally in "shift" while encoding, so a0..aN here
+ // are per-position radices, not precomputed cumulative products.
+ private long shift; // place value for the next appended component
+ private long h; // current feature code; -1 marks "invalid"
+
+
+ private final Long2IntInterface _li; // maps long codes to weight indices
+ public D4(Long2IntInterface li) {
+ _li=li;
+ }
+
+
+ // Reset all components and the encoder state.
+ final public void clean() {
+ v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0;
+ shift=0;h=0;
+ }
+
+ // cz<N>() encodes v0..v<N-1>; any negative component invalidates (h=-1).
+ // c<N>() variants additionally return the resulting code.
+ final public void cz3(){
+ if (v0<0||v1<0||v2<0) { h=-1;return;}
+
+ // NOTE(review): only cz3/c3 cast v2 to long before multiplying; the
+ // other cz*/c* methods rely on v*/shift already being long-typed.
+ h= v0+v1*(shift =a0)+(long)v2*(shift *=a1);
+ shift *=a2;
+ }
+
+ final public long c3(){
+ if (v0<0||v1<0||v2<0) { h=-1;return h;}
+
+ h= v0+v1*(shift =a0)+(long)v2*(shift *=a1);
+ shift *=a2;
+ return h;
+ }
+
+ final public void cz4(){
+ if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;}
+
+ h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
+ shift *=a3;
+ }
+
+ final public long c4(){
+ if (v0<0||v1<0||v2<0||v3<0) {h=-1;return h;}
+
+ h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
+ shift *=a3;
+ return h;
+ }
+
+
+ final public void cz5(){
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return;}
+
+ h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift*=a2)+v4*(shift*=a3);
+ shift*=a4;
+
+ }
+
+ final public long c5(){
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return h;}
+
+ h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2)+v4*(shift*=a3);
+ shift*=a4;
+ return h;
+ }
+
+
+ final public void cz6(){
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;}
+
+ h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
+ h +=v4*(shift*=a3)+v5*(shift*=a4);
+ shift*=a5;
+ }
+
+ final public long c6(){
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return h;}
+
+ h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
+ h +=v4*(shift*=a3)+v5*(shift*=a4);
+ shift*=a5;
+ return h;
+ }
+
+
+ // Append one more component v with radix b to the current code.
+ final public long cs(int b, int v) {
+ if (h<0) {h=-1; return h;}
+
+ h += v*shift;
+ shift *=b;
+ return h;
+
+ }
+
+ // Append component v (radix b) and immediately add the mapped feature to f.
+ final public void csa(int b, int v, IFV f) {
+ if (h<0) {h=-1; return;}
+
+ h += v*shift;
+ shift *=b;
+ f.add(_li.l2i(h));
+ }
+
+ final public long csa(int b, int v) {
+ if (h<0) {h=-1; return-1; }
+
+ h += v*shift;
+ shift *=b;
+ return h;
+ }
+
+ // Current feature code (-1 if any component was invalid).
+ public final long getVal(){
+ return h;
+ }
+
+ // Map an externally supplied code into the feature vector.
+ public final void map(IFV f, long l){
+ if (l>0) f.add(this._li.l2i(l));
+ }
+
+ /**
+ * Adds the current code's mapped weight index to the feature vector.
+ * @param f target feature vector
+ */
+ final public void add(IFV f) {
+ f.add(_li.l2i(h));
+ }
+
+ final public void cz7() {
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;}
+
+ h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
+ h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5);
+ shift*=a6;
+
+ }
+
+ final public long c7() {
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;}
+
+ h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
+ h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5);
+ shift*=a6;
+ return h;
+ }
+
+ /**
+ * Encodes v0..v7.
+ */
+ final public void cz8() {
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;}
+
+ h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
+ h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5)+v7*(shift*=a6);
+ shift*=a7;
+ }
+
+ final public void cz9() {
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0||v8<0) {h=-1; return;}
+
+ h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2);
+ h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5)+v7*(shift*=a6)+v8*(shift*=a7);
+ shift*=a8;
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.data.DX#computeLabeValue(short, short)
+ */
+ @Override
+ public int computeLabeValue(int label, int shift) {
+ return label*shift;
+ }
+
+
+ // No precomputation needed for this encoder (see D6.fix for contrast).
+ public void fix() {
+
+ }
+
+
+}
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/data/D6.java b/dependencyParser/mate-tools/src/is2/data/D6.java
new file mode 100644
index 0000000..3694249
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/D6.java
@@ -0,0 +1,197 @@
+/**
+ *
+ */
+package is2.data;
+
+import is2.util.DB;
+
+/**
+ * @author Dr. Bernd Bohnet, 30.10.2010
+ *
+ * This class computes the mapping of features to the weight vector.
+ */
+final public class D6 extends DX {
+ // Encoder that packs feature components v0..vN into one long code h.
+ // After fix(), a1..a9 hold CUMULATIVE place values (products of the
+ // original radices), so each cz*/c* is a simple dot product.
+ private long shift; // place value for components appended via cs/csa
+ private long h; // current feature code; -1 marks "invalid"
+
+
+ private final Long2IntInterface _li; // maps long codes to weight indices
+ public D6(Long2IntInterface li) {
+ _li=li;
+ }
+
+ boolean fixed =false; // guards against applying fix() twice
+
+ // Converts the per-position radices a0..a8 into cumulative place values.
+ // Must be called exactly once before any cz*/c* call; calling it again
+ // would multiply already-cumulative values (hence the warning).
+ public void fix() {
+
+ if (fixed) {
+ DB.println("warning: already fixed");
+ // return;
+ }
+
+ long t0= 1, t1=a0, t2=t1*a1, t3=t2*a2,t4=t3*a3, t5=t4*a4,t6=t5*a5, t7=t6*a6, t8=t7*a7, t9=t8*a8;
+
+
+
+
+ a0=t0;a1=t1;a2=t2;a3=t3;a4=t4;a5=t5;a6=t6;a7=t7;a8=t8; a9=t9;
+
+ fixed=true;
+ }
+
+
+
+ // Reset all components and the encoder state.
+ final public void clean() {
+ v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0;
+ shift=0;h=0;
+ }
+
+ // cz<N>() encodes v0..v<N-1>; any negative component invalidates (h=-1).
+ // c<N>() variants additionally return the resulting code.
+ final public void cz3(){
+ if (v0<0||v1<0||v2<0) { h=-1;return;}
+
+ h= v0+v1*a1+v2*a2;
+ shift =a3;
+ }
+
+ final public long c3(){
+ if (v0<0||v1<0||v2<0) { h=-1;return h;}
+
+ h= v0+v1*a1+v2*a2;
+ shift =a3;
+ return h;
+ }
+
+ final public void cz4(){
+ if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;}
+
+ h =v0+v1*a1+v2*a2+v3*a3;
+ shift =a4;
+ }
+
+ final public long c4(){
+ if (v0<0||v1<0||v2<0||v3<0) {h=-1;return h;}
+
+ h =v0+v1*a1+v2*a2+v3*a3;
+ shift =a4;
+ return h;
+ }
+
+
+ final public void cz5(){
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return;}
+
+ h =v0+v1*a1+v2*a2+v3*a3+v4*a4;
+ shift=a5;
+
+ }
+
+ final public long c5(){
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return h;}
+
+ h =v0+v1*a1+v2*a2+v3*a3+v4*a4;
+ shift=a5;
+ return h;
+ }
+
+
+ final public void cz6(){
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;}
+
+ h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5;
+ shift=a6;
+ }
+
+ final public long c6(){
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return h;}
+
+ h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5;
+ shift=a6;
+ return h;
+ }
+
+
+ // Append one more component v with radix b to the current code.
+ final public long cs(int b, int v) {
+ if (h<0) {h=-1; return h;}
+
+ h += v*shift;
+ shift *=b;
+ return h;
+
+ }
+
+ // Append component v (radix b) and immediately add the mapped feature to f.
+ final public void csa(int b, int v, IFV f) {
+ if (h<0) {h=-1; return;}
+
+ h += v*shift;
+ shift *=b;
+ f.add(_li.l2i(h));
+ }
+
+ final public long csa(int b, int v) {
+ if (h<0) {h=-1; return-1; }
+
+ h += v*shift;
+ shift *=b;
+ return h;
+ }
+
+ // Current feature code (-1 if any component was invalid).
+ public final long getVal(){
+ return h;
+ }
+
+ // Map an externally supplied code into the feature vector.
+ public final void map(IFV f, long l){
+ if (l>0) f.add(this._li.l2i(l));
+ }
+
+ /**
+ * Adds the current code's mapped weight index to the feature vector.
+ * @param f target feature vector
+ */
+ final public void add(IFV f) {
+ f.add(_li.l2i(h));
+ }
+
+ final public void cz7() {
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;}
+
+ h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6;
+ shift=a7;
+
+ }
+
+ final public long c7() {
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;}
+
+ h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6;
+ shift=a7;
+ return h;
+ }
+
+ /**
+ * Encodes v0..v7.
+ */
+ final public void cz8() {
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;}
+
+ h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7;
+ shift=a8;
+ }
+
+
+
+ /* (non-Javadoc)
+ * @see is2.data.DX#computeLabeValue(short, short)
+ */
+ @Override
+ public int computeLabeValue(int label, int shift) {
+ return label*shift;
+ }
+
+
+
+
+
+}
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/data/D7.java b/dependencyParser/mate-tools/src/is2/data/D7.java
new file mode 100644
index 0000000..f4675d8
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/D7.java
@@ -0,0 +1,220 @@
+/**
+ *
+ */
+package is2.data;
+
+
+/**
+ * @author Dr. Bernd Bohnet, 30.10.2010
+ *
+ * This class computes the mapping of features to the weight vector.
+ */
+final public class D7 extends DX {
+
+ // Encoder like D6 (cumulative place values after fix()), plus d<N>()
+ // variants that compute a code WITHOUT the v1 term and without touching
+ // the encoder state. The cz*/c* guards here check only v2 and higher;
+ // v0/v1 are assumed non-negative by the callers — TODO confirm.
+ private long shift; // place value for components appended via cs/csa
+ private long h; // current feature code; -1 marks "invalid"
+ private final Long2IntInterface _li; // maps long codes to weight indices
+
+ public D7(Long2IntInterface li) {
+ _li=li;
+ }
+
+ boolean fixed =false;
+
+ // Converts the per-position radices a0..a8 into cumulative place values.
+ // NOTE(review): unlike D6, the "fixed" flag is never consulted or set
+ // here, so calling fix() twice silently corrupts a1..a9 — verify callers
+ // invoke it exactly once.
+ public void fix() {
+
+ long t0= 1, t1=a0, t2=t1*a1, t3=t2*a2,t4=t3*a3, t5=t4*a4,t6=t5*a5, t7=t6*a6, t8=t7*a7, t9=t8*a8;
+
+ a0=t0;a1=t1;a2=t2;a3=t3;a4=t4;a5=t5;a6=t6;a7=t7;a8=t8; a9=t9;
+
+ }
+
+
+
+ // Reset all components and the encoder state.
+ final public void clean() {
+ v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0;
+ shift=0;h=0;
+ }
+
+ // cz<N>() encodes v0..v<N-1>; a negative checked component sets h=-1.
+ // c<N>() also returns the code; d<N>() returns the code minus the v1 term.
+ final public void cz3(){
+ if (v2<0) { h=-1;return;}
+
+ h= v0+v1*a1+v2*a2;
+ shift =a3;
+ }
+
+ final public long c3(){
+ if (v2<0) { h=-1;return h;}
+
+ h= v0+v1*a1+v2*a2;
+ shift =a3;
+ return h;
+ }
+
+ final public long d3(){
+ if (v2<0)return -1;
+ return v0+v2*a2;
+ }
+
+ final public void cz4(){
+ // if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;}
+ if (v2<0||v3<0) {h=-1;return;}
+
+ h =v0+v1*a1+v2*a2+v3*a3;
+ shift =a4;
+ }
+
+ final public long c4(){
+ if (v2<0||v3<0) {h=-1;return h;}
+
+ h =v0+v1*a1+v2*a2+v3*a3;
+ shift =a4;
+ return h;
+ }
+
+
+ final public long d4(){
+ if (v2<0||v3<0) return -1;
+ return v0+v2*a2+v3*a3;
+ }
+
+
+ final public void cz5(){
+
+ if (v2<0||v3<0||v4<0) {h=-1;return;}
+
+ h =v0+v1*a1+v2*a2+v3*a3+v4*a4;
+ shift=a5;
+
+ }
+
+ final public long c5(){
+
+ if (v2<0||v3<0||v4<0) {h=-1;return h;}
+
+ h =v0+v1*a1+v2*a2+v3*a3+v4*a4;
+ shift=a5;
+ return h;
+ }
+
+ final public long d5(){
+ if (v2<0||v3<0||v4<0) return -1;
+ return v0+v2*a2+v3*a3+v4*a4;
+ }
+
+
+ final public void cz6(){
+
+ // NOTE(review): cz6 checks v0/v1 unlike the other cz* in this class.
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;}
+
+ h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5;
+ shift=a6;
+ }
+
+ final public long c6(){
+
+ if (v2<0||v3<0||v4<0||v5<0) {h=-1; return h;}
+
+ h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5;
+ shift=a6;
+ return h;
+ }
+
+ final public long d6(){
+ if (v2<0||v3<0||v4<0||v5<0) return -1;
+ return v0+v2*a2+v3*a3 +v4*a4+v5*a5;
+ }
+
+
+ // Append one more component v with radix b to the current code.
+ final public long cs(int b, int v) {
+ if (h<0) {h=-1; return h;}
+
+ h += v*shift;
+ shift *=b;
+ return h;
+
+ }
+
+ // Append component v (radix b) and immediately add the mapped feature to f.
+ final public void csa(int b, int v, IFV f) {
+ if (h<0) {h=-1; return;}
+
+ h += v*shift;
+ shift *=b;
+ f.add(_li.l2i(h));
+ }
+
+ final public long csa(int b, int v) {
+ if (h<0) {h=-1; return-1; }
+
+ h += v*shift;
+ shift *=b;
+ return h;
+ }
+
+ // Current feature code (-1 if any checked component was invalid).
+ public final long getVal(){
+ return h;
+ }
+
+ // Map an externally supplied code into the feature vector.
+ public final void map(IFV f, long l){
+ if (l>0) f.add(this._li.l2i(l));
+ }
+
+ /**
+ * Adds the current code's mapped weight index to the feature vector.
+ * @param f target feature vector
+ */
+ final public void add(IFV f) {
+ f.add(_li.l2i(h));
+ }
+
+ final public void cz7() {
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;}
+
+ h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6;
+ shift=a7;
+
+ }
+
+
+ final public long c7() {
+ if (v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;}
+
+ h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6;
+ shift=a7;
+ return h;
+ }
+
+ final public long d7() {
+ if (v2<0||v3<0||v4<0||v5<0||v6<0) return -1;
+ return v0+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6;
+ }
+
+ /**
+ * Encodes v0..v7.
+ */
+ final public void cz8() {
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;}
+
+ h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7;
+ shift=a8;
+ }
+
+ final public long d8() {
+ if (v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {return-1;}
+ return v0+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7;
+ }
+
+
+
+ /* (non-Javadoc)
+ * @see is2.data.DX#computeLabeValue(short, short)
+ */
+ @Override
+ public int computeLabeValue(int label, int shift) {
+ return label*shift;
+ }
+
+
+
+
+
+}
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/data/DPSTree.java b/dependencyParser/mate-tools/src/is2/data/DPSTree.java
new file mode 100644
index 0000000..554f756
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/DPSTree.java
@@ -0,0 +1,115 @@
+/**
+ *
+ */
+package is2.data;
+
+import is2.util.DB;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Stack;
+
+/**
+ * @author Dr. Bernd Bohnet, 17.01.2011
+ *
+ * Dynamic phrase structure tree.
+ */
+public class DPSTree {
+
+
+ // Number of valid nodes; node ids range over 0..size-1.
+ private int size=0;
+
+ // heads[i] = parent of node i; labels[i] = phrase label of node i.
+ public int[] heads;
+ public int[] labels;
+
+ public DPSTree() {
+ this(30);
+ }
+
+ public DPSTree(int initialCapacity) {
+ heads = new int[initialCapacity];
+ labels = new int[initialCapacity];
+ }
+
+
+ /**
+ * Increases the capacity of this <tt>Graph</tt> instance, if
+ * necessary, to ensure that it can hold at least the number of nodes
+ * specified by the minimum capacity argument.
+ *
+ * @param minCapacity the desired minimum capacity.
+ */
+ private void ensureCapacity(int minCapacity) {
+
+ if (minCapacity > heads.length) {
+
+ int newCapacity =minCapacity + 1;
+
+ if (newCapacity < minCapacity) newCapacity = minCapacity;
+ int oldIndex[] = heads;
+ heads = new int[newCapacity];
+ System.arraycopy(oldIndex, 0, heads, 0, oldIndex.length);
+
+ oldIndex = labels;
+ labels = new int[newCapacity];
+ System.arraycopy(oldIndex, 0, labels, 0, oldIndex.length);
+
+ }
+ }
+
+
+ final public int size() {
+ return size;
+ }
+
+
+ final public boolean isEmpty() {
+ return size == 0;
+ }
+
+ // Reset the tree without releasing the backing arrays.
+ final public void clear() {
+ size = 0;
+ }
+
+ // Reserve node ids 0..terminals (terminals + 1 slots) for the terminals.
+ final public void createTerminals(int terminals) {
+ ensureCapacity(terminals+1);
+ size= terminals+1;
+ }
+
+ // Append a new phrase node and return its id.
+ final public int create(int phrase) {
+
+ ensureCapacity(size+1);
+ labels[size] =phrase;
+ size++;
+ return size-1;
+ }
+
+ // Create (or relabel) a phrase node with a fixed id; negative ids append.
+ public int create(int phrase, int nodeId) {
+
+ if (nodeId<0) return this.create(phrase);
+ ensureCapacity(nodeId+1);
+ labels[nodeId] =phrase;
+ // Fix: was "size<nodeId" — an off-by-one that left a node created at
+ // index == size outside the tree (size must always exceed the max id).
+ if (size<=nodeId) size=nodeId+1;
+ return nodeId;
+ }
+
+ // Attach node i below node j.
+ public void createEdge(int i, int j) {
+ heads[i] =j;
+ }
+
+ // Deep copy of the used portion of the tree.
+ public DPSTree clone() {
+ DPSTree ps = new DPSTree(this.size+1);
+
+ // bulk copy instead of the original element-by-element loop
+ System.arraycopy(heads, 0, ps.heads, 0, size);
+ System.arraycopy(labels, 0, ps.labels, 0, size);
+ ps.size=size;
+ return ps;
+
+ }
+
+}
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/data/DX.java b/dependencyParser/mate-tools/src/is2/data/DX.java
new file mode 100644
index 0000000..c357b58
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/DX.java
@@ -0,0 +1,58 @@
+/**
+ *
+ */
+package is2.data;
+
+import is2.data.IFV;
+
+/**
+ * @author Dr. Bernd Bohnet, 30.08.2011
+ *
+ *
+ */
+public abstract class DX {
+
+ // Shared state of the feature encoders (D4, D6, D7):
+ // a0..a9 are the radices / place values, v0..v9 the component values
+ // combined by the cz* methods into a single long feature code.
+ public long a0,a1,a2,a3,a4,a5,a6,a7,a8,a9;
+ public long v0,v1,v2,v3,v4,v5,v6,v7,v8,v9;
+
+ // cz<N> combines components v0..v<N-1> into the internal code; a negative
+ // checked component marks the code invalid.
+ public abstract void cz3();
+
+ public abstract void cz4();
+
+ public abstract void cz5();
+
+ public abstract void cz6();
+
+ public abstract void cz7();
+
+ public abstract void cz8();
+
+ // Reset components and encoder state.
+ public abstract void clean();
+
+ // Append a further component v with radix b to the current code.
+ public abstract long cs(int b, int v);
+
+ public abstract long csa(int b, int v);
+
+ // As csa, but also adds the mapped feature index to f.
+ public abstract void csa(int b, int v, IFV f);
+
+ /**
+ * @return the current feature code (negative when invalid)
+ */
+ public abstract long getVal();
+
+ /**
+ * Maps code l into feature vector f.
+ * @param f target feature vector
+ * @param l feature code
+ */
+ public abstract void map(IFV f, long l);
+
+ /**
+ * Combines a label id with a shift/type factor into a code offset.
+ * @param label label id
+ * @param s_type shift factor
+ * @return the combined offset
+ */
+ public abstract int computeLabeValue(int label,int s_type) ;
+
+ // Finalize the radices before encoding (may precompute cumulative values).
+ public abstract void fix();
+
+}
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/data/DataF.java b/dependencyParser/mate-tools/src/is2/data/DataF.java
new file mode 100755
index 0000000..f127fbd
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/DataF.java
@@ -0,0 +1,39 @@
+package is2.data;
+
+
+
+final public class DataF {
+
+ // Precomputed edge/label scores for one sentence (two score slots per
+ // direction where a last dimension of size 2 appears).
+ final public short typesLen; // number of edge label types
+ final public int len; // sentence length this data was built for
+
+ // first order features
+ final public float[][] pl; // pl[head][dependent] arc scores
+
+ // remove !!!!
+// final public float[][] highestLab;
+
+ //final public FV[][][] label;
+ final public float[][][][] lab; // lab[head][dep][labelType][direction]
+
+
+ public FV fv;
+
+ // sib[w1][w2][sibling][direction][labelType] second-order sibling scores
+ final public float[][][][][] sib;
+
+ // gra[w1][w2][grandchild][direction][labelType] grandchild scores
+ final public float[][][][][] gra;
+
+
+ public DataF(int length, short types) {
+ typesLen=types;
+ len =length;
+
+ pl = new float[length][length];
+ lab = new float[length][length][types][2];
+ // highestLab = new float[length][length];
+
+ // last dimension left null until filled by the scorer
+ sib = new float[length][length][length][2][];
+ gra = new float[length][length][length][2][];
+
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/DataFES.java b/dependencyParser/mate-tools/src/is2/data/DataFES.java
new file mode 100644
index 0000000..9772858
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/DataFES.java
@@ -0,0 +1,38 @@
+package is2.data;
+
+
+
+final public class DataFES {
+
+ // Precomputed edge/label scores for one sentence; like DataF but without
+ // the explicit direction dimension.
+ final public short typesLen; // number of edge label types
+ final public int len; // sentence length this data was built for
+
+ // first order features
+ final public float[][] pl; // pl[head][dependent] arc scores
+
+ // remove !!!!
+// final public float[][] highestLab;
+
+ //final public FV[][][] label;
+ final public float[][][] lab; // lab[head][dep][labelType]
+
+
+ public FV fv;
+
+ // sib[w1][w2][sibling][labelType] second-order sibling scores
+ final public float[][][][] sib;
+
+ // gra[w1][w2][grandchild][labelType] grandchild scores
+ final public float[][][][] gra;
+
+
+ public DataFES(int length, short types) {
+ typesLen=types;
+ len =length;
+
+ pl = new float[length][length];
+ lab = new float[length][length][types];
+
+ // last dimension left null until filled by the scorer
+ sib = new float[length][length][length][];
+ gra = new float[length][length][length][];
+
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/DataT.java b/dependencyParser/mate-tools/src/is2/data/DataT.java
new file mode 100644
index 0000000..47691f8
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/DataT.java
@@ -0,0 +1,25 @@
+package is2.data;
+
+
+
+final public class DataT {
+
+ // Precomputed label scores with four operation slots per (a, b, label).
+ final public short typesLen; // number of label types
+ final public int len; // sentence length this data was built for
+
+
+ //final public FV[][][] label;
+ // a b lab op
+ final public float[][][][] lab;
+
+
+
+ public DataT(int length, short types) {
+ typesLen=types;
+ len =length;
+
+ lab = new float[length][length][types][4];
+
+
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/Edges.java b/dependencyParser/mate-tools/src/is2/data/Edges.java
new file mode 100644
index 0000000..f8b2ef9
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Edges.java
@@ -0,0 +1,224 @@
+/**
+ *
+ */
+package is2.data;
+
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+/**
+ * @author Dr. Bernd Bohnet, 13.05.2009;
+ *
+ *
+ */
+public final class Edges {
+
+ // Static registry of the dependency labels observed in training for each
+ // (POS(head), POS(dependent), direction) combination, used to restrict
+ // label candidates at parse time. All state is global/static.
+
+ // edges[pos1][pos2][dir'][k] = k-th label seen; dir' is 0 for dir==true
+ private static short[][][][] edges;
+ // global frequency of each label (used to pick the default label)
+ private static HashMap<Short,Integer> labelCount = new HashMap<Short,Integer>();
+
+ // frequency per "pos1-pos2<dir><label>" string key (used by comparator C)
+ private static HashMap<String,Integer> slabelCount = new HashMap<String,Integer>();
+
+
+ // single-element fallback returned when no label was seen for a POS pair
+ static short[] def = new short[1];
+
+ private Edges () {}
+
+ /**
+ * Allocates the registry for the given POS tagset size.
+ * @param length number of POS tags
+ */
+ public static void init(int length) {
+ edges = new short[length][length][2][];
+ }
+
+
+ // Sets def[0] to the globally most frequent label.
+ public static void findDefault(){
+
+ int best =0;
+
+
+
+ for(Entry<Short,Integer> e : labelCount.entrySet()) {
+
+
+ if (best<e.getValue()) {
+ best = e.getValue();
+ def[0]=e.getKey();
+ }
+ }
+
+
+ // labelCount=null;
+ // String[] types = new String[mf.getFeatureCounter().get(PipeGen.REL)];
+ // for (Entry<String, Integer> e : MFO.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey();
+
+ is2.util.DB.println("set default label to "+def[0]+" " );
+
+ // System.out.println("found default "+def[0]);
+
+ }
+
+
+ // Records a label for the POS pair (pos1, pos2) in direction dir.
+ final static public void put(int pos1, int pos2, boolean dir, short label) {
+ putD(pos1, pos2,dir, label);
+ // putD(pos2, pos1,!dir, label);
+ }
+
+
+ final static public void putD(int pos1, int pos2, boolean dir, short label) {
+
+ // update the global label frequency
+ Integer lc = labelCount.get(label);
+ if (lc==null) labelCount.put(label, 1);
+ else labelCount.put(label, lc+1);
+
+ // update the per-(pos1,pos2,dir,label) frequency; key is a plain string
+ // concatenation — assumed collision-free for the tagset sizes used here
+ String key = pos1+"-"+pos2+dir+label;
+ Integer lcs = slabelCount.get(key);
+ if (lcs==null) slabelCount.put(key, 1);
+ else slabelCount.put(key, lcs+1);
+
+
+ if (edges[pos1][pos2][dir?0:1]==null) {
+ // first label for this combination
+ edges[pos1][pos2][dir?0:1]=new short[1];
+ edges[pos1][pos2][dir?0:1][0]=label;
+
+// edgesh[pos1][pos2][dir?0:1] = new TIntHashSet(2);
+// edgesh[pos1][pos2][dir?0:1].add(label);
+ } else {
+ short labels[] = edges[pos1][pos2][dir?0:1];
+ for(short l : labels) {
+ //contains label already?
+ if(l==label) return;
+ }
+
+ // grow the array by one and append the new label
+ short[] nlabels = new short[labels.length+1];
+ System.arraycopy(labels, 0, nlabels, 0, labels.length);
+ nlabels[labels.length]=label;
+ edges[pos1][pos2][dir?0:1]=nlabels;
+
+ // edgesh[pos1][pos2][dir?0:1].add(label);
+ }
+ }
+
+ // Labels observed for (pos1, pos2, dir); falls back to the default label.
+ final static public short[] get(int pos1, int pos2, boolean dir) {
+
+ if (pos1<0 || pos2<0 || edges[pos1][pos2][dir?0:1]==null) return def;
+ return edges[pos1][pos2][dir?0:1];
+ }
+
+
+ /**
+ * Serializes the registry: tagset size, per-pair label lists, default.
+ * @param d output stream
+ */
+ static public void write(DataOutputStream d) throws IOException {
+
+ int len = edges.length;
+ d.writeShort(len);
+
+ for(int p1 =0;p1<len;p1++) {
+ for(int p2 =0;p2<len;p2++) {
+ if (edges[p1][p2][0]==null) d.writeShort(0);
+ else {
+ d.writeShort(edges[p1][p2][0].length);
+ for(int l =0;l<edges[p1][p2][0].length;l++) {
+ d.writeShort(edges[p1][p2][0][l]);
+ }
+
+ }
+
+ if (edges[p1][p2][1]==null) d.writeShort(0);
+ else {
+ d.writeShort(edges[p1][p2][1].length);
+ for(int l =0;l<edges[p1][p2][1].length;l++) {
+ d.writeShort(edges[p1][p2][1][l]);
+ }
+ }
+ }
+ }
+
+ d.writeShort(def[0]);
+
+ }
+
+
+ /**
+ * Reads the registry back in the format produced by write().
+ * @param d input stream
+ */
+ public static void read(DataInputStream d) throws IOException {
+ int len = d.readShort();
+
+ edges = new short[len][len][2][];
+ for(int p1 =0;p1<len;p1++) {
+ for(int p2 =0;p2<len;p2++) {
+ int ll = d.readShort();
+ if (ll==0) {
+ edges[p1][p2][0]=null;
+ } else {
+ edges[p1][p2][0] = new short[ll];
+ for(int l =0;l<ll;l++) {
+ edges[p1][p2][0][l]=d.readShort();
+ }
+ }
+
+ ll = d.readShort();
+ if (ll==0) {
+ edges[p1][p2][1]=null;
+ } else {
+ edges[p1][p2][1] = new short[ll];
+ for(int l =0;l<ll;l++) {
+ edges[p1][p2][1][l]=d.readShort();
+ }
+ }
+ }
+ }
+
+ def[0]= d.readShort();
+
+ }
+
+ // Orders labels by descending frequency for a fixed "pos1-pos2dir" key
+ // (reads slabelCount; entries must exist for every compared label).
+ public static class C implements Comparator<Short> {
+
+ public C() {
+ super();
+ }
+
+ String _key;
+
+ public C(String key) {
+ super();
+ _key=key;
+ }
+
+ /* (non-Javadoc)
+ * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
+ */
+ @Override
+ public int compare(Short l1, Short l2) {
+
+ // int c1 = labelCount.get(l1);
+ // int c2 = labelCount.get(l2);
+ // if (true) return c1==c2?0:c1>c2?-1:1;
+
+ int x1 = slabelCount.get(_key+l1.shortValue());
+ int x2 = slabelCount.get(_key+l2.shortValue());
+ // System.out.println(x1+" "+x2);
+
+
+ return x1==x2?0:x1>x2?-1:1;
+
+
+
+ }
+
+
+
+
+
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/F2S.java b/dependencyParser/mate-tools/src/is2/data/F2S.java
new file mode 100755
index 0000000..1f1f668
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/F2S.java
@@ -0,0 +1,48 @@
+package is2.data;
+
+
+final public class F2S extends IFV {
+
+ /** Weight vector scored against; may be supplied later via setParameters. */
+ private double[] parameters;
+
+ /** Running score accumulated by add(). */
+ public double score;
+
+ /** Creates a scorer without weights; call setParameters before add(). */
+ public F2S() {}
+
+ /**
+ * Creates a scorer over the given weight vector.
+ * @param parameters2 weight vector
+ */
+ public F2S(double[] parameters2) {
+ parameters=parameters2;
+ }
+
+ /** Adds the weight of feature i; non-positive indices are ignored. */
+ @Override
+ public void add(int i) {
+ if (i <= 0) return;
+ score += parameters[i];
+ }
+
+ /** Replaces the weight vector used for scoring. */
+ public void setParameters(double[] p) {
+ parameters =p;
+ }
+
+ @Override
+ public void clear() {
+ score =0;
+ }
+
+ /* (non-Javadoc)
+ * @see is2.IFV#getScore()
+ */
+ @Override
+ public double getScore() {
+ return score;
+ }
+
+ /* (non-Javadoc)
+ * @see is2.IFV#clone()
+ */
+ @Override
+ public IFV clone() {
+ // shares the weight vector; the copy starts with a zero score
+ return new F2S(parameters);
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/F2SD.java b/dependencyParser/mate-tools/src/is2/data/F2SD.java
new file mode 100755
index 0000000..45c554f
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/F2SD.java
@@ -0,0 +1,44 @@
+package is2.data;
+
+
+final public class F2SD extends IFV {
+
+ /** Weight vector the feature indices are scored against. */
+ final private double[] parameters;
+
+ /** Running score accumulated by add(). */
+ public double score =0;
+
+ public F2SD(double[] p) {
+ parameters =p;
+ }
+
+
+ /** Adds the weight of feature i; non-positive indices are ignored. */
+ @Override
+ public void add(int i) {
+ if (i <= 0) return;
+ score += parameters[i];
+ }
+
+ @Override
+ public void clear() {
+ score =0;
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.IFV#getScore()
+ */
+ @Override
+ public double getScore() {
+ return score;
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.IFV#clone()
+ */
+ @Override
+ public IFV clone() {
+ // shares the weight vector; the copy starts with a zero score
+ return new F2SD(parameters);
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/F2SF.java b/dependencyParser/mate-tools/src/is2/data/F2SF.java
new file mode 100755
index 0000000..127d775
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/F2SF.java
@@ -0,0 +1,78 @@
+package is2.data;
+
+
+final public class F2SF extends IFV {
+
+ /** Weight vector (float) the feature indices are scored against. */
+ final private float[] parameters;
+
+ /** Running score accumulated by add/sub/addRel. */
+ public float score =0;
+
+ public F2SF(float[] p) {
+ parameters =p;
+ }
+
+ /** Adds the weight of feature i; non-positive indices are ignored. */
+ @Override
+ final public void add(int i) {
+ if (i <= 0) return;
+ score += parameters[i];
+ }
+
+
+ /** Adds the weights of all positive feature indices in the array. */
+ final public void add(int[] i) {
+ for (int feature : i) {
+ if (feature > 0) score += parameters[feature];
+ }
+ }
+
+
+ /** Subtracts from the score the weight in px at the index li maps i to. */
+ final public void sub(float[] px,int i, Long2IntInterface li) {
+ if (i <= 0) return;
+ score -= px[li.l2i(i)];
+ }
+
+
+ @Override
+ public void clear() {
+ score =0;
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.IFV#getScore()
+ */
+ @Override
+ public double getScore() {
+ return score;
+ }
+
+ /** Current score without widening to double. */
+ public float getScoreF() {
+ return score;
+ }
+
+ /* (non-Javadoc)
+ * @see is2.IFV#clone()
+ */
+ @Override
+ public IFV clone() {
+ // shares the weight vector; the copy starts with a zero score
+ return new F2SF(this.parameters);
+ }
+
+ /**
+ * Adds the weight of feature i scaled by f; ignored for i &lt;= 0.
+ */
+ public void addRel(int i, float f) {
+ if (i > 0) score += parameters[i]*f;
+
+ }
+
+ /** Size of the underlying weight vector. */
+ public int length() {
+ return this.parameters.length;
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/F2SP.java b/dependencyParser/mate-tools/src/is2/data/F2SP.java
new file mode 100644
index 0000000..515a788
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/F2SP.java
@@ -0,0 +1,76 @@
+package is2.data;
+
+
+final public class F2SP extends IFV {
+
+ /** Weight vector (float) the feature indices are scored against. */
+ final private float[] parameters;
+
+ /** Running score, accumulated in double precision. */
+ public double score =0;
+
+ public F2SP(float[] p) {
+ parameters =p;
+ }
+
+ /** Adds the weight of feature i; non-positive indices are ignored. */
+ @Override
+ final public void add(int i) {
+ if (i <= 0) return;
+ score += parameters[i];
+ }
+
+
+ /** Adds the weights of all positive feature indices in the array. */
+ final public void add(int[] i) {
+ for (int feature : i) {
+ if (feature > 0) score += parameters[feature];
+ }
+ }
+
+
+ /** Subtracts from the score the weight in px at the index li maps i to. */
+ final public void sub(float[] px,int i, Long2IntInterface li) {
+ if (i <= 0) return;
+ score -= px[li.l2i(i)];
+ }
+
+
+ @Override
+ public void clear() {
+ score =0;
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.IFV#getScore()
+ */
+ @Override
+ public double getScore() {
+ return score;
+ }
+
+ /** Current score (double, same value as getScore). */
+ public double getScoreF() {
+ return score;
+ }
+
+ /* (non-Javadoc)
+ * @see is2.IFV#clone()
+ */
+ @Override
+ public IFV clone() {
+ // shares the weight vector; the copy starts with a zero score
+ return new F2SP(this.parameters);
+ }
+
+ /**
+ * Adds the weight of feature i scaled by f; ignored for i &lt;= 0.
+ */
+ public void addRel(int i, float f) {
+ if (i > 0) score += parameters[i]*f;
+
+ }
+
+ /** Size of the underlying weight vector. */
+ public int length() {
+ return this.parameters.length;
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/F2ST.java b/dependencyParser/mate-tools/src/is2/data/F2ST.java
new file mode 100644
index 0000000..2ef062b
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/F2ST.java
@@ -0,0 +1,47 @@
+package is2.data;
+
+
+final public class F2ST extends IFV {
+
+ /** Weight vector (short-valued weights). */
+ final private short[] parameters;
+
+ /** Running integer score accumulated by add(). */
+ public int score =0;
+
+ public F2ST(short[] p) {
+ parameters =p;
+ }
+
+ /** Adds the weight of feature i; non-positive indices are ignored. */
+ @Override
+ final public void add(int i) {
+ if (i <= 0) return;
+ score += parameters[i];
+ }
+
+ @Override
+ public void clear() {
+ score =0;
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.IFV#getScore()
+ */
+ @Override
+ public double getScore() {
+ return score;
+ }
+
+ /** Current score converted to float. */
+ public float getScoreF() {
+ return score;
+ }
+
+ /* (non-Javadoc)
+ * @see is2.IFV#clone()
+ */
+ @Override
+ public IFV clone() {
+ // shares the weight vector; the copy starts with a zero score
+ return new F2ST(this.parameters);
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/FV.java b/dependencyParser/mate-tools/src/is2/data/FV.java
new file mode 100755
index 0000000..1cfbeba
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/FV.java
@@ -0,0 +1,551 @@
+package is2.data;
+
+
+
+
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+public final class FV extends IFV {
+
+ private FV subfv1;
+ private FV subfv2;
+ private boolean negateSecondSubFV = false;
+
+ private int size;
+
+ // content of the nodes NxC
+ private int m_index[];
+
+ // type of the nodes NxT
+
+ public FV() {
+ this(10);
+ }
+
+ public FV(int initialCapacity) {
+ m_index = new int[initialCapacity];
+ }
+
+
+ public FV (FV fv1, FV fv2) {
+ subfv1 = fv1;
+ subfv2 = fv2;
+ }
+
+ public FV (FV fv1, FV fv2, boolean negSecond) {
+ this(0);
+ subfv1 = fv1;
+ subfv2 = fv2;
+ negateSecondSubFV = negSecond;
+ }
+
+ /**
+ * Read a feature vector
+ * @param index
+ * @param value
+ */
+ public FV(DataInputStream dos, int capacity) throws IOException {
+ this(capacity);
+ size= m_index.length;
+
+ for (int i=0; i<size; i++) m_index[i] = dos.readInt();
+ }
+
+
+ /**
+ * Read a feature vector
+ * @param index
+ * @param value
+ */
+ public FV(DataInputStream dos) throws IOException {
+ this(dos.readInt());
+ size= m_index.length;
+
+ for (int i=0; i<size; i++) m_index[i] = dos.readInt();
+
+
+ }
+
+ /**
+ * Increases the capacity of this <tt>Graph</tt> instance, if
+ * necessary, to ensure that it can hold at least the number of nodes
+ * specified by the minimum capacity argument.
+ *
+ * @param minCapacity the desired minimum capacity.
+ */
+ private void ensureCapacity(int minCapacity) {
+
+
+ if (minCapacity > m_index.length) {
+
+ int oldIndex[] = m_index;
+
+ int newCapacity = ( m_index.length * 3)/2 + 1;
+
+
+ if (newCapacity < minCapacity) newCapacity = minCapacity;
+
+ m_index = new int[newCapacity];
+ System.arraycopy(oldIndex, 0, m_index, 0, oldIndex.length);
+
+ }
+ }
+
+
+ final public int size() {
+ return size;
+ }
+
+ final public boolean isEmpty() {
+ return size == 0;
+ }
+
+ @Override
+ final public void clear() {
+ size = 0;
+ }
+
+
+ final public int createFeature(int i, double v) {
+
+ ensureCapacity(size+1);
+ m_index[size] =i;
+ size++;
+ return size-1;
+ }
+
+ final public int createFeature(int i) {
+
+ ensureCapacity(size+1);
+ m_index[size] =i;
+ size++;
+ return size-1;
+ }
+
+
+ final public int getIndex(int i) {
+ return m_index[i];
+ }
+
+ public void setIndex(int p, int i) {
+ m_index[p] = i;
+ }
+
+
+ /**
+ * Trims the capacity of this <tt>Graph</tt> instance to true size.
+ * An application can use this operation to minimize
+ * the storage of an <tt>Graph</tt> instance.
+ */
+ public void trimToSize() {
+
+ if (size < m_index.length) {
+
+
+ int oldIndex[] = m_index;
+
+ m_index = new int[size];
+ System.arraycopy(oldIndex, 0, m_index, 0, size);
+
+ }
+
+ }
+
+
+
+
+
+ @Override
+ final public void add(int i) {
+ if (i>=0) {
+ ensureCapacity(size+1);
+ m_index[size] =i;
+ size++;
+ }
+ }
+
+ final public void add(int[] i) {
+
+ for(int k =0;k<i.length;k++) add(i[k]);
+
+ }
+
+ final public void put(int i, double f) {
+ if (i>=0) createFeature(i,f);
+ }
+
+
+ // fv1 - fv2
+ public FV getDistVector(FV fl2) {
+ return new FV(this, fl2, true);
+ }
+
+
+ public double getScore(double[] parameters, boolean negate) {
+ double score = 0.0;
+
+ if (null != subfv1) {
+ score += subfv1.getScore(parameters, negate);
+
+ if (null != subfv2) {
+ if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV);
+ else score += subfv2.getScore(parameters, negateSecondSubFV);
+
+ }
+ }
+
+ if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]];
+ else for(int i=0;i<size;i++) score += parameters[m_index[i]];
+
+
+ return score;
+ }
+
+
+ final public float getScore(float[] parameters, boolean negate) {
+ float score = 0.0F;
+
+ if (null != subfv1) {
+ score += subfv1.getScore(parameters, negate);
+
+ if (null != subfv2) {
+ if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV);
+ else score += subfv2.getScore(parameters, negateSecondSubFV);
+
+ }
+ }
+
+ // warning changed the the value
+
+ if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]];//*m_value[i];
+ else for(int i=0;i<size;i++) score += parameters[m_index[i]];//*m_value[i];
+
+ return score;
+ }
+
+ final public int getScore(short[] parameters, boolean negate) {
+ int score = 0;
+
+ if (null != subfv1) {
+ score += subfv1.getScore(parameters, negate);
+
+ if (null != subfv2) {
+ if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV);
+ else score += subfv2.getScore(parameters, negateSecondSubFV);
+
+ }
+ }
+
+ // warning changed the value
+
+ if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]];//*m_value[i];
+ else for(int i=0;i<size;i++) score += parameters[m_index[i]];//*m_value[i];
+
+ return score;
+ }
+
+
+
+ public void update(double[] parameters, double[] total, double alpha_k, double upd) {
+ update(parameters, total, alpha_k, upd, false);
+ }
+
+ public final void update(double[] parameters, double[] total, double alpha_k, double upd, boolean negate) {
+
+ if (null != subfv1) {
+ subfv1.update(parameters, total, alpha_k, upd, negate);
+
+ if (null != subfv2) {
+ if (negate) subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV);
+ else subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV);
+ }
+ }
+
+ if (negate) {
+ for(int i=0;i<size;i++) {
+ parameters[m_index[i]] -= alpha_k;//*getValue(i);
+ total[m_index[i]] -= upd*alpha_k;//*getValue(i);
+ }
+ } else {
+ for(int i=0;i<size;i++){
+ parameters[m_index[i]] += alpha_k;//*getValue(i);
+ total[m_index[i]] += upd*alpha_k;//*getValue(i);
+ }
+ }
+
+
+ }
+
+ public final void update(short[] parameters, short[] total, double alpha_k, double upd, boolean negate) {
+
+ if (null != subfv1) {
+ subfv1.update(parameters, total, alpha_k, upd, negate);
+
+ if (null != subfv2) {
+ if (negate) subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV);
+ else subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV);
+ }
+ }
+
+ if (negate) {
+ for(int i=0;i<size;i++) {
+ parameters[m_index[i]] -= alpha_k;//*getValue(i);
+ total[m_index[i]] -= upd*alpha_k;//*getValue(i);
+ }
+ } else {
+ for(int i=0;i<size;i++){
+ parameters[m_index[i]] += alpha_k;//*getValue(i);
+ total[m_index[i]] += upd*alpha_k;//*getValue(i);
+ }
+ }
+
+
+ }
+
+
+ public final void update(float[] parameters, float[] total, double alpha_k, double upd, boolean negate) {
+
+ if (null != subfv1) {
+ subfv1.update(parameters, total, alpha_k, upd, negate);
+
+ if (null != subfv2 && negate) {
+ subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV);
+ } else {
+ subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV);
+ }
+
+ }
+
+ if (negate) {
+ for(int i=0;i<size;i++){
+ parameters[getIndex(i)] -= alpha_k;
+ total[getIndex(i)] -= upd*alpha_k;
+ }
+ } else {
+ for(int i=0;i<size;i++){
+ parameters[getIndex(i)] += alpha_k;
+ total[getIndex(i)] += upd*alpha_k; //
+ }
+ }
+
+
+ }
+
+
+ public final void update(float[] parameters, float[] total, double alpha_k,
+ double upd, boolean negate, float[] totalp, Long2IntInterface li) {
+
+ if (null != subfv1) {
+ subfv1.update(parameters, total, alpha_k, upd, negate,totalp,li);
+
+ if (null != subfv2 && negate) {
+ subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV,totalp,li);
+ } else {
+ subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV,totalp,li);
+ }
+ }
+
+ if (negate) {
+ for(int i=0;i<size;i++){
+ parameters[getIndex(i)] -= alpha_k;
+ total[getIndex(i)] -= upd*alpha_k;
+
+ totalp[li.l2i(getIndex(i))] -=upd*alpha_k;
+ // totalp[getIndex(i)] -=upd*alpha_k;
+ }
+ } else {
+ for(int i=0;i<size;i++){
+ parameters[getIndex(i)] += alpha_k;
+ total[getIndex(i)] += upd*alpha_k; //
+
+ totalp[li.l2i(getIndex(i))] +=upd*alpha_k;
+ // totalp[getIndex(i)] +=upd*alpha_k;
+ }
+ }
+ }
+
+
+
+
+ private static IntIntHash hm1;
+ private static IntIntHash hm2;
+
+ public int dotProduct(FV fl2) {
+
+ if (hm1==null) hm1 = new IntIntHash(size(),0.4F);
+ else hm1.clear();
+
+ addFeaturesToMap(hm1);
+
+ if (hm2==null)hm2 = new IntIntHash(fl2.size,0.4F);
+ else hm2.clear();
+
+ fl2.addFeaturesToMap(hm2);
+
+ int[] keys = hm1.keys();
+
+ int result = 0;
+ for(int i = 0; i < keys.length; i++) result += hm1.get(keys[i])*hm2.get(keys[i]);
+
+ return result;
+
+ }
+
+ public double twoNorm(FV fl2) {
+
+ if (hm1==null) hm1 = new IntIntHash(size(),0.4F);
+ else hm1.clear();
+
+ addFeaturesToMap(hm1);
+
+ if (hm2==null)hm2 = new IntIntHash(fl2.size,0.4F);
+ else hm2.clear();
+
+ fl2.addFeaturesToMap(hm2);
+
+ int[] keys = hm1.keys();
+
+ int result = 0;
+ for(int i = 0; i < keys.length; i++) result += hm1.get(keys[i])*hm2.get(keys[i]);
+
+
+ return Math.sqrt((double)result);
+
+
+ }
+
+ public void addFeaturesToMap(IntIntHash map) {
+
+ if (null != subfv1) {
+ subfv1.addFeaturesToMap(map);
+
+ if (null != subfv2) {
+ subfv2.addFeaturesToMap(map, negateSecondSubFV);
+
+ }
+ }
+
+
+ for(int i=0;i<size;i++) if (!map.adjustValue(getIndex(i), 1)) map.put(getIndex(i), 1);
+
+
+
+ }
+
+
+
+ private void addFeaturesToMap(IntIntHash map, boolean negate) {
+
+ if (null != subfv1) {
+ subfv1.addFeaturesToMap(map, negate);
+
+ if (null != subfv2) {
+ if (negate) subfv2.addFeaturesToMap(map, !negateSecondSubFV);
+ else subfv2.addFeaturesToMap(map, negateSecondSubFV);
+
+ }
+ }
+
+ if (negate) {
+ for(int i=0;i<size;i++) if (!map . adjustValue(getIndex(i), -1)) map.put(getIndex(i), -1);
+ } else {
+ for(int i=0;i<size;i++) if (!map.adjustValue(getIndex(i), 1)) map.put(getIndex(i), 1);
+ }
+
+
+ }
+
+
+ @Override
+ public final String toString() {
+ StringBuilder sb = new StringBuilder();
+ toString(sb);
+ return sb.toString();
+ }
+
+ private final void toString(StringBuilder sb) {
+ if (null != subfv1) {
+ subfv1.toString(sb);
+
+ if (null != subfv2)
+ subfv2.toString(sb);
+ }
+ for(int i=0;i<size;i++)
+ sb.append(getIndex(i)).append(' ');
+ }
+
+ public void writeKeys(DataOutputStream dos) throws IOException {
+
+ // int keys[] = keys();
+ // dos.writeInt(keys.length);
+ // for(int i=0;i<keys.length;i++) {
+ // dos.writeInt(keys[i]);
+ // }
+
+
+ //int keys[] = keys();
+ dos.writeInt(size);
+ for(int i=0;i<size;i++) {
+ dos.writeInt(m_index[i]);
+ }
+
+ }
+
+ public void readKeys(DataInputStream dos) throws IOException {
+
+ int keys = dos.readInt();
+ for (int i=0; i<keys; i++) createFeature(dos.readInt(), 1.0);
+
+
+ }
+
+ final public static FV cat(FV f1,FV f2) {
+ if (f1==null) return f2;
+ if (f2==null) return f1;
+ return new FV(f1, f2);
+ }
+
+ final public static FV cat(FV f1,FV f2, FV f3) {
+ return FV.cat(f1, FV.cat(f2, f3));
+ }
+ final public static FV cat(FV f1,FV f2, FV f3, FV f4) {
+ return FV.cat(f1, FV.cat(f2, FV.cat(f3, f4)));
+ }
+
+
+ final public static FV read(DataInputStream dis) throws IOException {
+ int cap = dis.readInt();
+ if (cap == 0) return null;
+ return new FV(dis,cap);
+
+ }
+
+ /* (non-Javadoc)
+ * @see is2.IFV#getScore()
+ */
+ @Override
+ public double getScore() {
+ //System.out.println("not implemented");
+ // TODO Auto-generated method stub
+ return 0;
+ }
+
+ /* (non-Javadoc)
+ * @see is2.IFV#clone()
+ */
+ @Override
+ public IFV clone() {
+ FV f= new FV(this.size);
+ for(int i=0;i<this.size;i++) {
+ f.m_index[i]=m_index[i];
+ }
+ f.size=this.size;
+ return f;
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/FVR.java b/dependencyParser/mate-tools/src/is2/data/FVR.java
new file mode 100644
index 0000000..f0b6784
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/FVR.java
@@ -0,0 +1,468 @@
+package is2.data;
+
+
+
+import gnu.trove.TIntDoubleHashMap;
+
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+public final class FVR extends IFV {
+
+ private FVR subfv1;
+ private FVR subfv2;
+ private boolean negateSecondSubFV = false;
+
+ private int size;
+
+
+
+ // content of the nodes NxC
+ private int m_index[];
+ private float m_value[];
+
+ // type of the nodes NxT
+
+ public FVR() {
+ this(10);
+ }
+
+ public FVR(int initialCapacity) {
+ m_index = new int[initialCapacity];
+ m_value = new float[initialCapacity];
+ }
+
+/*
+ public FVR (FVR fv1, FVR fv2) {
+ subfv1 = fv1;
+ subfv2 = fv2;
+ }
+*/
+ public FVR (FVR fv1, FVR fv2, boolean negSecond) {
+ this(0);
+ subfv1 = fv1;
+ subfv2 = fv2;
+ negateSecondSubFV = negSecond;
+ }
+
+ /**
+ * Read a feature vector
+ * @param index
+ * @param value
+ */
+ public FVR(DataInputStream dos, int capacity) throws IOException {
+ this(capacity);
+ size= m_index.length;
+
+ for (int i=0; i<size; i++) m_index[i] = dos.readInt();
+ }
+
+
+ /**
+ * Read a feature vector
+ * @param index
+ * @param value
+ */
+ public FVR(DataInputStream dos) throws IOException {
+ this(dos.readInt());
+ size= m_index.length;
+
+ for (int i=0; i<size; i++) m_index[i] = dos.readInt();
+
+
+ }
+
+ /**
+ * Increases the capacity of this <tt>Graph</tt> instance, if
+ * necessary, to ensure that it can hold at least the number of nodes
+ * specified by the minimum capacity argument.
+ *
+ * @param minCapacity the desired minimum capacity.
+ */
+ private void ensureCapacity(int minCapacity) {
+
+
+ if (minCapacity > m_index.length) {
+
+ int oldIndex[] = m_index;
+ float oldValue[] = m_value;
+
+ int newCapacity = ( m_index.length * 3)/2 + 1;
+
+
+ if (newCapacity < minCapacity) newCapacity = minCapacity;
+
+ m_index = new int[newCapacity];
+ m_value = new float[newCapacity];
+
+ System.arraycopy(oldIndex, 0, m_index, 0, oldIndex.length);
+ System.arraycopy(oldValue, 0, m_value, 0, oldValue.length);
+
+ }
+ }
+
+
+ final public int size() {
+ return size;
+ }
+
+ final public boolean isEmpty() {
+ return size == 0;
+ }
+
+ @Override
+ final public void clear() {
+ size = 0;
+ }
+
+
+ final public int createFeature(int i, float v) {
+
+ ensureCapacity(size+1);
+ m_index[size] =i;
+ m_value[size] =v;
+ size++;
+ return size-1;
+ }
+ /*
+ final public int createFeature(int i) {
+
+ ensureCapacity(size+1);
+ m_index[size] =i;
+ size++;
+ return size-1;
+ }
+ */
+
+ final public int getIndex(int i) {
+ return m_index[i];
+ }
+
+ public void setIndex(int p, int i) {
+ m_index[p] = i;
+ }
+
+
+ /**
+ * Trims the capacity of this <tt>Graph</tt> instance to true size.
+ * An application can use this operation to minimize
+ * the storage of an <tt>Graph</tt> instance.
+ */
+ public void trimToSize() {
+
+ if (size < m_index.length) {
+
+
+ int oldIndex[] = m_index;
+
+ m_index = new int[size];
+ System.arraycopy(oldIndex, 0, m_index, 0, size);
+
+ }
+
+ }
+
+
+
+
+
+ final public void add(int i) {
+ if (i>=0) {
+ ensureCapacity(size+1);
+ m_index[size] =i;
+ m_value[size] =1.0f;
+ size++;
+ }
+ }
+
+ final public void add(int i, float f) {
+ if (i>=0) createFeature(i,f);
+ }
+
+
+ // fv1 - fv2
+ public FVR getDistVector(FVR fl2) {
+ return new FVR(this, fl2, true);
+ }
+
+
+ public double getScore(double[] parameters, boolean negate) {
+ double score = 0.0;
+
+ if (null != subfv1) {
+ score += subfv1.getScore(parameters, negate);
+
+ if (null != subfv2) {
+ if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV);
+ else score += subfv2.getScore(parameters, negateSecondSubFV);
+
+ }
+ }
+
+ if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]];
+ else for(int i=0;i<size;i++) score += parameters[m_index[i]];
+
+
+ return score;
+ }
+
+
+ final public float getScore(float[] parameters, boolean negate) {
+ float score = 0.0F;
+
+ if (null != subfv1) {
+ score += subfv1.getScore(parameters, negate);
+
+ if (null != subfv2) {
+ if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV);
+ else score += subfv2.getScore(parameters, negateSecondSubFV);
+
+ }
+ }
+
+ // warning changed the value
+
+ if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]]*m_value[i];
+ else for(int i=0;i<size;i++) score += parameters[m_index[i]]*m_value[i];
+
+ return score;
+ }
+
+ final public int getScore(short[] parameters, boolean negate) {
+ int score = 0;
+
+ if (null != subfv1) {
+ score += subfv1.getScore(parameters, negate);
+
+ if (null != subfv2) {
+ if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV);
+ else score += subfv2.getScore(parameters, negateSecondSubFV);
+
+ }
+ }
+
+ // warning changed the value
+
+ if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]]*m_value[i];
+ else for(int i=0;i<size;i++) score += parameters[m_index[i]]*m_value[i];
+
+ return score;
+ }
+
+
+
+
+
+
+ public final void update(float[] parameters, float[] total, double alpha_k, double upd, boolean negate) {
+
+ if (null != subfv1) {
+ subfv1.update(parameters, total, alpha_k, upd, negate);
+
+ if (null != subfv2 && negate) {
+ subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV);
+ } else {
+ subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV);
+ }
+
+ }
+
+ if (negate) {
+ for(int i=0;i<size;i++){
+ parameters[getIndex(i)] -= alpha_k*m_value[i];
+ total[getIndex(i)] -= upd*alpha_k*m_value[i];
+ }
+ } else {
+ for(int i=0;i<size;i++){
+ parameters[getIndex(i)] += alpha_k*m_value[i];
+ total[getIndex(i)] += upd*alpha_k*m_value[i]; //
+ }
+ }
+
+
+ }
+
+
+
+// private static IntIntHash hm1;
+// private static IntIntHash hm2;
+
+ private static TIntDoubleHashMap hd1;
+ private static TIntDoubleHashMap hd2;
+
+
+ public int dotProduct(FVR fl2) {
+
+ if (hd1==null) hd1 = new TIntDoubleHashMap(size(),0.4F);
+ else hd1.clear();
+
+ addFeaturesToMap(hd1);
+
+ if (hd2==null)hd2 = new TIntDoubleHashMap(fl2.size,0.4F);
+ else hd2.clear();
+
+ fl2.addFeaturesToMap(hd2);
+
+ int[] keys = hd1.keys();
+
+ int result = 0;
+ for(int i = 0; i < keys.length; i++) result += hd1.get(keys[i])*hd2.get(keys[i]);
+
+ return result;
+
+ }
+
+
+ private void addFeaturesToMap(TIntDoubleHashMap map) {
+
+ if (null != subfv1) {
+ subfv1.addFeaturesToMap(map);
+
+ if (null != subfv2) {
+ subfv2.addFeaturesToMap(map, negateSecondSubFV);
+
+ }
+ }
+
+
+ for(int i=0;i<size;i++) if (!map.adjustValue(getIndex(i), m_value[i])) map.put(getIndex(i), m_value[i]);
+
+
+
+ }
+
+
+
+ private void addFeaturesToMap(IntIntHash map, boolean negate) {
+
+ if (null != subfv1) {
+ subfv1.addFeaturesToMap(map, negate);
+
+ if (null != subfv2) {
+ if (negate) subfv2.addFeaturesToMap(map, !negateSecondSubFV);
+ else subfv2.addFeaturesToMap(map, negateSecondSubFV);
+
+ }
+ }
+
+ if (negate) {
+ for(int i=0;i<size;i++) if (!map . adjustValue(getIndex(i), -1)) map.put(getIndex(i), -1);
+ } else {
+ for(int i=0;i<size;i++) if (!map.adjustValue(getIndex(i), 1)) map.put(getIndex(i), 1);
+ }
+
+
+ }
+
+ private void addFeaturesToMap(TIntDoubleHashMap map, boolean negate) {
+
+ if (null != subfv1) {
+ subfv1.addFeaturesToMap(map, negate);
+
+ if (null != subfv2) {
+ if (negate) subfv2.addFeaturesToMap(map, !negateSecondSubFV);
+ else subfv2.addFeaturesToMap(map, negateSecondSubFV);
+
+ }
+ }
+
+ if (negate) {
+ for(int i=0;i<size;i++) if (!map . adjustValue(getIndex(i), -m_value[i])) map.put(getIndex(i), -m_value[i]);
+ } else {
+ for(int i=0;i<size;i++) if (!map.adjustValue(getIndex(i), m_value[i])) map.put(getIndex(i), m_value[i]);
+ }
+
+
+ }
+
+
+
+ @Override
+ public final String toString() {
+ StringBuilder sb = new StringBuilder();
+ toString(sb);
+ return sb.toString();
+ }
+
+ private final void toString(StringBuilder sb) {
+ if (null != subfv1) {
+ subfv1.toString(sb);
+
+ if (null != subfv2)
+ subfv2.toString(sb);
+ }
+ for(int i=0;i<size;i++)
+ sb.append(getIndex(i)).append('=').append(m_value[i]).append(' ');
+ }
+
+ public void writeKeys(DataOutputStream dos) throws IOException {
+
+ // int keys[] = keys();
+ // dos.writeInt(keys.length);
+ // for(int i=0;i<keys.length;i++) {
+ // dos.writeInt(keys[i]);
+ // }
+
+
+ //int keys[] = keys();
+ dos.writeInt(size);
+ for(int i=0;i<size;i++) {
+ dos.writeInt(m_index[i]);
+ }
+
+ }
+
+ /*
+
+ final public static FVR cat(FVR f1,FVR f2) {
+ if (f1==null) return f2;
+ if (f2==null) return f1;
+ return new FVR(f1, f2);
+ }
+
+ final public static FVR cat(FVR f1,FVR f2, FVR f3) {
+ return FVR.cat(f1, FVR.cat(f2, f3));
+ }
+ final public static FVR cat(FVR f1,FVR f2, FVR f3, FVR f4) {
+ return FVR.cat(f1, FVR.cat(f2, FVR.cat(f3, f4)));
+ }
+ */
+
+
+ final public static FVR read(DataInputStream dis) throws IOException {
+ int cap = dis.readInt();
+ if (cap == 0) return null;
+ return new FVR(dis,cap);
+
+ }
+
+ /* (non-Javadoc)
+ * @see is2.IFV#getScore()
+ */
+ @Override
+ public double getScore() {
+ System.out.println("not implemented");
+ new Exception().printStackTrace();
+ // TODO Auto-generated method stub
+ return 0;
+ }
+
+ /* (non-Javadoc)
+ * @see is2.IFV#clone()
+ */
+ @Override
+ public IFV clone() {
+ FVR f= new FVR(this.size);
+ for(int i=0;i<this.size;i++) {
+ f.m_index[i]=m_index[i];
+ f.m_value[i]=m_value[i];
+ }
+ f.size=this.size;
+ return f;
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/IEncoder.java b/dependencyParser/mate-tools/src/is2/data/IEncoder.java
new file mode 100755
index 0000000..03c4a45
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/IEncoder.java
@@ -0,0 +1,26 @@
+/**
+ *
+ */
+package is2.data;
+
+import java.util.HashMap;
+
/**
 * @author Bernd Bohnet, 20.09.2009
 *
 * Maps string feature values to integer codes, keyed by a feature category.
 */
public interface IEncoder {

	/**
	 * Looks up the integer code of value {@code v} within category {@code a}.
	 * NOTE(review): implementations appear to return -1 for unknown values
	 * (see Instances.setForm) — confirm against the implementing class.
	 *
	 * @param a feature category (e.g. PipeGen.WORD, PipeGen.POS)
	 * @param v the string value to encode
	 * @return the integer code of the value
	 */
	public int getValue(String a, String v);
}
diff --git a/dependencyParser/mate-tools/src/is2/data/IEncoderPlus.java b/dependencyParser/mate-tools/src/is2/data/IEncoderPlus.java
new file mode 100644
index 0000000..2558d6e
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/IEncoderPlus.java
@@ -0,0 +1,28 @@
+/**
+ *
+ */
+package is2.data;
+
+import java.util.HashMap;
+
/**
 * @author Bernd Bohnet, 20.09.2009
 *
 * An encoder that can also register new values and expose usage counters.
 */
public interface IEncoderPlus extends IEncoder {

	/** Placeholder string used when a feature has no value. */
	final public static String NONE="<None>";


	/**
	 * Registers value {@code substring} under category {@code spath}.
	 * @param spath feature category
	 * @param substring the string value to register
	 * @return the integer code assigned to the value
	 */
	public int register(String spath, String substring);

	/**
	 * @return per-feature occurrence counters collected during registration
	 *         (presumably keyed by feature string — confirm with implementation)
	 */
	public HashMap<String,Integer> getFeatureCounter();
}
diff --git a/dependencyParser/mate-tools/src/is2/data/IFV.java b/dependencyParser/mate-tools/src/is2/data/IFV.java
new file mode 100755
index 0000000..28fbcfe
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/IFV.java
@@ -0,0 +1,28 @@
+package is2.data;
+
/**
 * Base class of feature-vector scorers: features are added by integer index
 * and accumulate into a score.
 */
public abstract class IFV {

	/** Adds feature {@code i} to this vector / its score. */
	public abstract void add(int i);

	/** @return the accumulated score */
	public abstract double getScore();

	/** Resets the accumulated state. */
	public abstract void clear();

	@Override
	public abstract IFV clone();

	/**
	 * Adds a list of long-encoded features: each code is offset by {@code l},
	 * mapped to an int index via {@code li}, and added. The list is terminated
	 * by the first entry equal to Integer.MIN_VALUE; non-positive entries are
	 * skipped.
	 * @param gvs long feature codes
	 * @param li maps long codes to int indices
	 * @param l offset added to each code before mapping
	 */
	public void add(long[] gvs, Long2IntInterface li, int l) {
		for(int k=0;k<gvs.length;k++) {
			if (gvs[k]==Integer.MIN_VALUE) break;
			if (gvs[k]>0) add(li.l2i(gvs[k]+l));
		}
	}


}
diff --git a/dependencyParser/mate-tools/src/is2/data/Instances.java b/dependencyParser/mate-tools/src/is2/data/Instances.java
new file mode 100755
index 0000000..ccb26f9
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Instances.java
@@ -0,0 +1,392 @@
+package is2.data;
+
+import java.util.BitSet;
+
+import is2.io.CONLLReader09;
+import is2.util.DB;
+
+
+
+public class Instances {
+
+
+ public IEncoder m_encoder;
+
+
+ protected int size=0;
+
+ protected int capacity;
+
+ public int[][] forms;
+
+ public int[][] plemmas;
+ public int[][] glemmas;
+
+
+ public short[][] heads;
+ public short[][] pheads;
+
+ public short[][] labels;
+ public short[][] plabels;
+
+ public short[][] gpos;
+ public short[][] pposs;
+
+
+ public short[][][] feats;
+
+
+ public int[][] predicat;
+
+
+ public short[][] predicateId;
+
+
+ public short[][] semposition;
+
+
+ public short[][][] arg;
+
+
+ public short[][][] argposition;
+
+
+ public BitSet[] pfill;
+
+
+ public short[][] gfeats;
+ public short[][] pfeats;
+
+
+ public Instances() {}
+
+
+
+
+ public static int m_unkown = 0;
+ public static int m_count = 0;
+
+
+ public static boolean m_report;
+ public static boolean m_found =false;
+
+
+ final public void setForm(int i, int p, String x) {
+
+
+ forms[i][p] = m_encoder.getValue(PipeGen.WORD,x);
+ if (forms[i][p]==-1) {
+ if (m_report) System.out.println("unkwrd "+x);
+ m_unkown++;
+ m_found=true;
+ }
+ m_count++;
+ }
+
+
+
+ final public void setRel(int i, int p, String x) {
+ labels[i][p] = (short)m_encoder.getValue(PipeGen.REL,x);
+
+ }
+
+
+ final public void setHead(int i, int c, int p) {
+ heads[i][c] =(short)p;
+ }
+
+ final public int size() {
+ return size;
+ }
+ public void setSize(int n) {
+ size=n;
+ }
+
+
+
+
+ public void init(int ic, IEncoder mf) {
+ init(ic, mf, -1);
+ }
+
+
+ public void init(int ic, IEncoder mf, int version) {
+ capacity =ic;
+ m_encoder = mf;
+
+ forms = new int[capacity][];
+ plemmas = new int[capacity][];
+ glemmas = new int[capacity][];
+ pposs= new short[capacity][];
+
+ gpos= new short[capacity][];
+ labels= new short[capacity][];
+ heads= new short[capacity][];
+ plabels= new short[capacity][];
+ pheads= new short[capacity][];
+ feats = new short[capacity][][];
+ gfeats = new short[capacity][];
+ pfeats = new short[capacity][];
+
+ predicat =new int[ic][];
+ predicateId = new short[ic][];
+ semposition = new short[ic][];
+ arg= new short[ic][][];
+ argposition= new short[ic][][];
+
+ pfill = new BitSet[ic];
+ }
+
+
+ public int length(int i) {
+ return forms[i].length;
+ }
+
+
+ public int createInstance09(int length) {
+
+ forms[size] = new int[length];
+ plemmas[size] = new int[length];
+ glemmas[size] = new int[length];
+
+ pposs[size] = new short[length];
+
+ gpos[size] = new short[length];
+
+ labels[size] = new short[length];
+ heads[size] = new short[length];
+
+ this.pfill[size] = new BitSet(length);
+
+ feats[size] = new short[length][];
+ gfeats[size] = new short[length];
+ pfeats[size] = new short[length];
+ plabels[size] = new short[length];
+ pheads[size] = new short[length];
+
+ size++;
+
+ return size-1;
+
+ }
+
+/*
+ public final void setPPos(int i, int p, String x) {
+ ppos[i][p] = (short)m_encoder.getValue(PipeGen.POS,x);
+
+ }
+*/
+
+ public final void setPPoss(int i, int p, String x) {
+ pposs[i][p] = (short)m_encoder.getValue(PipeGen.POS,x);
+
+ }
+
+
+ public final void setGPos(int i, int p, String x) {
+ gpos[i][p] = (short)m_encoder.getValue(PipeGen.POS,x);
+ }
+
+
+ public void setLemma(int i, int p, String x) {
+ plemmas[i][p] = m_encoder.getValue(PipeGen.WORD,x);
+ }
+
+
+ public void setGLemma(int i, int p, String x) {
+ glemmas[i][p] = m_encoder.getValue(PipeGen.WORD,x);
+ }
+
+
+ public void setFeats(int i, int p, String[] fts) {
+ if (fts==null) {
+ feats[i][p] =null;
+ return ;
+ }
+ feats[i][p] = new short[fts.length];
+
+ for(int k=0;k<fts.length;k++) {
+ feats[i][p][k] = (short)m_encoder.getValue(PipeGen.FEAT,fts[k]);
+ }
+
+ }
+
+
+ public void setFeature(int i, int p, String feature) {
+ if (feature==null) return;
+ this.gfeats[i][p]= (short) m_encoder.getValue(PipeGen.FFEATS,feature);
+/* if (gfeats[i][p]==-1) {
+ System.out.println("+"+feature);
+ new Exception().printStackTrace();
+ System.exit(0);
+ }
+ */
+ }
+ public void setPFeature(int i, int p, String feature) {
+ if (feature==null) return;
+ this.pfeats[i][p]= (short) m_encoder.getValue(PipeGen.FFEATS,feature);
+ }
+
+
+ public int getWValue(String v) {
+ return m_encoder.getValue(PipeGen.WORD, v);
+ }
+
+
+ public final void setPRel(int i, int p, String x) {
+ plabels[i][p] = (short)m_encoder.getValue(PipeGen.REL,x);
+ }
+
+
+ public final void setPHead(int i, int c, int p) {
+ pheads[i][c] =(short)p;
+ }
+
+/*
+ public String toString(int c) {
+ StringBuffer s = new StringBuffer();
+ for(int i=0;i<length(c);i++) {
+ s.append(i).append('\t').append(forms[c][i]).append("\t_\t").append(ppos[c][i]).append('\t').
+ append('\t').append(heads[c][i]).append('\n');
+ }
+
+ return s.toString();
+ }
+*/
+
+ /*
+ public void setPos(int i, int p, String x) {
+ ppos[i][p] = (short)m_encoder.getValue(PipeGen.POS,x);
+
+ }
+*/
+
+ /**
+ * Create the semantic representation
+ * @param inst
+ * @param it
+ * @return
+ */
+ public boolean createSem(int inst, SentenceData09 it) {
+
+ boolean error = false;
+
+ if (it.sem==null) return error;
+
+ predicat[inst] = new int[it.sem.length];
+ semposition[inst] = new short[it.sem.length];
+ predicateId[inst] = new short[it.sem.length];
+
+ if (it.sem!=null) {
+ arg[inst] = new short[it.sem.length][];
+ argposition[inst] =new short[it.sem.length][];
+ }
+ if (it.sem==null) return error;
+
+ // init sems
+
+
+
+
+ for(int i=0;i<it.sem.length;i++) {
+
+ String pred;
+ short predSense =0;
+ if (it.sem[i].indexOf('.')>0) {
+ pred = it.sem[i].substring(0, it.sem[i].indexOf('.'));
+ predSense = (short)m_encoder.getValue(PipeGen.SENSE, it.sem[i].substring(it.sem[i].indexOf('.')+1, it.sem[i].length()));
+ //Short.parseShort(it.sem[i].substring(it.sem[i].indexOf('.')+1, it.sem[i].length()));
+ } else {
+ pred = it.sem[i];
+ predSense=(short)m_encoder.getValue(PipeGen.SENSE, "");
+ }
+
+ predicat[inst][i] = m_encoder.getValue(PipeGen.PRED, pred);
+ predicateId[inst][i] = predSense;
+
+ semposition[inst][i]=(short)it.semposition[i];
+
+ // this can happen too when no arguments have values
+ if (it.arg==null) {
+ // DB.println("error arg == null "+i+" sem"+it.sem[i]+" inst number "+inst);
+ // error =true;
+ continue;
+ }
+
+
+ // last pred(s) might have no argument
+ if (it.arg.length<=i) {
+ // DB.println("error in instance "+inst+" argument list and number of predicates different arg lists: "+it.arg.length+" preds "+sem.length);
+ // error =true;
+ continue;
+ }
+
+
+ // this happens from time to time, if the predicate has no arguments
+ if (it.arg[i]==null) {
+ // DB.println("error no args for pred "+i+" "+it.sem[i]+" length "+it.ppos.length);
+ // error =true;
+ continue;
+ }
+
+ int argCount=it.arg[i].length;
+ arg[inst][i] = new short[it.arg[i].length];
+ argposition[inst][i] = new short[it.arg[i].length];
+
+ // add the content of the argument
+ for(int a=0;a<argCount;a++) {
+ arg[inst][i][a]=(short)m_encoder.getValue(PipeGen.ARG, it.arg[i][a]);
+ argposition[inst][i][a]=(short)it.argposition[i][a];
+
+ //System.out.print(" #"+a+" pos: "+argposition[inst][i][a]+" "+it.arg[i][a]+" ");
+ }
+ //System.out.println("");
+
+ }
+
+ return error;
+
+ }
+
+
+ public int predCount(int n) {
+ return pfill[n].cardinality();
+ }
+
+
+ /**
+ * @param pscnt
+ * @return
+ */
+ public String print(int pscnt) {
+ StringBuilder s = new StringBuilder();
+
+ for(int i=0;i<this.length(pscnt);i++) {
+ s.append(i+"\t"+forms[pscnt][i]+"\t"+this.glemmas[pscnt][i]+"\t"+this.plemmas[pscnt][i]+"\t"+this.gpos[pscnt][i]+"\t"
+ +this.pposs[pscnt][i]+"\t"+this.gfeats[pscnt][i]+"\t"+(this.feats[pscnt][i]!=null&&this.feats[pscnt][i].length>0?this.feats[pscnt][i][0]:null)+
+ "\t l "+(labels[pscnt]!=null&&labels[pscnt].length>i?labels[pscnt][i]:null)+"\t"+
+ "\t"+heads[pscnt][i]+"\t"+
+ (plabels[pscnt]!=null&&plabels[pscnt].length>i?plabels[pscnt][i]:null)+
+ "\t"+this.predicat[pscnt][i]+"\n");
+ }
+ return s.toString();
+ }
+
+ public String print1(int pscnt) {
+ StringBuilder s = new StringBuilder();
+
+ for(int i=0;i<this.length(pscnt);i++) {
+ s.append(i+"\t"+forms[pscnt][i]+"\t"+"\t"+this.plemmas[pscnt][i]+"\t"+
+ +this.pposs[pscnt][i]+
+ "\t l "+(labels[pscnt]!=null&&labels[pscnt].length>i?labels[pscnt][i]:null)+"\t"+
+ "\t"+heads[pscnt][i]+"\t"+
+ (plabels[pscnt]!=null&&plabels[pscnt].length>i?plabels[pscnt][i]:null)+
+ "\n");
+ }
+ return s.toString();
+ }
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/InstancesTagger.java b/dependencyParser/mate-tools/src/is2/data/InstancesTagger.java
new file mode 100644
index 0000000..8079222
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/InstancesTagger.java
@@ -0,0 +1,77 @@
+/**
+ *
+ */
+package is2.data;
+
+import is2.data.IEncoder;
+import is2.data.Instances;
+import is2.data.SentenceData09;
+
+
+/**
+ * @author Dr. Bernd Bohnet, 06.11.2010
+ *
+ *
+ */
+public class InstancesTagger extends Instances {
+
+ public short[][][] chars;
+ public int[][] formlc;
+
+ public void init(int ic, IEncoder mf) {
+ super.init(ic, mf,9);
+ chars = new short[capacity][][];
+ formlc = new int[capacity][];
+ // System.out.println("create chars "+capacity );
+ }
+
+ public void fillChars(SentenceData09 instance, int i, int cend) {
+ chars[i] = new short[instance.length()][13];
+ formlc[i] = new int[instance.length()];
+
+
+ for(int k=0;k<instance.length();k++) {
+ chars[i][k][0]= (short) ( instance.forms[k].length()>0?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(0))):cend);
+ chars[i][k][1]= (short) ( instance.forms[k].length()>1?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(1))):cend);//m_encoder.getValue(PipeGen.CHAR, END);
+ chars[i][k][2]= (short) ( instance.forms[k].length()>2?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(2))):cend);
+ chars[i][k][3]= (short) ( instance.forms[k].length()>3?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(3))):cend);
+ chars[i][k][4]= (short) ( instance.forms[k].length()>4?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(4))):cend);
+ chars[i][k][5]= (short) ( instance.forms[k].length()>5?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(5))):cend);
+
+ chars[i][k][6]= (short) ( instance.forms[k].length()>0?m_encoder.getValue(PipeGen.CHAR,String.valueOf(instance.forms[k].charAt(instance.forms[k].length()-1))):cend);
+ chars[i][k][7]= (short) ( instance.forms[k].length()>1?m_encoder.getValue(PipeGen.CHAR,String.valueOf(instance.forms[k].charAt(instance.forms[k].length()-2))):cend);//m_encoder.getValue(PipeGen.CHAR, END);
+ chars[i][k][8]= (short) ( instance.forms[k].length()>2?m_encoder.getValue(PipeGen.CHAR,String.valueOf(instance.forms[k].charAt(instance.forms[k].length()-3))):cend);
+ chars[i][k][9]= (short) ( instance.forms[k].length()>3?m_encoder.getValue(PipeGen.CHAR,String.valueOf(instance.forms[k].charAt(instance.forms[k].length()-4))):cend);
+ chars[i][k][10]= (short) ( instance.forms[k].length()>4?m_encoder.getValue(PipeGen.CHAR,String.valueOf(instance.forms[k].charAt(instance.forms[k].length()-5))):cend);
+ chars[i][k][11] = (short)instance.forms[k].length();
+ chars[i][k][12] = (short) ( instance.forms[k].length()>0?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(0))):cend);
+ formlc[i][k] =m_encoder.getValue(PipeGen.WORD, instance.forms[k].toLowerCase());
+ }
+ }
+
+ public void fillChars(SentenceData09 instance, int i, String[] what,int cend) {
+ chars[i] = new short[instance.length()][13];
+ formlc[i] = new int[instance.length()];
+
+
+ for(int k=0;k<instance.length();k++) {
+ chars[i][k][0]= (short) m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(0)));
+ chars[i][k][1]= (short) ( what[k].length()>1?m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(1))):cend);//m_encoder.getValue(PipeGen.CHAR, END);
+ chars[i][k][2]= (short) ( what[k].length()>2?m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(2))):cend);
+ chars[i][k][3]= (short) ( what[k].length()>3?m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(3))):cend);
+ chars[i][k][4]= (short) ( what[k].length()>4?m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(4))):cend);
+ chars[i][k][5]= (short) ( what[k].length()>5?m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(5))):cend);
+
+ chars[i][k][6]= (short) ( m_encoder.getValue(PipeGen.CHAR,String.valueOf(what[k].charAt(what[k].length()-1))));
+ chars[i][k][7]= (short) ( what[k].length()>1?m_encoder.getValue(PipeGen.CHAR,String.valueOf(what[k].charAt(what[k].length()-2))):cend);//m_encoder.getValue(PipeGen.CHAR, END);
+ chars[i][k][8]= (short) ( what[k].length()>2?m_encoder.getValue(PipeGen.CHAR,String.valueOf(what[k].charAt(what[k].length()-3))):cend);
+ chars[i][k][9]= (short) ( what[k].length()>3?m_encoder.getValue(PipeGen.CHAR,String.valueOf(what[k].charAt(what[k].length()-4))):cend);
+ chars[i][k][10]= (short) ( what[k].length()>4?m_encoder.getValue(PipeGen.CHAR,String.valueOf(what[k].charAt(what[k].length()-5))):cend);
+ chars[i][k][11] = (short)what[k].length();
+ formlc[i][k] =m_encoder.getValue(PipeGen.WORD, what[k].toLowerCase());
+ }
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/IntIntHash.java b/dependencyParser/mate-tools/src/is2/data/IntIntHash.java
new file mode 100644
index 0000000..4aec043
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/IntIntHash.java
@@ -0,0 +1,270 @@
+package is2.data;
+
+
+
+import java.util.Arrays;
+
+final public class IntIntHash {
+
+
+ protected int _size;
+ protected int _free;
+ protected float _loadFactor;
+ public int _maxSize;
+ protected int _autoCompactRemovesRemaining;
+ protected float _autoCompactionFactor;
+ public int _set[];
+ private int _values[];
+
+
+ public IntIntHash() {
+ this(102877, 0.5F);
+ }
+
+
+ public IntIntHash(int initialCapacity, float loadFactor) {
+ _loadFactor = loadFactor;
+ _autoCompactionFactor = loadFactor;
+ setUp((int)Math.ceil(initialCapacity / loadFactor));
+ }
+
+
+ public int size() { return _size;}
+
+ public void ensureCapacity(int desiredCapacity) {
+
+ if(desiredCapacity > _maxSize - size()) {
+ rehash(PrimeFinder.nextPrime((int)Math.ceil((desiredCapacity + size()) / _loadFactor) + 1));
+ computeMaxSize(capacity());
+ }
+ }
+
+ public void compact() {
+ rehash(PrimeFinder.nextPrime((int)Math.ceil(size() / _loadFactor) + 1));
+ computeMaxSize(capacity());
+ if(_autoCompactionFactor != 0.0F) computeNextAutoCompactionAmount(size());
+ }
+
+ public void setAutoCompactionFactor(float factor) {
+ if(factor < 0.0F) {
+ throw new IllegalArgumentException((new StringBuilder()).append("Factor must be >= 0: ").append(factor).toString());
+ } else
+ {
+ _autoCompactionFactor = factor;
+ return;
+ }
+ }
+
+ public float getAutoCompactionFactor() { return _autoCompactionFactor; }
+
+
+ private void computeMaxSize(int capacity)
+ {
+ _maxSize = Math.min(capacity - 1, (int)Math.floor(capacity * _loadFactor));
+ _free = capacity - _size;
+ }
+
+ private void computeNextAutoCompactionAmount(int size)
+ {
+ if(_autoCompactionFactor != 0.0F)
+ _autoCompactRemovesRemaining = Math.round(size * _autoCompactionFactor);
+ }
+
+ protected final void postInsertHook(boolean usedFreeSlot)
+ {
+ if(usedFreeSlot) _free--;
+ if(++_size > _maxSize || _free == 0) {
+ int newCapacity = _size <= _maxSize ? capacity() : PrimeFinder.nextPrime(capacity() << 1);
+ rehash(newCapacity);
+ computeMaxSize(capacity());
+ }
+ }
+
+ protected int calculateGrownCapacity() { return capacity() << 1; }
+
+ protected int capacity() { return _values.length; }
+
+ public boolean contains(int val) { return index(val) >= 0;}
+
+ private int index(int v) {
+
+ int length = _set.length;
+ int index = Math.abs((computeHashCode(v) /*& 2147483647*/ ) % length);
+
+ while(true) {
+ // first
+ long l =_set[index];
+ if (l == 0) {
+ // good++;
+ return -1;
+ }
+ // second
+ if (l == v) {
+ return index;
+ }
+ if(--index < 0) index += length;
+ }
+ //return -1;
+ }
+
+ protected int insertionIndex(long val)
+ {
+ int length = _set.length;
+ int index = Math.abs((computeHashCode(val) /*& 2147483647*/ ) % length);
+ while(true) {
+ if(_set[index] == 0) return index;
+ if(_set[index] == val) return -index - 1;
+ if(--index < 0) index += length;
+
+ }
+ }
+
+ public int computeHashCode(long value)
+ {
+ return (int)(( value ^ (value&0xffffffff00000000L) >>> 32 ) *31);//0x811c9dc5 ^ // 29
+ }
+
+
+
+
+
+
+ protected int setUp(int initialCapacity)
+ {
+ int capacity = PrimeFinder.nextPrime(initialCapacity);
+ computeMaxSize(capacity);
+ computeNextAutoCompactionAmount(initialCapacity);
+ _set = new int[capacity];
+ _values = new int[capacity];
+ return capacity;
+ }
+
+ public void put(int key, int value)
+ {
+ int index = insertionIndex(key);
+ doPut(key, value, index);
+ }
+ private void doPut(int key, int value, int index)
+ {
+ boolean isNewMapping = true;
+ if(index < 0)
+ {
+ index = -index - 1;
+ isNewMapping = false;
+ }
+ _set[index] = key;
+ _values[index] = value;
+ if(isNewMapping) postInsertHook(true);
+
+ }
+
+ protected void rehash(int newCapacity)
+ {
+ int oldCapacity = _set.length;
+ int oldKeys[] = _set;
+ int oldVals[] = _values;
+ _set = new int[newCapacity];
+ _values = new int[newCapacity];
+ int i = oldCapacity;
+
+ while(true){
+ if(i-- <= 0) break;
+ if(oldVals[i] != 0) {
+ int o = oldKeys[i];
+ int index = insertionIndex(o);
+ _set[index] = o;
+ _values[index] = oldVals[i];
+ }
+ }
+ }
+
+ int index =0;
+
+
+ public int get(int key)
+ {
+ int index = index(key);
+ return index >= 0 ? _values[index] : 0;
+ }
+
+
+ public void clear()
+ {
+ _size = 0;
+ _free = capacity();
+ Arrays.fill(_set, 0, _set.length, 0);
+ // Arrays.fill(_values, 0, _values.length, 0);
+ }
+
+ public int remove(int key)
+ {
+ int prev = 0;
+ int index = index(key);
+ if(index >= 0)
+ {
+ prev = _values[index];
+ _values[index] = 0;
+ _set[index] = 0;
+ _size--;
+ if(_autoCompactionFactor != 0.0F) {
+ _autoCompactRemovesRemaining--;
+ if( _autoCompactRemovesRemaining <= 0) compact();
+ }
+ }
+ return prev;
+ }
+
+
+ public int[] getValues()
+ {
+ int vals[] = new int[size()];
+ int v[] = _values;
+ int i = v.length;
+ int j = 0;
+ do
+ {
+ if(i-- <= 0) break;
+ if(v[i] != 0) vals[j++] = v[i];
+ } while(true);
+ return vals;
+ }
+
+ public int[] keys()
+ {
+ int keys[] = new int[size()];
+ int k[] = _set;
+ // byte states[] = _states;
+ int i = k.length;
+ int j = 0;
+ do
+ {
+ if(i-- <= 0)
+ break;
+ if(k[i] != 0)
+ keys[j++] = k[i];
+ } while(true);
+ return keys;
+ }
+
+
+ /**
+ * @param index2
+ * @param i
+ * @return
+ */
+ public boolean adjustValue(int key, int i) {
+ int index = index(key);
+ if (index >= 0){
+ _values[index] +=i;
+ return true;
+ }
+ return false;
+ }
+
+
+
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/Long2Int.java b/dependencyParser/mate-tools/src/is2/data/Long2Int.java
new file mode 100755
index 0000000..e505cb0
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Long2Int.java
@@ -0,0 +1,113 @@
+package is2.data;
+
+
+
+/**
+ * @author Bernd Bohnet, 01.09.2009
+ *
+ * Maps for the Hash Kernel the long values to the int values.
+ */
+final public class Long2Int implements Long2IntInterface {
+
+
+ public Long2Int() {
+ size=115911564;
+ }
+
+
+ public Long2Int(int s) {
+ size=s;
+ }
+
+ public static void main(String args[]) {
+
+ long l =123456;
+ long l2 =1010119;
+ System.out.println("l \t"+l+"\t"+printBits(l));
+
+ long x =100000000;
+ System.out.println("1m\t"+l2+"\t"+printBits(x)+"\t"+x);
+
+ System.out.println("l2\t"+l2+"\t"+printBits(l));
+
+ System.out.println("l2*l\t"+l2+"\t"+printBits(l*l2)+" \t "+l*l2);
+
+ System.out.println("l2*l*l2\t"+l2+"\t"+printBits(l*l2*l2)+" \t "+l*l2*l2);
+
+ System.out.println("l2*l*l2\t"+l2+"\t"+printBits(l*l2*l2*l2)+" \t "+l*l2*l2*l2);
+
+
+ System.out.println("l2*l*l2\t"+l2+"\t"+printBits((l*l2)%0xfffff)+" \t "+l*l2*l2*l2+"\t "+0xfffff);
+ System.out.println("l2*l*l2\t"+l2+"\t"+printBits((l*l2)&0xfffffff)+" \t "+l*l2*l2*l2);
+ }
+
+
+ /** Integer counter for long2int */
+ final private int size; //0x03ffffff //0x07ffffff
+
+
+ /* (non-Javadoc)
+ * @see is2.sp09k9992.Long2IntIterface#size()
+ */
+ public int size() {return size;}
+
+ /* (non-Javadoc)
+ * @see is2.sp09k9992.Long2IntIterface#start()
+ * has no meaning for this implementation
+ */
+ final public void start() {}
+
+
+ /* (non-Javadoc)
+ * @see is2.sp09k9992.Long2IntIterface#l2i(long)
+ */
+ final public int l2i(long l) {
+ if (l<0) return -1;
+
+ // this works well LAS 88.138
+ // int r= (int)(( l ^ (l&0xffffffff00000000L) >>> 29 ));//0x811c9dc5 ^ // 29
+ // return Math.abs(r % size);
+ // this works a bit better and good with 0x03ffffff
+ //
+ /*
+ long r= l;//26
+ l = (l>>12)&0xfffffffffffff000L;
+ r ^= l;//38
+ l = (l>>11)&0xffffffffffffc000L;
+ r ^= l;//49
+ l = (l>>9)& 0xffffffffffff0000L; //53
+ r ^= l;//58
+ l = (l>>7)&0xfffffffffffc0000L; //62
+ r ^=l;//65
+ int x = (int)r;
+ x = x % size;
+ // return x >= 0 ? x : -x ;// Math.abs(r % size);
+
+ */
+ // 26 0x03ffffff
+ // together with 0x07ffffff 27 88.372
+ long r= l;// 27
+ l = (l>>13)&0xffffffffffffe000L;
+ r ^= l; // 40
+ l = (l>>11)&0xffffffffffff0000L;
+ r ^= l; // 51
+ l = (l>>9)& 0xfffffffffffc0000L; //53
+ r ^= l; // 60
+ l = (l>>7)& 0xfffffffffff00000L; //62
+ r ^=l; //67
+ int x = ((int)r) % size;
+
+ return x >= 0 ? x : -x ;
+ }
+
+ static public StringBuffer printBits(long out) {
+ StringBuffer s = new StringBuffer();
+
+ for(int k=0;k<65;k++) {
+ s.append((out & 1)==1?"1":"0");
+ out >>=1;
+ }
+ s.reverse();
+ return s;
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/Long2IntExact.java b/dependencyParser/mate-tools/src/is2/data/Long2IntExact.java
new file mode 100644
index 0000000..62f6375
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Long2IntExact.java
@@ -0,0 +1,60 @@
+package is2.data;
+
+
+
/**
 * @author Bernd Bohnet, 01.09.2009
 *
 * Maps for the Hash Kernel the long values to the int values.
 *
 * Unlike the hashed implementations, this one is exact: each distinct long is
 * assigned the next sequential id (starting at 1) and remembered in a map.
 * NOTE(review): the map and the counter are static, so every Long2IntExact
 * instance shares one id space regardless of the size passed to the
 * constructor; access is also unsynchronized — confirm single-threaded use.
 */
final public class Long2IntExact implements Long2IntInterface {

	// shared long -> id map; trove presumably returns 0 for missing keys
	// (its default no-entry value), which is why ids start at 1 below
	static gnu.trove.TLongIntHashMap mapt = new gnu.trove.TLongIntHashMap();

	// next id minus one; shared across all instances
	static int cnt=0;



	public Long2IntExact() {
		size=115911564;
	}


	public Long2IntExact(int s) {
		size=s;
	}


	/** Integer counter for long2int */
	final private int size; //0x03ffffff //0x07ffffff


	/* (non-Javadoc)
	 * @see is2.sp09k9992.Long2IntIterface#size()
	 */
	public int size() {return size;}

	/* (non-Javadoc)
	 * @see is2.sp09k9992.Long2IntIterface#start()
	 * has no meaning for this implementation
	 */
	final public void start() {}


	/**
	 * Returns the id already assigned to l, or assigns the next free id.
	 * Returns -1 for negative input or once size-1 ids have been handed out.
	 */
	final public int l2i(long l) {
		if (l<0) return -1;

		int i = mapt.get(l);
		if (i!=0) return i;

		if (i==0 && cnt<size-1) {
			cnt++;
			mapt.put(l, cnt);
			return cnt;
		}
		return -1;
	}
}
diff --git a/dependencyParser/mate-tools/src/is2/data/Long2IntInterface.java b/dependencyParser/mate-tools/src/is2/data/Long2IntInterface.java
new file mode 100755
index 0000000..a6cba63
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Long2IntInterface.java
@@ -0,0 +1,15 @@
+package is2.data;
+
/**
 * Maps long feature codes to int indices (e.g. into a weight vector).
 * Implementations may hash (lossy) or store an exact mapping.
 */
public interface Long2IntInterface {

	/** @return the size of the target index range */
	public abstract int size();


	/**
	 * Maps a long to a integer value. This is very useful to save memory for sparse data long values
	 * @param l the long value to map
	 * @return the integer
	 */
	public abstract int l2i(long l);

}
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/data/Long2IntQuick.java b/dependencyParser/mate-tools/src/is2/data/Long2IntQuick.java
new file mode 100644
index 0000000..adbe57d
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Long2IntQuick.java
@@ -0,0 +1,47 @@
+package is2.data;
+
+
+
+/**
+ * @author Bernd Bohnet, 01.09.2009
+ *
+ * Maps for the Hash Kernel the long values to the int values.
+ */
+final public class Long2IntQuick implements Long2IntInterface {
+
+
+ /** Integer counter for long2int */
+ final private int size;
+
+ public Long2IntQuick() {
+ size=0x07ffffff;
+ }
+
+
+ public Long2IntQuick(int s) {
+ size=s;
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.sp09k9992.Long2IntIterface#size()
+ */
+ public int size() {return size;}
+
+ /* (non-Javadoc)
+ * @see is2.sp09k9992.Long2IntIterface#start()
+ * has no meaning for this implementation
+ */
+
+
+ /* (non-Javadoc)
+ * @see is2.sp09k9992.Long2IntIterface#l2i(long)
+ */
+ final public int l2i(long r) {
+ long l = (r>>16)&0xfffffffffffff000L;
+ r ^= l;
+ r ^= l = (l>>12)&0xffffffffffff0000L;
+ r ^= l = (l>>8)& 0xfffffffffffc0000L;
+ return (int)(r % size);
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/MFB.java b/dependencyParser/mate-tools/src/is2/data/MFB.java
new file mode 100755
index 0000000..04c36ae
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/MFB.java
@@ -0,0 +1,256 @@
+package is2.data;
+
+
+import is2.util.DB;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+/**
+ * Map Features, do not map long to integer
+ *
+ * @author Bernd Bohnet, 20.09.2009
+ */
+
+final public class MFB implements IEncoderPlus {
+
+ /** The features and its values */
+ static private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>();
+
+ /** The feature class and the number of values */
+ static private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>();
+
+ /** The number of bits needed to encode a feature */
+ static final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>();
+
+ /** Integer counter for long2int */
+ static private int count=0;
+
+ /** Stop growing */
+ public boolean stop=false;
+
+ final public static String NONE="<None>";
+
+
+
+
+
+
+
+ public MFB () {}
+
+
+ public int size() {return count;}
+
+
+
+ /**
+ * Register an attribute class, if it not exists and add a possible value
+ * @param type
+ * @param type2
+ */
+ final public int register(String a, String v) {
+
+ synchronized(m_featureCounters) {
+
+ HashMap<String,Integer> fs = getFeatureSet().get(a);
+ if (fs==null) {
+ fs = new HashMap<String,Integer>();
+ getFeatureSet().put(a, fs);
+ fs.put(NONE, 0);
+ getFeatureCounter().put(a, 1);
+ }
+
+ Integer i = fs.get(v);
+ if (i==null) {
+ Integer c = getFeatureCounter().get(a);
+ fs.put(v, c);
+ c++;
+ getFeatureCounter().put(a,c);
+ return c-1;
+ } else return i;
+ }
+ }
+
+ /**
+ * Calculates the number of bits needed to encode a feature
+ */
+ public void calculateBits() {
+
+ int total=0;
+ for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+ int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2)));
+ m_featureBits.put(e.getKey(), bits);
+ total+=bits;
+ // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1));
+ }
+
+// System.out.println("total number of needed bits "+total);
+ }
+
+
+
+ public String toString() {
+
+ StringBuffer content = new StringBuffer();
+ for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+ content.append(e.getKey()+" "+e.getValue());
+ content.append(':');
+ // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey());
+ content.append(getFeatureBits(e.getKey()));
+
+ /*if (vs.size()<120)
+ for(Entry<String,Integer> e2 : vs.entrySet()) {
+ content.append(e2.getKey()+" ("+e2.getValue()+") ");
+ }*/
+ content.append('\n');
+
+ }
+ return content.toString();
+ }
+
+
+
+ static final public short getFeatureBits(String a) {
+ if(m_featureBits.get(a)==null) return 0;
+ return (short)m_featureBits.get(a).intValue();
+ }
+
+
+
+ /**
+ * Get the integer place holder of the string value v of the type a
+ *
+ * @param t the type
+ * @param v the value
+ * @return the integer place holder of v
+ */
+ final public int getValue(String t, String v) {
+
+ if (m_featureSets.get(t)==null) return -1;
+ Integer vi = m_featureSets.get(t).get(v);
+ if (vi==null) return -1; //stop &&
+ return vi.intValue();
+ }
+
+ /**
+ * Static version of getValue
+ * @see getValue
+ */
+ static final public int getValueS(String a, String v) {
+
+ if (m_featureSets.get(a)==null) return -1;
+ Integer vi = m_featureSets.get(a).get(v);
+ if (vi==null) return -1; //stop &&
+ return vi.intValue();
+ }
+
+ public int hasValue(String a, String v) {
+
+ Integer vi = m_featureSets.get(a).get(v);
+ if (vi==null) return -1;
+ return vi.intValue();
+ }
+
+
+ public static String printBits(int k) {
+ StringBuffer s = new StringBuffer();
+ for(int i =0;i<31;i++) {
+ s.append((k&0x00000001)==1?'1':'0');
+ k=k>>1;
+
+ }
+ s.reverse();
+ return s.toString();
+ }
+
+
+
+
+
+
+
+ /**
+ * Maps a long to a integer value. This is very useful to save memory for sparse data long values
+ * @param l
+ * @return the integer
+ */
+ static public int misses = 0;
+ static public int good = 0;
+
+
+
+
+ /**
+ * Write the data
+ * @param dos
+ * @throws IOException
+ */
+ static public void writeData(DataOutputStream dos) throws IOException {
+ dos.writeInt(getFeatureSet().size());
+ // DB.println("write"+getFeatureSet().size());
+ for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) {
+ dos.writeUTF(e.getKey());
+ dos.writeInt(e.getValue().size());
+
+ for(Entry<String,Integer> e2 : e.getValue().entrySet()) {
+
+ if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey());
+ dos.writeUTF(e2.getKey());
+ dos.writeInt(e2.getValue());
+
+ }
+
+ }
+ }
+ public void read(DataInputStream din) throws IOException {
+
+ int size = din.readInt();
+ for(int i=0; i<size;i++) {
+ String k = din.readUTF();
+ int size2 = din.readInt();
+
+ HashMap<String,Integer> h = new HashMap<String,Integer>();
+ getFeatureSet().put(k,h);
+ for(int j = 0;j<size2;j++) {
+ h.put(din.readUTF(), din.readInt());
+ }
+ getFeatureCounter().put(k, size2);
+ }
+
+ count =size;
+ // stop();
+ calculateBits();
+ }
+
+
+ /**
+ * Clear the data
+ */
+ static public void clearData() {
+ getFeatureSet().clear();
+ m_featureBits.clear();
+ getFeatureSet().clear();
+ }
+
+ public HashMap<String,Integer> getFeatureCounter() {
+ return m_featureCounters;
+ }
+
+ static public HashMap<String,HashMap<String,Integer>> getFeatureSet() {
+ return m_featureSets;
+ }
+
+ static public String[] reverse(HashMap<String,Integer> v){
+ String[] set = new String[v.size()];
+ for(Entry<String,Integer> e : v.entrySet()) {
+ set[e.getValue()]=e.getKey();
+ }
+ return set;
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/MFC.java b/dependencyParser/mate-tools/src/is2/data/MFC.java
new file mode 100644
index 0000000..bb1f27a
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/MFC.java
@@ -0,0 +1,246 @@
+package is2.data;
+
+
+import is2.util.DB;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+/**
+ * Map Features, do not map long to integer
+ *
+ * @author Bernd Bohnet, 17.09.2011
+ */
+
+final public class MFC implements IEncoderPlus {
+
+ /** The features and its values */
+ private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>();
+
+ /** The feature class and the number of values */
+ private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>();
+
+ /** The number of bits needed to encode a feature */
+ final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>();
+
+ /** Integer counter for long2int */
+ private int count=0;
+
+
+ public MFC () {}
+
+
+ public int size() {return count;}
+
+
+
+ /**
+ * Register an attribute class, if it not exists and add a possible value
+ * @param type
+ * @param type2
+ */
+ final public int register(String a, String v) {
+
+ synchronized(m_featureCounters) {
+
+ HashMap<String,Integer> fs = getFeatureSet().get(a);
+ if (fs==null) {
+ fs = new HashMap<String,Integer>();
+ getFeatureSet().put(a, fs);
+ fs.put(NONE, 0);
+ getFeatureCounter().put(a, 1);
+ }
+
+ Integer i = fs.get(v);
+ if (i==null) {
+ Integer c = getFeatureCounter().get(a);
+ fs.put(v, c);
+ c++;
+ getFeatureCounter().put(a,c);
+ return c-1;
+ } else return i;
+ }
+ }
+
+ /**
+ * Calculates the number of bits needed to encode a feature
+ */
+ public void calculateBits() {
+
+ int total=0;
+ for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+ int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2)));
+ m_featureBits.put(e.getKey(), bits);
+ total+=bits;
+ // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1));
+ }
+
+// System.out.println("total number of needed bits "+total);
+ }
+
+
+
+ public String toString() {
+
+ StringBuffer content = new StringBuffer();
+ for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+ content.append(e.getKey()+" "+e.getValue());
+ content.append(':');
+ // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey());
+ content.append(getFeatureBits(e.getKey()));
+
+ /*if (vs.size()<120)
+ for(Entry<String,Integer> e2 : vs.entrySet()) {
+ content.append(e2.getKey()+" ("+e2.getValue()+") ");
+ }*/
+ content.append('\n');
+
+ }
+ return content.toString();
+ }
+
+
+
+ final public short getFeatureBits(String a) {
+ if(m_featureBits.get(a)==null) return 0;
+ return (short)m_featureBits.get(a).intValue();
+ }
+
+
+
+ /**
+ * Get the integer place holder of the string value v of the type a
+ *
+ * @param t the type
+ * @param v the value
+ * @return the integer place holder of v
+ */
+ final public int getValue(String t, String v) {
+
+ if (m_featureSets.get(t)==null) return -1;
+ Integer vi = m_featureSets.get(t).get(v);
+ if (vi==null) return -1; //stop &&
+ return vi.intValue();
+ }
+
+ /**
+ * Static version of getValue
+ * @see getValue
+ */
+ final public int getValueS(String a, String v) {
+
+ if (m_featureSets.get(a)==null) return -1;
+ Integer vi = m_featureSets.get(a).get(v);
+ if (vi==null) return -1; //stop &&
+ return vi.intValue();
+ }
+
+ public int hasValue(String a, String v) {
+
+ Integer vi = m_featureSets.get(a).get(v);
+ if (vi==null) return -1;
+ return vi.intValue();
+ }
+
+
+ public static String printBits(int k) {
+ StringBuffer s = new StringBuffer();
+ for(int i =0;i<31;i++) {
+ s.append((k&0x00000001)==1?'1':'0');
+ k=k>>1;
+
+ }
+ s.reverse();
+ return s.toString();
+ }
+
+
+
+
+
+
+
+ /**
+ * Maps a long to a integer value. This is very useful to save memory for sparse data long values
+ * @param l
+ * @return the integer
+ */
+ static public int misses = 0;
+ static public int good = 0;
+
+
+
+
+ /**
+ * Write the data
+ * @param dos
+ * @throws IOException
+ */
+ public void writeData(DataOutputStream dos) throws IOException {
+ dos.writeInt(getFeatureSet().size());
+ // DB.println("write"+getFeatureSet().size());
+ for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) {
+ dos.writeUTF(e.getKey());
+ dos.writeInt(e.getValue().size());
+
+ for(Entry<String,Integer> e2 : e.getValue().entrySet()) {
+
+ if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey());
+ dos.writeUTF(e2.getKey());
+ dos.writeInt(e2.getValue());
+
+ }
+
+ }
+ }
+ public void read(DataInputStream din) throws IOException {
+
+ int size = din.readInt();
+ for(int i=0; i<size;i++) {
+ String k = din.readUTF();
+ int size2 = din.readInt();
+
+ HashMap<String,Integer> h = new HashMap<String,Integer>();
+ getFeatureSet().put(k,h);
+ for(int j = 0;j<size2;j++) {
+ h.put(din.readUTF(), din.readInt());
+ }
+ getFeatureCounter().put(k, size2);
+ }
+
+ count =size;
+ // stop();
+ calculateBits();
+ }
+
+
+ /**
+ * Clear the data
+ */
+ public void clearData() {
+ getFeatureSet().clear();
+ m_featureBits.clear();
+ getFeatureSet().clear();
+ }
+
+ public HashMap<String,Integer> getFeatureCounter() {
+ return m_featureCounters;
+ }
+
+ public HashMap<String,HashMap<String,Integer>> getFeatureSet() {
+ return m_featureSets;
+ }
+
+ public String[] reverse(HashMap<String,Integer> v){
+ String[] set = new String[v.size()];
+ for(Entry<String,Integer> e : v.entrySet()) {
+ set[e.getValue()]=e.getKey();
+ }
+ return set;
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/MFO.java b/dependencyParser/mate-tools/src/is2/data/MFO.java
new file mode 100755
index 0000000..ff4d43e
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/MFO.java
@@ -0,0 +1,386 @@
+package is2.data;
+
+
+import is2.util.DB;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+/**
+ * Map Features, do not map long to integer
+ *
+ * @author Bernd Bohnet, 20.09.2009
+ */
+
+final public class MFO implements IEncoderPlus {
+
+ /** The features and its values */
+ static private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>();
+
+ /** The feature class and the number of values */
+ static private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>();
+
+ /** The number of bits needed to encode a feature */
+ static final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>();
+
+ final public static String NONE="<None>";
+
+ /**
+ * Scratch holder used to pack up to ten (value, bit-width) pairs into one
+ * long feature code: aN is the bit width of slot N, vN its value, and
+ * 'shift' accumulates the running bit offset while packing.
+ */
+ final public static class Data4 {
+ public int shift;
+ public short a0,a1,a2,a3,a4,a5,a6,a7,a8,a9;
+ public int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9;
+
+ /** OR value v (b bits wide) into l at the current shift; a negative l is
+ * the "invalid" sentinel and is passed through unchanged. */
+ final public long calcs(int b, long v, long l) {
+ if (l<0) return l;
+ l |= v<<shift;
+ shift +=b;
+ return l;
+ }
+
+ }
+
+ public MFO () {}
+
+
+
+
+
+ /**
+ * Register a value for an attribute class, creating the class (with index 0
+ * reserved for NONE) on first use.
+ * @param a the attribute class name
+ * @param v the value to register
+ * @return the (possibly newly assigned) integer index of v within a
+ */
+ final public int register(String a, String v) {
+
+ HashMap<String,Integer> fs = getFeatureSet().get(a);
+ if (fs==null) {
+ fs = new HashMap<String,Integer>();
+ getFeatureSet().put(a, fs);
+ fs.put(NONE, 0);
+ getFeatureCounter().put(a, 1);
+ }
+ Integer c = getFeatureCounter().get(a);
+
+ Integer i = fs.get(v);
+ if (i==null) {
+ fs.put(v, c);
+ c++;
+ getFeatureCounter().put(a,c);
+ return c-1;
+ } else return i;
+ }
+
+ /**
+ * Calculates the number of bits needed to encode each feature class,
+ * ceil(log2(count+1)), and caches it in m_featureBits for getFeatureBits().
+ */
+ public void calculateBits() {
+
+ int total=0;
+ for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+ int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2)));
+ m_featureBits.put(e.getKey(), bits);
+ total+=bits;
+ // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1));
+ }
+
+ // 'total' is only kept for the disabled debug output below.
+ // System.out.println("total number of needed bits "+total);
+ }
+
+
+
+ @Override
+ public String toString() {
+
+ StringBuffer content = new StringBuffer();
+ for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+ content.append(e.getKey()+" "+e.getValue());
+ content.append(':');
+ // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey());
+ content.append(getFeatureBits(e.getKey()));
+
+ /*if (vs.size()<120)
+ for(Entry<String,Integer> e2 : vs.entrySet()) {
+ content.append(e2.getKey()+" ("+e2.getValue()+") ");
+ }*/
+ content.append('\n');
+
+ }
+ return content.toString();
+ }
+
+
+ /** Static variant of Data4.calcs: OR v (b bits wide) into l at d's running shift. */
+ static final public long calcs(Data4 d,int b, long v, long l) {
+ if (l<0) return l;
+ l |= v<<d.shift;
+ d.shift +=b;
+ return l;
+ }
+
+
+ /** @return the cached bit width for attribute class a (calculateBits must have run). */
+ static final public short getFeatureBits(String a) {
+ return (short)m_featureBits.get(a).intValue();
+ }
+
+
+
+ /**
+ * Get the integer place holder of the string value v of the type t
+ *
+ * @param t the type
+ * @param v the value
+ * @return the index of v, or -1 when the type or the value is unknown
+ */
+ final public int getValue(String t, String v) {
+
+ if (m_featureSets.get(t)==null) return -1;
+ Integer vi = m_featureSets.get(t).get(v);
+ if (vi==null) return -1; //stop &&
+ return vi.intValue();
+ }
+
+ /**
+ * Static version of getValue
+ * @see getValue
+ */
+ static final public int getValueS(String a, String v) {
+
+ if (m_featureSets.get(a)==null) return -1;
+ Integer vi = m_featureSets.get(a).get(v);
+ if (vi==null) return -1; //stop &&
+ return vi.intValue();
+ }
+
+ /**
+ * Like getValue but without the null guard on the type: throws
+ * NullPointerException when the attribute class a was never registered.
+ */
+ public int hasValue(String a, String v) {
+
+ Integer vi = m_featureSets.get(a).get(v);
+ if (vi==null) return -1;
+ return vi.intValue();
+ }
+
+
+
+
+ final public long calc2(Data4 d) {
+
+ if (d.v0<0||d.v1<0) return -1;
+ // if (d.v1<0||d.v2<0) return -1;
+
+ long l = d.v0;
+ short shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ // l |= (long)d.v2<<shift;
+ d.shift=shift;
+
+ //d.shift=;
+ return l;
+ }
+
+
+
+ final public long calc3(Data4 d) {
+
+ if (d.v0<0||d.v1<0||d.v2<0) return -1;
+ // if (d.v1<0||d.v2<0) return -1;
+
+ long l = d.v0;
+ short shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ d.shift=shift + d.a2;
+
+ //d.shift=;
+ return l;
+ }
+
+
+ final public long calc4(Data4 d) {
+ if (d.v0<0||d.v1<0||d.v2<0||d.v3<0) return -1;
+
+ long l = d.v0;
+ int shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ shift +=d.a2;
+ l |= (long)d.v3<<shift;
+ d.shift= shift +d.a3;
+
+ return l;
+ }
+
+
+
+ final public long calc5(Data4 d) {
+
+ if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0) return -1;
+
+ long l = d.v0;
+ int shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ shift +=d.a2;
+ l |= (long)d.v3<<shift;
+ shift +=d.a3;
+ l |= (long)d.v4<<shift;
+ d.shift =shift+d.a4;
+
+ return l;
+ }
+
+
+ static final public long calc6(Data4 d) {
+
+ if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0) return -1;
+
+ long l = d.v0;
+ int shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ shift +=d.a2;
+ l |= (long)d.v3<<shift;
+ shift +=d.a3;
+ l |= (long)d.v4<<shift;
+ shift +=d.a4;
+ l |= (long)d.v5<<shift;
+ d.shift =shift+d.a5;
+
+ return l;
+ }
+
+ final public long calc7(Data4 d) {
+
+ if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0) return -1;
+
+ long l = d.v0;
+ int shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ shift +=d.a2;
+ l |= (long)d.v3<<shift;
+ shift +=d.a3;
+ l |= (long)d.v4<<shift;
+ shift +=d.a4;
+ l |= (long)d.v5<<shift;
+ shift +=d.a5;
+ l |= (long)d.v6<<shift;
+ d.shift =shift+d.a6;
+
+ return l;
+ }
+
+
+ final public long calc8(Data4 d) {
+
+ if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0||d.v7<0) return -1;
+
+ long l = d.v0;
+ int shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ shift +=d.a2;
+ l |= (long)d.v3<<shift;
+ shift +=d.a3;
+ l |= (long)d.v4<<shift;
+ shift +=d.a4;
+ l |= (long)d.v5<<shift;
+ shift +=d.a5;
+ l |= (long)d.v6<<shift;
+ shift +=d.a6;
+ l |= (long)d.v7<<shift;
+ d.shift =shift+d.a7;
+
+ return l;
+ }
+
+
+
+
+
+
+
+ /**
+ * NOTE(review): stale javadoc — it described a long-to-int mapping method
+ * that is not present here. The fields below are shared diagnostic counters.
+ */
+ static public int misses = 0;
+ static public int good = 0;
+
+
+
+
+ /**
+ * Write the data
+ * @param dos
+ * @throws IOException
+ */
+ public void writeData(DataOutputStream dos) throws IOException {
+ dos.writeInt(getFeatureSet().size());
+ for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) {
+ dos.writeUTF(e.getKey());
+ dos.writeInt(e.getValue().size());
+
+ for(Entry<String,Integer> e2 : e.getValue().entrySet()) {
+
+ if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey());
+ dos.writeUTF(e2.getKey());
+ dos.writeInt(e2.getValue());
+
+ }
+
+ }
+ }
+ public void read(DataInputStream din) throws IOException {
+
+ int size = din.readInt();
+ for(int i=0; i<size;i++) {
+ String k = din.readUTF();
+ int size2 = din.readInt();
+
+ HashMap<String,Integer> h = new HashMap<String,Integer>();
+ getFeatureSet().put(k,h);
+ for(int j = 0;j<size2;j++) {
+ h.put(din.readUTF(), din.readInt());
+ }
+ getFeatureCounter().put(k, size2);
+ }
+
+ calculateBits();
+ }
+
+
+ /**
+ * Clear the data
+ */
+ static public void clearData() {
+ getFeatureSet().clear();
+ m_featureBits.clear();
+ getFeatureSet().clear();
+ }
+
+ /** @return the per-class value counters (static backing map). */
+ public HashMap<String,Integer> getFeatureCounter() {
+ return m_featureCounters;
+ }
+
+ /** @return the feature class -> (value -> index) map (static backing map). */
+ static public HashMap<String,HashMap<String,Integer>> getFeatureSet() {
+ return m_featureSets;
+ }
+
+ static public String[] reverse(HashMap<String,Integer> v){
+ String[] set = new String[v.size()];
+ for(Entry<String,Integer> e : v.entrySet()) {
+ set[e.getValue()]=e.getKey();
+ }
+ return set;
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/Open.java b/dependencyParser/mate-tools/src/is2/data/Open.java
new file mode 100755
index 0000000..ba75fe3
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Open.java
@@ -0,0 +1,37 @@
+package is2.data;
+
+
+
+final public class Open {
+
+ public float p;
+ short s, e, label;
+ byte dir;
+
+ Closed left;
+ Closed right;
+
+ public Open(short s, short t, short dir, short label,Closed left, Closed right, float p) {
+ this.s = s;
+ this.e = t;
+ this.label = label;
+ this.dir = (byte)dir;
+ this.left =left;
+ this.right=right;
+ this.p=p;
+ }
+
+
+ void create(Parse parse) {
+ if (dir == 0) {
+ parse.heads[s] = e;
+ if (label != -1) parse.labels[s] = label;
+ } else {
+ parse.heads[e] = s;
+ if (label != -1) parse.labels[e] = label;
+ }
+ if (left != null) left.create(parse);
+ if (right != null) right.create(parse);
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/PSTree.java b/dependencyParser/mate-tools/src/is2/data/PSTree.java
new file mode 100644
index 0000000..30c1364
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/PSTree.java
@@ -0,0 +1,711 @@
+/**
+ *
+ */
+package is2.data;
+
+import is2.util.DB;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Stack;
+
+/**
+ * @author Dr. Bernd Bohnet, 17.01.2011
+ *
+ *
+ */
+public class PSTree {
+
+ int wordCount =0;
+ public String entries[];
+ public String lemmas[];
+ public int head[];
+ public String pos[];
+ public int[] ok;
+ public int non;
+ public int terminalCount;
+ public String[] morph;
+
+ public int[] forms;
+ public int[] phrases;
+ public int[][] psfeats;
+ public int[] ppos;
+
+
+ /**
+ * Initialize the terminal layer from a dependency sentence: forms become the
+ * entries and predicted POS tags the pos entries, shifted by one because
+ * index 0 of the sentence is the artificial root. Reserves length*20 slots
+ * for nonterminals.
+ * @param d source sentence
+ */
+ public PSTree(SentenceData09 d) {
+ create(d.length()-1,d.length()*20);
+ for(int i=1;i<d.length();i++) {
+ entries[i-1]=d.forms[i];
+ pos[i-1]=d.ppos[i];
+ }
+ }
+
+
+ /**
+ * Create an undefined phrase tree
+ */
+ public PSTree() { }
+
+
+ /**
+ * Allocate the node arrays for a tree with the given number of terminals and
+ * room for nonTerminals additional phrase nodes; terminals occupy slots
+ * 0..terminals-1, nonterminals are appended starting at 'non'.
+ * @param terminals
+ * @param nonTerminals
+ */
+ public void create(int terminals, int nonTerminals) {
+ entries = new String[terminals+nonTerminals];
+ pos = new String[terminals+nonTerminals];
+ head = new int[terminals+nonTerminals];
+ lemmas = new String[terminals+nonTerminals];
+ morph = new String[terminals+nonTerminals];
+ non=terminals;
+ wordCount=terminals;
+
+ // NOTE(review): loop starts at terminals+1, so head[terminals] keeps its
+ // default 0 — confirm whether the first nonterminal slot is meant to be root.
+ for(int i=terminals+1;i<head.length;i++) head[i]=-1;
+ }
+
+
+ /** Tab-separated dump: index, POS, entry, head (plus the ok flag when present). */
+ public String toString() {
+
+ StringBuffer s = new StringBuffer();
+
+ for(int i=0;i<entries.length;i++) {
+ // stop at the first slot that was never filled
+ if (head[i]==-1&&entries[i]==null) break;
+
+ s.append(i+"\t"+pos[i]+"\t"+entries[i]+"\t"+head[i]+(ok==null?"":("\t"+(ok[i]==1)))+" \n");
+
+ }
+ // DB.println("entries "+entries.length);
+ return s.toString();
+ }
+
+
+ /**
+ * @return true if any terminal entry in 0..wordCount-2 is null.
+ * NOTE(review): the last terminal (wordCount-1) is not checked — confirm
+ * whether that is intentional.
+ */
+ public boolean containsNull() {
+ for(int k=0;k<wordCount-1;k++) {
+ if (entries[k]==null) return true;
+ }
+ return false;
+ }
+
+
+ public int equals(SentenceData09 s) {
+
+ int j=1; // starts with root
+ for(int i=0;i<terminalCount-1;i++){
+
+ // if (s.forms[j].equals("erschrekkend")) s.forms[j]="erschreckend";
+
+ if (s.forms.length<j) {
+ DB.println(""+s+" "+this.toString());
+ return i;
+
+ }
+
+ if(!entries[i].equals(s.forms[j])) {
+ // System.out.println("ps "+entries[i]+" != ds "+s.forms[j]);
+ // Rolls-Royce
+ if(entries[i].startsWith(s.forms[j]) && s.forms.length>i+2 && s.forms[j+1].equals("-")) {
+ j+=2;
+ if( entries[i].contains(s.forms[j-1]) && s.forms.length>i+3 && s.forms[j+1].equals("-")) {
+ j+=2; // &&
+ // System.out.println("s.forms[j] "+s.forms[j]+" s.forms[j-1] "+s.forms[j-1]+" "+entries[i]);
+ if( entries[i].contains(s.forms[j-1]) && s.forms.length>i+3 && s.forms[j+1].equals("-")) {
+ j+=2; // &&
+ // System.out.println("s.forms[j] "+s.forms[j]+" s.forms[j-1] "+s.forms[j-1]+" "+entries[i]);
+ }
+ }
+ //Interstate\/Johnson
+ } else if(entries[i].startsWith(s.forms[j]) && s.forms.length>i+2 && s.forms[j+1].equals("/")) {
+ j+=2;
+ if( entries[i].contains(s.forms[j-1]) && s.forms.length>i+3 && s.forms[j+1].equals("/")) {
+ j+=2; // &&
+ // System.out.println("s.forms[j] "+s.forms[j]+" s.forms[j-1] "+s.forms[j-1]+" "+entries[i]);
+ }
+
+ // U.S.-Japan -> U . S . - Japan
+ } else if(entries[i].startsWith(s.forms[j]) && s.forms.length>i+2 && s.forms[j+1].equals(".")) {
+ j+=2;
+ if( entries[i].contains(s.forms[j-1]) && s.forms.length>i+3 && s.forms[j+1].equals(".")) {
+ j+=2; // &&
+ // System.out.println("s.forms[j] "+s.forms[j]+" s.forms[j-1] "+s.forms[j-1]+" "+entries[i]);
+ }
+ } else if(entries[i].startsWith(s.forms[j]) && s.forms.length>i+1 && s.forms[j+1].equals("'S")) {
+ j+=1;
+
+ } else {
+
+ // chech those !!!
+ // System.out.print("entry "+entries[i]+" form "+s.forms[j]+" ");
+ return j;
+ }
+
+ }
+ j++;
+
+
+ }
+
+ // without root
+ return s.length();
+ //return j;
+
+ }
+
+
+ /**
+ * Phrase-structure head of a dependency node (dependency indices are
+ * 1-based, phrase-structure indices 0-based, hence the -1).
+ * @param dn 1-based dependency node index
+ * @return head index in the phrase structure
+ */
+ public int getPS(int dn) {
+
+ return this.head[dn-1];
+ }
+
+
+ /**
+ * @param dn
+ * @param n
+ * @param commonHead the common head in the phrase structure
+ * @return
+ */
+ public String getChain(int dn, int n, int commonHead) {
+
+ int pdn =dn-1,pdh=n-1;
+ // int phraseHead =head[pdh];
+
+ // System.out.println("phrase head "+phraseHead+" common head "+commonHead);
+
+ int[] ch = new int[20];
+ int head =this.head[pdn];
+ int i=0;
+ ch[i++]=head;
+ while(commonHead!=head && head!=0) {
+
+ head = this.head[head];
+ ch[i++]=head;
+ }
+ StringBuffer chain= new StringBuffer();
+
+ for(int k=0;k<i;k++) {
+ chain.append(entries[ch[k]]).append(" ");
+ }
+ return chain.toString();
+ }
+
+
+ /**
+ * Find a phrase node dominating both d and dh: walk dh's path towards the
+ * root and, for each candidate, climb from d's side until a match is found.
+ * @param d 1-based dependent index
+ * @param dh 1-based head index
+ * @return the common ancestor, or -1 if none is found
+ */
+ public int getCommonHead(int d, int dh) {
+ int pdh = this.getPS(dh), pd = this.getPS(d);
+
+
+ ArrayList<Integer> path2root = getPath2Root(pdh);
+
+ //System.out.println("path 2 root "+path2root+" pdh "+pdh);
+
+ for(int n : path2root) {
+ int candidateHead=pd;
+ while(candidateHead!=0&& candidateHead!=-1) {
+ if (n==candidateHead) return n;
+ candidateHead =this.head[candidateHead];
+ }
+ }
+ return -1;
+ }
+
+
+ /**
+ * Ancestors of pdh up to (but excluding) the root, nearest first; the loop
+ * is capped at 100 steps so an accidental cycle cannot hang the parser.
+ * @param pdh start node
+ */
+ private ArrayList<Integer> getPath2Root(int pdh) {
+ ArrayList<Integer> path = new ArrayList<Integer>();
+
+
+ // restrict the number in case its a cycle which should never be
+ for(int k=0;k<100;k++) {
+ if(pdh==-1) break;
+ path.add(pdh);
+ pdh = this.head[pdh];
+ if(pdh==0) break;
+ }
+ return path;
+ }
+
+
+ /**
+ * Get operations to create root
+ * see operation in method getOperation
+ * @param pr
+ */
+ public String getOperationRoot(int pr) {
+
+ StringBuffer o = new StringBuffer();
+ int h = pr;
+ int[] path = new int[10];
+ // System.out.println(" start node "+pr);
+ int k=0;
+ for(;k<10;k++) {
+ h = head[h];
+ if (h==-1){
+ break;
+ }
+ path[k]=h;
+ if (h==0){
+ break;
+ }
+
+ }
+ k-=2;
+
+ boolean first=true;
+ for(;k>=0;k--) {
+
+ // create phrase
+ if (first) {
+ o.append("c:").append(entries[path[k]]);
+ first =false;
+ }
+
+ // insert and create phrase
+ else {o.append(":ci:").append(entries[path[k]]);}
+ }
+
+
+ // insert dependent node
+ //if (o.length()>0)
+ o.append(":in:d");
+ //else o.append("in:d"); // insert root into nothing
+ return o.toString();
+ }
+
+
+ /**
+ * Create operation to include dependency edges in phrase structure
+ * Operations: c - create ; i - insert ; in - insert (dependent) node ; up:X go the (phrase) X up
+ * ci create and insert ...
+ *
+ * @param dn
+ * @param n
+ * @param commonHead
+ * @return
+ */
+ public String getOperation(int dn, int n, int commonHead) {
+
+ StringBuffer o= new StringBuffer();
+
+ // from n move up to common head, if needed
+ int ph =n-1, pd = dn-1;
+
+ int[] path = new int[20];
+ int i=0;
+
+ int h =ph;
+
+ boolean nth=false;
+ for(int k=0;k<10;k++) {
+ h = head[h];
+ path[k]=h;
+ if (nth) o.append(':');
+ o.append("up:"+entries[h]);
+ nth=true;
+ if (h==commonHead) break;
+ }
+
+ // from common head to the node
+ int k=0;
+ h=pd;
+ for(;k<10;k++) {
+ h = head[h];
+ path[k]=h;
+ if (h==commonHead){
+ break;
+ }
+
+ }
+ k-=1;
+
+ // boolean first=true;
+ for(;k>=0;k--) {
+
+ // create phrase
+ if (!nth) {
+ o.append("ci:").append(entries[path[k]]);
+ nth =true;
+ }
+
+ // insert and create phrase
+ else {o.append(":ci:").append(entries[path[k]]);}
+ }
+
+
+ // insert dependent node
+ o.append(":in:d");
+
+
+
+ return o.toString();
+ }
+
+
+ /**
+ * @param ph node in the phrase structure corresponding to the head in the dependency structure
+ * @param pt node in the prhase structure corresponding to the dependent in the ds.
+ * @param check
+ * @return rules was applicable
+ */
+ public boolean exec(String r, int ph, int pt, boolean check) {
+
+ String o[] = r.split(":");
+
+ int last =-1, headP = -1;
+
+ // create root node
+
+ // System.out.println("operation "+r+" "+ph+" "+pt);
+ boolean done =true;
+ for(int i=0;i<o.length;i++) {
+
+ if (o[i].equals("c")) {
+ if (check) return true;
+
+ if(ph<0) {
+ last=non++;
+ }
+
+ entries[non]=o[++i]; // create
+ head[pt]=non;
+ head[non]=last; // insert into root
+ last=non++;
+ } else if (o[i].equals("ci")) {
+ if (check) return true;
+ entries[non]= o[++i]; // create
+ head[non] = last; // insert
+ last =non;
+ non++;
+ } else if (o[i].equals("in")&&o[i+1].equals("d")) {
+ if (check) return true;
+ head[pt] = last; // insert
+ i++; // move forward because of 'd'
+ } else if (o[i].equals("up")) {
+
+ if (ph==-1) {
+ // System.out.println("ph is -1 please check this "+ph+" there is a bug ");
+ return false;
+ }
+
+ if (headP==-1) headP=head[ph];
+ else headP=head[headP];
+
+ try {
+ if (headP==-1 || entries[headP]==null ||!entries[headP].equals(o[i+1])) return false;
+
+ } catch(Exception e) {
+ e.printStackTrace();
+ System.out.println(""+entries[headP]+" o[i+1] "+o[i+1]+" "+headP+" "+this.terminalCount);
+ // System.out.println(""+ this.toString());
+ System.exit(0);
+ }
+
+ i++;
+ last =headP;
+ } else {
+ done = false;
+ }
+
+ }
+
+
+ return done;
+ }
+
+ /**
+ * More tolerant mapping
+ *
+ * @param ph node in the phrase structure corresponding to the head in the dependency structure
+ * @param pt node in the prhase structure corresponding to the dependent in the ds.
+ * @param check
+ * @return rules was applicable
+ */
+ public boolean execT(String r, int ph, int pt, boolean check) {
+
+ String o[] = r.split(":");
+
+ int last =-1, headP = -1;
+
+ int up=0;
+
+ boolean done =true;
+ for(int i=0;i<o.length;i++) {
+
+ if (o[i].equals("c")) {
+ if (check) return true;
+
+
+ // create root node
+ if(ph<0) {
+ last=non++;
+ }
+
+ entries[non]= o[++i]; // create
+ head[pt]=non;
+ head[non]=last; // insert into root
+ last=non++;
+ } else if (o[i].equals("ci")) {
+
+ if (check) return true;
+ entries[non]= o[++i]; // create
+ head[non] = last; // insert
+ last =non;
+ non++;
+ } else if (o[i].equals("in")&&o[i+1].equals("d")) {
+ if (check) return true;
+
+ // DB.println("hallo");
+
+ if (last !=-1)
+ head[pt] = last; // insert
+
+
+ // i am not sure if this does much good?
+
+ // if (last ==-1)
+
+ // done=true;
+
+
+
+ i++; // move forward because of 'd'
+
+ } else if (o[i].equals("up")) {
+ up++;
+ if (ph==-1) {
+ return false;
+ }
+
+ if (headP==-1) headP=head[ph];
+ else headP=head[headP];
+
+ try {
+
+ // tolerant mapping
+ if (headP==-1 || entries[headP]==null ||
+ ((!entries[headP].equals(o[i+1]) ) && up>1 )) return false; //>1
+// && entries[headP].charAt(0)!=o[i+1].charAt(0)
+ } catch(Exception e) {
+ e.printStackTrace();
+ System.out.println(""+entries[headP]+" o[i+1] "+o[i+1]+" "+headP+" "+this.terminalCount);
+ }
+
+ i++;
+ last =headP;
+ } else {
+ done = false;
+ }
+
+ }
+
+
+ return done;
+ }
+
+
+ public final static boolean INSERT_NEWLINE =true;
+
+ /**
+ * Convert to bracket format
+ * @param newLine
+ * @return
+ */
+ public String toPennBracket(boolean newLine) {
+
+
+ StringBuffer b = new StringBuffer();
+ ArrayList<Integer> current=null;// = new ArrayList<Integer>();
+ int open =0;
+ for(int i=0; i<terminalCount ;i++) {
+ ArrayList<Integer> path = getPathToRoot(i);
+
+ ArrayList<Integer> diff = getDiffPath(path, current);
+
+ boolean spaces=false;
+
+ ArrayList<Integer> common = this.getDiffCommon(path, current);
+
+ if(current!=null && (current.size()>common.size())) {
+
+ // close brackets
+ for(int bc =0;bc<current.size()-common.size();bc++) {
+ b.append(")");
+ open--;
+ }
+ if(diff.size()==0 && newLine) b.append("\n");
+ spaces=true;
+ }
+
+ if(i!=0 && diff.size()>0 && newLine) b.append("\n").append(createSpaces(open));
+
+ for(int k=diff.size()-1;k>=0;k--) {
+ open++;
+ b.append("("+(entries[path.get(k)]==null?" ":entries[path.get(k)]));
+ if (k!=0 &&path.size()-1!=k && newLine)
+ b.append("\n").append(createSpaces(open));
+ spaces=false;
+ }
+ if(spaces) b.append(createSpaces(open));
+ else b.append(" ");
+
+ String term=entries[i];
+ if(term.equals("(")) term="-LRB-";
+ if(term.equals(")")) term="-RRB-";
+ if(term.equals("{")) term="-LCB-";
+ if(term.equals("}")) term="-RCB-";
+
+ String ps=pos[i];
+ if(ps.equals("(")) ps="-LRB-";
+ if(ps.equals("$(")) ps="-LRB-";
+
+ if(ps.equals(")")) ps="-RRB-";
+ if(ps.equals("{")) ps="-LCB-";
+ if(ps.equals("}")) ps="-RCB-";
+
+
+ b.append("(").append(ps).append(" ").append(term).append(')');
+ current = path;
+ // break;
+ }
+ for(;open>0;open--) {
+ b.append(")");
+ }
+ // b.append("\n");
+
+ return b.toString();
+ }
+ static int cnt=0;
+
+ /**
+ * @param path
+ * @param current
+ * @return
+ */
+ private ArrayList<Integer> getDiffPath(ArrayList<Integer> path, ArrayList<Integer> current) {
+ if (current==null) return path;
+
+ ArrayList<Integer> common = new ArrayList<Integer>();
+
+ int pindex = path.size()-1;
+ int cindex = current.size()-1;
+
+ while(cindex>=0 && pindex>=0) {
+
+ if(path.get(pindex)==current.get(cindex)) {
+ cindex--;
+ pindex--;
+ } else break;
+ }
+
+ for(int k=0;k<=pindex;k++) {
+ common.add(path.get(k));
+ }
+
+ return common;
+ }
+
+ private ArrayList<Integer> getDiffCommon(ArrayList<Integer> path, ArrayList<Integer> current) {
+ if (current==null) return path;
+
+ ArrayList<Integer> common = new ArrayList<Integer>();
+
+ int pindex = path.size()-1;
+ int cindex = current.size()-1;
+
+ while(cindex>=0 && pindex>=0) {
+
+ if(path.get(pindex)==current.get(cindex)) {
+ common.add(path.get(pindex));
+ cindex--;
+ pindex--;
+ } else break;
+ }
+
+ Collections.reverse(common);
+ // System.out.println("common "+pindex+" "+common);
+
+ return common;
+ }
+ /**
+ * @param i
+ * @return
+ */
+ private StringBuffer createSpaces(int i) {
+ StringBuffer s = new StringBuffer();
+ for (int k=0;k<i;k++) s.append(" ");
+ return s;
+ }
+
+
+ /**
+ * Collect the chain of phrase (nonterminal) ancestors of node i, nearest
+ * ancestor first. Stops when the head falls into the terminal range or when
+ * a node repeats (cycle guard).
+ * @param i node index
+ * @return ancestor phrase nodes, closest first
+ */
+ private ArrayList<Integer> getPathToRoot(int i) {
+
+ ArrayList<Integer> path = new ArrayList<Integer> ();
+
+ int h=i;
+ while(true) {
+ h=this.head[h];
+ // heads below terminalCount are terminals; contains(h) guards cycles
+ if (h<this.terminalCount || path.contains(h)) break;
+ path.add(h);
+ }
+
+ // Collections.reverse(list)
+
+
+ return path;
+ }
+
+
+ public String conll09() {
+
+ StringBuilder s = new StringBuilder();
+ for(int i=0;i<this.terminalCount;i++) {
+ if (head[i]==-1&&entries[i]==null) break;
+
+ s.append((i+1)).append('\t').append(entries[i]).append("\t_\t_\t").append(pos[i]).append("\t_\t_\t_\t_\t_\t_\t_\t_\n");
+
+
+ }
+
+
+ return s.toString();
+ }
+
+ /**
+ * @param phead
+ * @return
+ */
+ public int[] getChilds(int head) {
+
+ int count=0;
+ for(int i =0;i<this.entries.length;i++) {
+ if (this.head[i]==head) count++;
+ }
+
+ int[] clds = new int[count];
+ count=0;
+ for(int i =0;i<this.entries.length;i++) {
+ if (this.head[i]==head) clds[count++]=i;
+ }
+
+ return clds;
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/Parameter.java b/dependencyParser/mate-tools/src/is2/data/Parameter.java
new file mode 100644
index 0000000..35a9911
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Parameter.java
@@ -0,0 +1,13 @@
+/**
+ *
+ */
+package is2.data;
+
+/**
+ * @author Dr. Bernd Bohnet, 23.12.2010
+ *
+ *
+ */
+public class Parameter {
+	// NOTE(review): empty placeholder class; no members or usages visible here.
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/ParametersFloat.java b/dependencyParser/mate-tools/src/is2/data/ParametersFloat.java
new file mode 100755
index 0000000..653487e
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/ParametersFloat.java
@@ -0,0 +1,183 @@
+package is2.data;
+
+import is2.util.DB;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+
+
+final public class ParametersFloat {
+
+ public float[] parameters;
+ public float[] total;
+
+ public ParametersFloat(int size) {
+
+ parameters = new float[size];
+ total = new float[size];
+ for(int i = 0; i < parameters.length; i++) {
+ parameters[i] = 0F;
+ total[i] = 0F;
+ }
+ }
+
+
+ /**
+ * @param parameters2
+ */
+ public ParametersFloat(float[] p) {
+ parameters =p;
+ }
+
+ /**
+ * Replace the weights with the averaged weights total/avVal and drop the
+ * accumulator; this object can no longer be trained afterwards.
+ */
+ public void average(double avVal) {
+ for(int j = 0; j < total.length; j++) {
+ parameters[j] = total[j]/((float)avVal);
+ }
+ // free the accumulator; further update() calls would NPE
+ total =null;
+ }
+
+ /**
+ * Non-destructive variant of average: returns a new ParametersFloat holding
+ * the averaged weights while leaving this object untouched.
+ */
+ public ParametersFloat average2(double avVal) {
+ float[] px = new float[this.parameters.length];
+ for(int j = 0; j < total.length; j++) {
+ px[j] = total[j]/((float)avVal);
+ }
+ ParametersFloat pf = new ParametersFloat(px);
+ return pf;
+ }
+
+ public void update(FV pred, FV act, float upd, float err) {
+
+
+ float lam_dist = act.getScore(parameters,false)- pred.getScore(parameters,false);
+ float loss =(float)err - lam_dist;
+
+ FV dist = act.getDistVector(pred);
+
+ float alpha;
+ float A = dist.dotProduct(dist);
+ if (A<=0.0000000000000001) alpha=0.0f;
+ else alpha= loss/A;
+
+ // alpha = Math.min(alpha, 0.00578125F);
+
+ dist.update(parameters, total, alpha, upd,false);
+
+ }
+
+ public void update(FV pred, FV act, float upd, float err, float C) {
+
+
+ float lam_dist = act.getScore(parameters,false)- pred.getScore(parameters,false);
+ float loss =(float)err - lam_dist;
+
+ FV dist = act.getDistVector(pred);
+
+ float alpha;
+ float A = dist.dotProduct(dist);
+ if (A<=0.0000000000000001) alpha=0.0f;
+ else alpha= loss/A;
+
+ alpha = Math.min(alpha, C);
+
+ dist.update(parameters, total, alpha, upd,false);
+
+ }
+
+
+
+ public double update(FV a, double b) {
+
+ double A = a.dotProduct(a);
+ if (A<=0.0000000000000000001) return 0.0;
+ return b/A;
+ }
+
+
+ public double getScore(FV fv) {
+ if (fv ==null) return 0.0F;
+ return fv.getScore(parameters,false);
+
+ }
+
+
+ final public void write(DataOutputStream dos) throws IOException{
+
+ dos.writeInt(parameters.length);
+ for(float d : parameters) dos.writeFloat(d);
+
+ }
+
+ public void read(DataInputStream dis) throws IOException{
+
+ parameters = new float[dis.readInt()];
+ int notZero=0;
+ for(int i=0;i<parameters.length;i++) {
+ parameters[i]=dis.readFloat();
+ if (parameters[i]!=0.0F) notZero++;
+ }
+
+ DB.println("read parameters "+parameters.length+" not zero "+notZero);
+
+ }
+
+ public int countNZ() {
+
+ int notZero=0;
+ for(int i=0;i<parameters.length;i++) {
+ if (parameters[i]!=0.0F) notZero++;
+ }
+ return notZero;
+
+ }
+
+ public F2SF getFV() {
+ return new F2SF(parameters);
+ }
+
+
+ public int size() {
+ return parameters.length;
+ }
+
+ public void update(FVR act, FVR pred, Instances isd, int instc, Parse dx, double upd, double e, float lam_dist) {
+
+ e++;
+
+
+ float b = (float)e-lam_dist;
+
+ FVR dist = act.getDistVector(pred);
+
+ dist.update(parameters, total, hildreth(dist,b), upd,false);
+ }
+
+
+ public void update(FVR pred, FVR act, float upd, float e) {
+
+ e++;
+ float lam_dist = act.getScore(parameters,false)- pred.getScore(parameters,false);
+
+ float b = (float)e-lam_dist;
+
+ FVR dist = act.getDistVector(pred);
+
+ dist.update(parameters, total, hildreth(dist,b), upd,false);
+ }
+
+ protected double hildreth(FVR a, double b) {
+
+ double A = a.dotProduct(a);
+ if (A<=0.0000000000000000001) return 0.0;
+ return b/A;
+ }
+
+ public float getScore(FVR fv) { //xx
+ if (fv ==null) return 0.0F;
+ return fv.getScore(parameters,false);
+
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/Parse.java b/dependencyParser/mate-tools/src/is2/data/Parse.java
new file mode 100755
index 0000000..21a83d9
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Parse.java
@@ -0,0 +1,157 @@
+package is2.data;
+
+
+import java.util.BitSet;
+
+
+public class Parse implements Comparable<Parse> {
+
+ public short[] heads;
+ public short[] labels;
+ public double f1;
+
+
+ public Parse() {}
+
+ public Parse(int i) {
+ heads = new short[i];
+ labels = new short[i];
+
+ }
+ /**
+ * @param heads2
+ * @param types2
+ * @param p_new
+ */
+ public Parse(short[] heads2, short[] types2, float p_new) {
+ this.heads = new short[heads2.length];
+ this.labels = new short[types2.length];
+ // this.heads=heads2;
+ // this.labels=types2;
+ System.arraycopy(heads2, 0, heads, 0, heads.length);
+ System.arraycopy(types2, 0, labels, 0, labels.length);
+ f1=p_new;
+
+ }
+
+ /**
+ * @param heads2
+ * @param types2
+ * @param p_new
+ */
+ public Parse(String parse, float p_new) {
+
+ // this(parse.length()/2);
+
+ signature2parse(parse);
+
+ f1=p_new;
+
+ }
+
+ public void signature2parse(String parse) {
+ int p=0;
+ heads = new short[parse.length()/2];
+ labels = new short[heads.length];
+ // DB.println("pl "+parse.length());
+ for(int k=0;k<heads.length;k++) {
+ heads[k]= (short)parse.charAt(p++);
+ labels[k] = (short)parse.charAt(p++);
+ }
+ }
+
+
+ @Override
+ public Parse clone() {
+ Parse p = new Parse();
+ p.heads = new short[heads.length];
+ p.labels = new short[labels.length];
+
+ System.arraycopy(heads, 0, p.heads, 0, heads.length);
+ System.arraycopy(labels, 0, p.labels, 0, labels.length);
+
+ p.f1=f1;
+
+ return p;
+ }
+
+ /**
+ * Check if it is a tree
+ * @return
+ */
+ public boolean checkTree() {
+
+ BitSet set = new BitSet(heads.length);
+ set.set(0);
+ return checkTree(set, 0);
+
+ }
+
+ /**
+ * @param set
+ * @return
+ */
+ private boolean checkTree(BitSet set, int h) {
+ //System.out.print(" h "+h);
+
+ for(int i=0;i<heads.length;i++) {
+ if (heads[i]==h) {
+ // System.out.print(" "+i);
+ if (!set.get(i)) checkTree(set, i);
+ set.set(i);
+
+ }
+ }
+
+ for(int i=0;i<heads.length;i++) {
+ if (!set.get(i)) return false;
+ }
+ return true;
+ }
+
+ public String toString() {
+ StringBuilder b = new StringBuilder();
+ for(int k=0;k<this.heads.length;k++) {
+ b.append(k).append(" ").append(heads[k]+" ").append(this.labels[k]).append("\n");
+ }
+ return b.toString();
+ }
+
+
+
+ /* (non-Javadoc)
+ * @see java.lang.Comparable#compareTo(java.lang.Object)
+ */
+ @Override
+ public int compareTo(Parse o) {
+
+ if (f1==o.f1) return this.signature().compareTo(o.signature());
+ return f1<o.f1?1:f1==o.f1?0:-1;
+ }
+
+ /**
+ * @return the signature of a parse
+ */
+ public String signature() {
+ StringBuilder b = new StringBuilder(heads.length*2);
+ for(int k=0;k<heads.length;k++) {
+ b.append((char)heads[k]).append((char)labels[k]);
+ }
+ return b.toString();
+ }
+
+
+ /**
+ * @return the signature of a parse
+ */
+ public StringBuilder signatureSB() {
+ StringBuilder b = new StringBuilder(heads.length*2);
+ for(int k=0;k<heads.length;k++) {
+ b.append((char)heads[k]).append((char)labels[k]);
+ }
+ return b;
+ }
+
+
+}
+
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/data/ParseNBest.java b/dependencyParser/mate-tools/src/is2/data/ParseNBest.java
new file mode 100644
index 0000000..b66306c
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/ParseNBest.java
@@ -0,0 +1,107 @@
+package is2.data;
+
+
+
+
/**
 * A {@link Parse} that caches its packed string signature; used to hold
 * candidate parses during n-best decoding.
 */
final public class ParseNBest extends Parse {


	// lazily cached result of signature(); null until first computed
	private String signature=null;

	//public float[] scores;

	public ParseNBest() {}

	public ParseNBest(short[] heads2, short[] types2, float p_new) {
		super(heads2, types2, p_new);
	}

	public ParseNBest(int i) {
		super(i);
	}

	/**
	 * Restores a parse from its packed signature and score.
	 * @param sig the packed signature (heads/labels encoded as chars)
	 * @param score the parse score
	 */
	public ParseNBest(String sig, float score) {
		super(sig,score);
	}

	/**
	 * create a total order to provide replicable deterministic results:
	 * higher scores sort first, ties are broken on the cached signature.
	 *
	 * NOTE(review): this method overloads rather than overrides
	 * Parse.compareTo(Parse); sorts dispatching through Comparable&lt;Parse&gt;
	 * will call the superclass version instead — confirm which is intended.
	 * The tie-break direction is also reversed relative to Parse.compareTo.
	 *
	 * @param o the parse to compare against
	 * @return a negative, zero or positive ordering value
	 */
	public int compareTo(ParseNBest o) {
		if (f1<o.f1) return 1;
		if (f1==o.f1) {
			// compute and cache the signatures only when actually needed
			if (signature==null) signature=signature();
			if (o.signature==null) o.signature=o.signature();
			return o.signature.compareTo(signature);

		}
		return -1;
	}

	/**
	 * @return the signature of a parse (cached after the first call)
	 */
	public String signature() {
		if (signature!=null) return signature;
		signature= super.signature();
		return signature;
	}

	/**
	 * Packs the given arrays into a signature string and caches the result as
	 * this parse's signature (overwriting any previously cached value).
	 *
	 * @param heads the head per token
	 * @param labels the label per token
	 * @return the signature of a parse
	 */
	public String signature(short[] heads, short[] labels) {
		StringBuilder b = new StringBuilder(heads.length*2);
		for(int k=0;k<heads.length;k++) {
			b.append((char)heads[k]).
			append((char)labels[k]);
		}
		signature = b.toString();
		return signature;
	}

	/**
	 * Packs the arrays into a signature as if token {@code ch} had head
	 * {@code p} and label {@code l}, without modifying the arrays; the result
	 * is cached as this parse's signature.
	 *
	 * @param heads the head per token
	 * @param types the label per token
	 * @param p the hypothetical head of token ch
	 * @param ch the token whose attachment is replaced
	 * @param l the hypothetical label of token ch
	 * @return the signature of the modified parse
	 */
	public String signature(short[] heads, short[] types, short p, short ch,short l) {
		StringBuilder b = new StringBuilder(heads.length*2);
		for(int k=0;k<heads.length;k++) {


			b.append(k==ch?(char)p:
				(char)heads[k]).
				append(k==ch?(char)l:(char)types[k]);
		}
		signature = b.toString();
		return signature;

	}

	/**
	 * Deep-copies heads, labels and score. The cached signature is not copied;
	 * it is recomputed on demand.
	 */
	@Override
	public Parse clone() {
		ParseNBest p = new ParseNBest();
		p.heads = new short[heads.length];
		p.labels = new short[labels.length];

		System.arraycopy(heads, 0, p.heads, 0, heads.length);
		System.arraycopy(labels, 0, p.labels, 0, labels.length);

		p.f1=f1;

		return p;
	}


}
+
+
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/data/PipeGen.java b/dependencyParser/mate-tools/src/is2/data/PipeGen.java
new file mode 100755
index 0000000..b63fb90
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/PipeGen.java
@@ -0,0 +1,83 @@
+package is2.data;
+
+
/**
 * Constants shared by the pipeline feature extractors, plus small helpers for
 * in-place console progress reporting: each call erases the previously printed
 * status with backspace characters and prints the new one.
 */
public class PipeGen {

	public static final String SENSE = "SENSE",POS = "POS",DIST = "DIST",WORD = "WORD",PRED = "PRED",ARG = "ARG",
	FEAT = "F", REL = "REL",TYPE = "TYPE" ,CHAR = "C",FFEATS="FF", DIR="DIR",LA = "LA",RA = "RA";

	public static final String GPOS = "GPOS", MID = "MID",END = "END",STR = "STR",FM="FM", NOFEAT = "NOFEAT";

	public static final String _0 = "0",_4 = "4", _3 = "3", _2 = "2",_1 = "1", _5 = "5",_10 = "10";

	/**
	 * Erases {@code del} previously printed characters with backspaces and
	 * prints {@code out} in their place on System.out.
	 *
	 * @param out the new status text
	 * @param del width of the previously printed status
	 * @return the length of {@code out}; pass it as {@code del} next time
	 */
	private static int overwrite(String out, int del) {
		StringBuilder s = new StringBuilder(del + out.length());
		for (int k = 0; k < del; k++) s.append('\b');
		System.out.print(s.append(out));
		return out.length();
	}

	/** Prints the instance counter, overwriting the previous status. */
	static public int outValue(int num1, int del) {
		return overwrite(String.valueOf(num1), del);
	}

	/** Prints the counter and average milliseconds per instance since {@code last}. */
	static public int outValue(int num1, int del, long last) {
		return overwrite(num1 + " (" + (System.currentTimeMillis() - last) / (num1 + 1) + " ms/instance)", del);
	}

	/** Prints counter, ms/instance, error rate and f1 score. */
	static public int outValueErr(int num1, float err, float f1, int del, long last) {

		return overwrite(num1 + " (" + (System.currentTimeMillis() - last) / (num1 + 1) + " ms/instance "
				+ (err / num1) + " err/instance f1=" + f1 + ") ", del);
	}

	/** Prints counter, ms/instance, error rate, f1 score and update count. */
	static public int outValueErr(int num1, float err, float f1, int del, long last, double upd) {
		return overwrite(num1 + " (" + (System.currentTimeMillis() - last) / (num1 + 1) + " ms/instance "
				+ (err / num1) + " err/instance f1=" + f1 + ") upd " + upd, del);
	}

	/** Prints counter, ms/instance, error rate, f1 score, update count and extra info. */
	static public int outValueErr(int num1, float err, float f1, int del, long last, double upd, String info) {
		return overwrite(num1 + " (" + (System.currentTimeMillis() - last) / (num1 + 1) + " ms/instance "
				+ (err / (float) num1) + " err/instance f1=" + f1 + ") upd " + upd + " " + info, del);
	}

	/**
	 * @param cnt number of sentences processed
	 * @param l elapsed milliseconds
	 * @return formatted seconds-per-sentence string
	 */
	public static String getSecondsPerInstnace(int cnt, long l) {
		// the method name's typo is kept for API compatibility, but the printed
		// text fixes the former "sentnece" misspelling
		return " " + ((float) l / (cnt * 1000f)) + " seconds/sentence ";
	}

	/**
	 * @param l elapsed milliseconds
	 * @return formatted total-time string
	 */
	public static String getUsedTime(long l) {
		return "Used time " + (((float) l) / 1000f) + " seconds ";
	}

}
diff --git a/dependencyParser/mate-tools/src/is2/data/PrimeFinder.java b/dependencyParser/mate-tools/src/is2/data/PrimeFinder.java
new file mode 100644
index 0000000..38c614b
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/PrimeFinder.java
@@ -0,0 +1,66 @@
+/**
+ *
+ */
+package is2.data;
+
+import java.util.Arrays;
+
+/**
+ * @author Dr. Bernd Bohnet, 13.05.2010
+ *
+ *
+ */
/**
 * Supplies prime numbers for hash-table capacities from a precomputed table
 * spanning the int range with roughly geometric spacing.
 */
public class PrimeFinder {



	public PrimeFinder()
	{
	}

	/**
	 * Returns the smallest precomputed prime that is greater than or equal to
	 * the requested capacity.
	 *
	 * @param desiredCapacity the minimum capacity needed
	 * @return a prime &gt;= desiredCapacity
	 */
	public static final int nextPrime(int desiredCapacity)
	{
		final int pos = Arrays.binarySearch(primeCapacities, desiredCapacity);
		// an exact hit yields the index; otherwise binarySearch encodes the
		// insertion point as -(insertionPoint) - 1, i.e. the next larger prime
		return pos >= 0 ? primeCapacities[pos] : primeCapacities[-(pos + 1)];
	}

	public static final int largestPrime = 2147483647;

	// the table is listed out of order and sorted once in the static
	// initializer below, as binarySearch requires ascending order
	private static final int primeCapacities[] = {
		2147483647, 5, 11, 23, 47, 97, 197, 397, 797, 1597,
		3203, 6421, 12853, 25717, 51437, 102877, 205759, 411527, 823117, 1646237,
		3292489, 6584983, 13169977, 26339969, 52679969, 105359939, 210719881, 421439783, 842879579, 1685759167,
		433, 877, 1759, 3527, 7057, 14143, 28289, 56591, 113189, 226379,
		452759, 905551, 1811107, 3622219, 7244441, 14488931, 28977863, 57955739, 115911563, 231823147,
		463646329, 927292699, 1854585413, 953, 1907, 3821, 7643, 15287, 30577, 61169,
		122347, 244703, 489407, 978821, 1957651, 3915341, 7830701, 15661423, 31322867, 62645741,
		125291483, 250582987, 501165979, 1002331963, 2004663929, 1039, 2081, 4177, 8363, 16729,
		33461, 66923, 133853, 267713, 535481, 1070981, 2141977, 4283963, 8567929, 17135863,
		34271747, 68543509, 137087021, 274174111, 548348231, 1096696463, 31, 67, 137, 277,
		557, 1117, 2237, 4481, 8963, 17929, 35863, 71741, 143483, 286973,
		573953, 1147921, 2295859, 4591721, 9183457, 18366923, 36733847, 73467739, 146935499, 293871013,
		587742049, 1175484103, 599, 1201, 2411, 4831, 9677, 19373, 38747, 77509,
		155027, 310081, 620171, 1240361, 2480729, 4961459, 9922933, 19845871, 39691759, 79383533,
		158767069, 317534141, 635068283, 1270136683, 311, 631, 1277, 2557, 5119, 10243,
		20507, 41017, 82037, 164089, 328213, 656429, 1312867, 2625761, 5251529, 10503061,
		21006137, 42012281, 84024581, 168049163, 336098327, 672196673, 1344393353, 3, 7, 17,
		37, 79, 163, 331, 673, 1361, 2729, 5471, 10949, 21911,
		43853, 87719, 175447, 350899, 701819, 1403641, 2807303, 5614657, 11229331, 22458671,
		44917381, 89834777, 179669557, 359339171, 718678369, 1437356741, 43, 89, 179, 359,
		719, 1439, 2879, 5779, 11579, 23159, 46327, 92657, 185323, 370661,
		741337, 1482707, 2965421, 5930887, 11861791, 23723597, 47447201, 94894427, 189788857, 379577741,
		759155483, 1518310967, 379, 761, 1523, 3049, 6101, 12203, 24407, 48817,
		97649, 195311, 390647, 781301, 1562611, 3125257, 6250537, 12501169, 25002389, 50004791,
		100009607, 200019221, 400038451, 800076929, 1600153859
	};

	static
	{
		Arrays.sort(primeCapacities);
	}




}
diff --git a/dependencyParser/mate-tools/src/is2/data/RandomIndex.java b/dependencyParser/mate-tools/src/is2/data/RandomIndex.java
new file mode 100644
index 0000000..7fc67b3
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/RandomIndex.java
@@ -0,0 +1,161 @@
+/**
+ *
+ */
+package is2.data;
+
+import java.util.BitSet;
+
+import is2.util.DB;
+
+
+/**
+ * @author Dr. Bernd Bohnet, 20.05.2011
+ *
+ *
+ */
/**
 * @author Dr. Bernd Bohnet, 20.05.2011
 *
 * Maps 64-bit feature codes onto a bounded integer range using a family of
 * multiplicative hash functions (one output per function).
 */
public class RandomIndex implements Long2IntInterface {


	// multipliers for the hash-function family; despite the name not every
	// entry is prime (e.g. 89990, 5001) — presumably chosen empirically
	final int[] prims = {52349171,199951347,89990,5001,32891,17,19,23,29,31,37,47,53,59,61,67,71};
//	final int[] prims = {1,3,5,7,11,17,19,23,29,31,37,47,53,59,61,67,71};

	// one multiplier per hash function, taken from the head of prims
	final long hashFunctionModifiers[];

	final int kbit,lbit;
	final int hsize ; // maximal size of hash

	final int bits; // available bits
	final int moves; // needed moves to put a number into


	/**
	 * Creates the random functions.
	 *
	 * @param kbit The bits to be mapped
	 * @param lbit The left shift of the bits
	 * @param hsize The size of the featurs space (not included in the original algorithm)
	 * @param numberFunctions The number of the hash functions
	 */
	public RandomIndex(int kbit, int lbit, int hsize, int numberFunctions) {


		this.kbit =kbit;
		this.lbit =lbit;


		if (hsize<=0) this.hsize = 67000001; // default value
		else this.hsize = hsize;

		// number of bits needed to address hsize values
		bits = (int) Math.ceil(Math.log(this.hsize)/Math.log(2));

		// how many shifts of "bits" are needed to fold a 64-bit value
		moves = (int) Math.ceil(64f/(float)bits);



		DB.println("moves "+moves+" bits "+bits+" hsize "+hsize);

		hashFunctionModifiers = new long[numberFunctions];

		// numberFunctions must not exceed prims.length, else this throws
		for (int f = 0;f<numberFunctions;f++) hashFunctionModifiers[f] = prims[f];
	}



	/**
	 * Computes one hash value per hash function for the given code.
	 *
	 * @param x the 64-bit feature code
	 * @return one value in [0, hsize) per hash function
	 */
	public int[] hash(long x)
	{
		int[] hvals = new int[hashFunctionModifiers.length];

		for(int k=0;k<hashFunctionModifiers.length;k++) {

			// the original function: value = ((x+1) * hashFunctionModifiers[k] & m ) >> n;

			// the first part of the original function
			long value = (x+1) * hashFunctionModifiers[k];

			// do the above >> n with a maximal size of the available hash values
			// Shift all bits until they have been each xor-ed (^) in the range of the hash
			// in order the have all information potentially represented there.

			for(int j=1;j<= moves;j++) value = value ^ (value >> (bits*j));

			// Map the value to the range of the available space should be the same as (value & m) .
			hvals[k] = Math.abs((int)value % hsize);
		}
		return hvals;
	}

	/**
	 * Variant hashing the bit-reversed code and shifting by (kbit - lbit).
	 * NOTE(review): Math.abs((int)value1) can stay negative when the cast
	 * yields Integer.MIN_VALUE — confirm callers tolerate that edge case.
	 *
	 * @param x the 64-bit feature code
	 * @return one value per hash function
	 */
	public int[] hashU(long x)
	{
		int[] hvals = new int[hashFunctionModifiers.length];

		long y = Long.reverse(x);
		for(int k=0;k<hashFunctionModifiers.length;k++) {

			// the original function: value = ((x+1) * hashFunctionModifiers[k] & m ) >> n;

			// the first part of the original function
			long value1 = (((y+1) * hashFunctionModifiers[k]) /* % 2 pow 64 */ ) >> (kbit-lbit);

			// I get probably only the first part lets get the second part too
			// long value2 = (((y+1>>20) * hashFunctionModifiers[k]) /* % 2 pow 64 */ ) >> (kbit-lbit);


			// the modulo (%) 2 pow 64 is done since the long number can not be larger than 2 pow 64.
			// System.out.println("value "+value+" shift "+(lbit-kbit));
			hvals[k] = Math.abs((int)value1);
		}
		return hvals;
	}

	/* Reference implementation (Lisp) the hash family above was ported from:

	(defun generate-hash-fn (&key (k-bit 32)
	                          (l-bit 8)
	                          verbosep constants (count 4))

	 (labels ((random-constant ()
	            (let ((a (+ (random (- (expt 2 k-bit) 1)) 1)))
	              (logior a 1)))) ;; inclusive OR ensures odd number.
	   (let ((pdiff (- (- k-bit l-bit)));; neg. sign to do a rightshift, see ash()
	         (sub1 (- (expt 2 k-bit) 1))
	         (constants (copy-list constants)))
	     (unless constants
	       (loop ;; a = odd number a where 0 < a < u.
	           until (= count (length constants))
	           do (pushnew (random-constant) constants)))
	     (when verbosep
	       (format t "~&generate-hash-fn(): using random constants: ~a~%"
	               constants))
	     (values
	      #'(lambda (x)
	          (loop
	              for a in constants
	                       ;;; always add 1 to x to avoid f(0)=0.
	              collect (ash (logand (* (+ 1 x) a) sub1) pdiff)))
	      constants))))

	 */





	/* (non-Javadoc)
	 * @see is2.data.Long2IntInterface#l2i(long)
	 * NOTE(review): unimplemented stub — always returns 0.
	 */
	@Override
	public int l2i(long l) {
		// TODO Auto-generated method stub
		return 0;
	}

	/* (non-Javadoc)
	 * @see is2.data.Long2IntInterface#size()
	 */
	@Override
	public int size() {
		return hsize;
	}

}
+
+
diff --git a/dependencyParser/mate-tools/src/is2/data/SentenceData09.java b/dependencyParser/mate-tools/src/is2/data/SentenceData09.java
new file mode 100755
index 0000000..46cabc0
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/SentenceData09.java
@@ -0,0 +1,530 @@
+package is2.data;
+
+
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter09;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.StringWriter;
+import java.util.ArrayList;
+
+public class SentenceData09 {
+
+
+ public String[] id;
+ public String[] forms;
+
+ public String[] lemmas;
+ public String[] plemmas;
+
+ public int[] heads;
+ public int[] pheads;
+
+ public String[] labels;
+ public String[] plabels;
+
+ public String[] gpos; // gold pos
+ public String[] ppos;
+
+ public String feats[][];
+// public String[] split_lemma;
+
+ public String[] sem;
+ public int[] semposition;
+
+ // predicate number, argument number -> argument string
+ public String[][] arg;
+ public int[][] argposition;
+
+ public String[] fillp;
+
+ public String[] ofeats;
+ public String[] pfeats;
+
+ public SentenceData09() {}
+
+ public SentenceData09(String[] forms, String[] postags, String[] labs, int[] heads) {
+ this.forms = forms;
+ gpos = postags;
+ labels = labs;
+ this.heads = heads;
+ }
+
+ public SentenceData09(String[] forms, String[] lemmas, String[] postags, String[] labs, int[] heads) {
+ this.forms = forms;
+ gpos = postags;
+ //ppos = postags;
+
+ labels = labs;
+ this.heads = heads;
+ this.plemmas = lemmas;
+ }
+ public SentenceData09(String[] forms, String[] lemmas, String[] gpos, String[] ppos, String[] labs, int[] heads) {
+ this.forms = forms;
+ this.gpos = gpos;
+ this.ppos = ppos;
+
+ labels = labs;
+ this.heads = heads;
+ this.plemmas = lemmas;
+
+
+ }
+ public SentenceData09(String[] forms, String[] lemmas, String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) {
+ this.forms = forms;
+ this.gpos = gpos;
+ this.ppos = ppos;
+
+ labels = labs;
+ this.heads = heads;
+ this.plemmas = lemmas;
+
+ fillp =fillpred;
+ }
+
+ public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) {
+ this.forms = forms;
+ this.gpos = gpos;
+ this.ppos = ppos;
+
+ labels = labs;
+ this.heads = heads;
+ this.plemmas = lemmas;
+ this.lemmas =olemmas;
+ fillp =fillpred;
+ }
+
+ public SentenceData09(String[] forms, String[] olemmas, String[] lemmas,String[] gpos,
+ String[] ppos, String[] labs, int[] heads, String[] fillpred, String[] of, String[] pf) {
+ this.forms = forms;
+ this.gpos = gpos;
+ this.ppos = ppos;
+
+ labels = labs;
+ this.heads = heads;
+ this.pheads =heads;
+ this.plabels=labs;
+ this.plemmas = lemmas;
+ this.lemmas =olemmas;
+
+ this.ofeats =of;
+ this.pfeats =pf;
+ fillp =fillpred;
+ }
+
+
+
+
+ /**
+ * Create an instance without root of the input instance
+ * @param instance
+ */
+ public SentenceData09(SentenceData09 i) {
+
+ int length = i.length()-1;
+
+ forms = new String[length];
+ gpos = new String[length];
+ ppos = new String[length];
+ plemmas = new String[length];
+ plabels = new String[length];
+ lemmas = new String[length];
+ heads = new int[length];
+ pheads = new int[length];
+ ofeats = new String[length];
+ pfeats = new String[length];
+ labels = new String[length];
+ fillp = new String[length];
+ id = new String[length];
+
+
+ for(int j = 0; j < length; j++) {
+ forms[j] = i.forms[j+1];
+ ppos[j] = i.ppos[j+1];
+ gpos[j] = i.gpos[j+1];
+
+ labels[j] = i.labels[j+1];
+ heads[j] = i.heads[j+1];
+
+
+
+ if (i.pheads!=null) pheads[j] = i.pheads[j+1];
+ if (i.plabels!=null) plabels[j] = i.plabels[j+1];
+
+
+ if (i.lemmas!=null) lemmas[j] = i.lemmas[j+1];
+
+ plemmas[j] = i.plemmas[j+1];
+
+
+ if (i.ofeats!=null) ofeats[j] = i.ofeats[j+1];
+ if (i.pfeats!=null) pfeats[j] = i.pfeats[j+1];
+
+ if (i.fillp!=null) fillp[j] = i.fillp[j+1];
+ if (i.id!=null) id[j] = i.id[j+1];
+ }
+
+
+ }
+ public void setPPos(String[] pos) {
+ ppos=pos;
+ }
+
+ public void setLemmas(String[] lemmas) {
+ this.plemmas=lemmas;
+ }
+
+ public void setFeats(String[] fts) {
+ feats = new String[fts.length][];
+ for(int i=0;i<fts.length;i++) {
+ feats[i] = fts[i].split("\\|");
+ }
+ pfeats =fts;
+ }
+
+ public int length () {
+ return forms.length;
+ }
+
+ @Override
+ public String toString () {
+ // prepare the output
+ StringWriter sw = new StringWriter();
+ CONLLWriter09 snt2str = new is2.io.CONLLWriter09(sw);
+ try{
+ snt2str.write(this, CONLLWriter09.NO_ROOT);
+ snt2str.finishWriting();
+ return sw.toString();
+ }catch(Exception e) {
+ e.printStackTrace();
+ }
+
+ // backup
+ StringBuffer sb = new StringBuffer();
+ for(int k=0;k<forms.length;k++) sb.append(k+1).append('\t').append(forms[k]).append('\t').append(heads[k]).append('\t').append(labels[k]).append('\n');
+ return sw.toString();
+ }
+
+
+ final public void write (DataOutputStream out) throws IOException {
+
+ out.writeInt(forms.length);
+ for(int k=0;k<forms.length;k++) {
+ out.writeUTF(forms[k]);
+ out.writeUTF(ppos[k]);
+ out.writeUTF(gpos[k]);
+ out.writeInt(heads[k]);
+ out.writeUTF(labels[k]);
+ out.writeUTF(lemmas[k]);
+ out.writeUTF(plemmas[k]);
+ out.writeUTF(ofeats[k]); // needed for mtag
+ out.writeUTF(fillp[k]);
+ }
+
+ // out.writeUTF(actParseTree);
+
+ }
+
+ final public void read (DataInputStream dis) throws IOException {
+
+ int l = dis.readInt();
+
+ forms = new String[l];
+ lemmas = new String[l];
+ plemmas = new String[l];
+ ppos = new String[l];
+ gpos = new String[l];
+ labels = new String[l];
+ heads = new int[l];
+ fillp = new String[l];
+ ofeats=new String[l];
+
+ for(int k=0;k<l;k++) {
+ forms[k] = dis.readUTF();
+ ppos[k]=dis.readUTF();
+ gpos[k]=dis.readUTF();
+ heads[k]=dis.readInt();
+ labels[k]=dis.readUTF();
+ lemmas[k]=dis.readUTF();
+ plemmas[k]=dis.readUTF();
+ ofeats[k]=dis.readUTF();
+ fillp[k]=dis.readUTF();
+
+ }
+ }
+
+
+ private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException {
+ forms = (String[])in.readObject();
+ plemmas = (String[])in.readObject();
+ ppos = (String[])in.readObject();
+ heads = (int[])in.readObject();
+ labels = (String[])in.readObject();
+ }
+
+ public void addPredicate(int i, String s) {
+
+ int predId;
+ if (sem == null) {
+ predId=0;
+ sem = new String[1];
+ semposition = new int[1];
+ }
+ else {
+ predId=sem.length;
+ String p[] = new String[sem.length+1];
+ System.arraycopy(sem, 0, p, 0, sem.length);
+ int id[] = new int[sem.length+1];
+ System.arraycopy(semposition, 0, id, 0, semposition.length);
+ sem =p;
+ semposition=id;
+ }
+ sem[predId]=s;
+ semposition[predId]=i;
+ }
+
+
+ /**
+ * Add an argument
+ * @param i the instance (the child)
+ * @param predId the id of the predicate (the head)
+ * @param a the label of the argument
+ */
+ public void addArgument(int i, int predId, String a) {
+
+ if (a ==null || a.equals("_")) return;
+
+ // ensure the space for the argument in the data structure
+ if (arg == null) {
+ arg = new String[predId+1][];
+ argposition = new int[predId+1][];
+ } else if (arg.length<=predId) {
+ String p[][] = new String[predId+1][];
+ System.arraycopy(arg, 0, p, 0, arg.length);
+ arg =p;
+
+ int id[][] = new int[predId+1][];
+ System.arraycopy(argposition, 0, id, 0, argposition.length);
+ argposition = id;
+ }
+
+
+
+ int aId;
+ if (arg[predId]==null) {
+ aId=0;
+ arg[predId] = new String[1];
+ argposition[predId] = new int[1];
+ } else {
+ aId =arg[predId].length;
+ String args[] = new String[arg[predId].length+1];
+ System.arraycopy(arg[predId], 0, args, 0, arg[predId].length);
+ arg[predId]=args;
+
+ int argsId[] = new int[argposition[predId].length+1];
+ System.arraycopy(argposition[predId], 0, argsId, 0, argposition[predId].length);
+ argposition[predId]=argsId;
+ }
+
+ arg[predId][aId]=a;
+ argposition[predId][aId]=i;
+
+ }
+
+ public int[] getParents() {
+ return heads;
+ }
+
+ public String[] getLabels() {
+ return labels;
+ }
+
+ public String printSem() {
+
+ if (sem==null) return "";
+ StringBuilder s = new StringBuilder();
+
+ for(int k=0;k<sem.length;k++) {
+ s.append(sem[k]).append("\n");
+
+ if (arg==null) {
+ s.append("arg == null");
+ }else
+ if (arg.length<=k) {
+ s.append("args.length <=k arg.length:"+arg.length+" k:"+k);
+ } else if (arg[k]!=null) {
+ for(int a=0;a< arg[k].length;a++) {
+ s.append(" ").append(arg[k][a]);
+ }
+ } else {
+ s.append("args == null ");
+ }
+ s.append('\n');
+ }
+ return s.toString();
+ }
+
+
+ /**
+ * Initialize a instance so that a tagger, parser, etc. could be applied
+ * @param forms
+ */
+ public void init(String[] forms) {
+ this.forms = forms;
+ heads = new int[forms.length];
+ gpos = new String[forms.length];
+ ppos = new String[forms.length];
+ plemmas = new String[forms.length];
+ feats = new String[forms.length][0];
+ labels = new String[forms.length];
+ }
+
+ /**
+ * @param instance
+ * @param fillp2
+ * @param i09
+ */
+ public void createSemantic(SentenceData09 instance) {
+
+ this.sem = instance.sem;
+ this.semposition = instance.semposition;
+
+ if (instance.semposition!=null)
+ for (int k= 0;k< instance.semposition.length;k++) {
+ this.semposition[k]=instance.semposition[k]-1;
+ }
+
+ this.arg = instance.arg;
+
+
+ this.argposition = instance.argposition;
+
+ if (this.argposition!=null)
+ for (int p= 0;p< instance.argposition.length;p++) {
+ if (this.argposition[p]!=null)
+ for(int a=0;a<instance.argposition[p].length;a++)
+ this.argposition[p][a]=instance.argposition[p][a]-1;
+ }
+
+
+ }
+
+ /**
+ *
+ */
+ public String oneLine() {
+
+
+ StringBuffer o = new StringBuffer();
+ for(int i=1;i<this.length();i++) {
+
+ if (i!=1)o.append(" ");
+ o.append(this.forms[i]);
+ }
+ return o.toString();
+ }
+
+ /**
+ * Get the children of this instance
+ * @param head
+ * @return children of the head
+ */
+ public ArrayList<Integer> getChildren(int head) {
+
+ ArrayList<Integer> children = new ArrayList<Integer>();
+ for(int i=0;i<length();i++) {
+ if (heads[i]==head) children.add(i);
+ }
+ return children;
+ }
+
+ public void createWithRoot(SentenceData09 i) {
+
+ int length = i.length();
+ int offset = 0;
+ if (! i.forms[0].equals(CONLLReader09.ROOT)) {
+ length++;
+ offset = -1;
+ }
+
+
+
+ forms = new String[length];
+ gpos = new String[length];
+ ppos = new String[length];
+ plemmas = new String[length];
+ plabels = new String[length];
+ lemmas = new String[length];
+ heads = new int[length];
+ pheads = new int[length];
+ ofeats = new String[length];
+ pfeats = new String[length];
+ labels = new String[length];
+ fillp = new String[length];
+ id = new String[length];
+ feats = new String[forms.length][];
+
+ for(int j = 1; j < length; j++) {
+ forms[j] = i.forms[j+offset];
+ ppos[j] = i.ppos[j+offset];
+ gpos[j] = i.gpos[j+offset];
+
+ labels[j] = i.labels[j+offset];
+ heads[j] = i.heads[j+offset];
+
+
+
+ if (i.pheads!=null) pheads[j] = i.pheads[j+offset];
+ if (i.plabels!=null) plabels[j] = i.plabels[j+offset];
+
+
+ if (i.lemmas!=null) lemmas[j] = i.lemmas[j+offset];
+
+ plemmas[j] = i.plemmas[j+offset];
+
+
+ // if (i.ofeats!=null) ofeats[j] = i.ofeats[j+offset];
+
+ ofeats[j]= i.ofeats[j+offset].equals(CONLLWriter09.DASH)? "_" : i.ofeats[j+offset];
+
+ // if (i.pfeats!=null) pfeats[j] = i.pfeats[j+offset];
+
+ if (i.pfeats!=null && i.pfeats[j+offset]!=null) {
+ if (i.pfeats[j+offset].equals(CONLLWriter09.DASH)) feats[j]=null;
+ else {
+ feats[j] =i.pfeats[j+offset].split(CONLLReader09.PIPE);
+
+ // if (info[7].equals(CONLLWriter09.DASH)) it.feats[i]=null;
+ // else {
+ // it.feats[i] =info[7].split(PIPE);
+ pfeats[j] = i.pfeats[j+offset];
+ // }
+ }
+ }
+
+ if (i.fillp!=null) fillp[j] = i.fillp[j+offset];
+ if (i.id!=null) id[j] = i.id[j+offset];
+ }
+
+
+
+ forms[0] = CONLLReader09.ROOT;
+ plemmas[0] = CONLLReader09.ROOT_LEMMA;
+ fillp[0] = "N";
+ lemmas[0] = CONLLReader09.ROOT_LEMMA;
+
+ gpos[0] = CONLLReader09.ROOT_POS;
+ ppos[0] = CONLLReader09.ROOT_POS;
+ labels[0] = CONLLReader09.NO_TYPE;
+ heads[0] = -1;
+ plabels[0] = CONLLReader09.NO_TYPE;
+ pheads[0] = -1;
+ ofeats[0] = CONLLReader09.NO_TYPE;
+ id[0] ="0";
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/data/Thesaurus.java b/dependencyParser/mate-tools/src/is2/data/Thesaurus.java
new file mode 100644
index 0000000..2d3677a
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/data/Thesaurus.java
@@ -0,0 +1,194 @@
+/**
+ *
+ */
+package is2.data;
+
+import is2.util.DB;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+
+/**
+ * @author Dr. Bernd Bohnet, 28.10.2010
+ *
+ *
+ */
+final public class Thesaurus {
+
+ public static final String LPATH = "LP";
+ public static final String SPATH = "SP";
+
+ // [word][p] p = [0:long-path | 1:short-path]
+ final private int[][] word2path;
+
+ public Thesaurus() {
+ word2path =new int[0][];
+ }
+
+ /**
+ * @param clusterFile
+ * @param mf
+ *
+ */
+ public Thesaurus(String clusterFile, IEncoderPlus mf, int ls) {
+
+ final String REGEX = "\t";
+
+ // register words
+ try {
+ BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768);
+
+ int cnt=0;
+ String line;
+ while ((line =inputReader.readLine())!=null) {
+
+ cnt++;
+ try {
+ String[] split = line.split(REGEX);
+ // mf.register(LPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls));
+ mf.register(PipeGen.WORD, split[0]);
+ mf.register(PipeGen.WORD, split[1]);
+ } catch(Exception e) {
+ System.out.println("Error in cluster line "+cnt+" error: "+e.getMessage());
+ }
+ }
+ System.out.println("read number of thesaury entries "+cnt);
+ inputReader.close();
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ word2path = new int[mf.getFeatureCounter().get(PipeGen.WORD)][];
+
+
+ // insert words
+ try {
+ String line;
+ BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768);
+
+ int startWd =-1;
+ ArrayList<Integer> wrds = new ArrayList<Integer>();
+ while ((line =inputReader.readLine())!=null) {
+
+ String[] split = line.split(REGEX);
+ int wd = mf.getValue(PipeGen.WORD, split[0]);
+ // DB.println("wd "+wd+" "+startWd);
+ if (startWd == wd) {
+ int thesaurusWrd = mf.getValue(PipeGen.WORD, split[1]);
+ if (thesaurusWrd!=wd) wrds.add(thesaurusWrd);
+ } else if (startWd!=-1) {
+ int[] ths = new int[wrds.size()];
+ for(int k=0;k<ths.length;k++) ths[k]=wrds.get(k);
+ word2path[startWd] = ths;
+ // DB.println(""+wrds+" size "+ths.length);
+ wrds.clear();
+ int thesaurusWrd = mf.getValue(PipeGen.WORD, split[1]);
+ if (thesaurusWrd!=wd) wrds.add(thesaurusWrd);
+ }
+ startWd=wd;
+ }
+
+ if (wrds.size()!=0) {
+ // put rest of the words
+ int[] ths = new int[wrds.size()];
+ for(int k=0;k<ths.length;k++) ths[k]=wrds.get(k);
+ word2path[startWd] = ths;
+ // DB.println(""+wrds+" size "+ths.length);
+ wrds.clear();
+
+
+
+
+ }
+
+ inputReader.close();
+ int fill=0;
+ for(int l = 0; l<word2path.length; l++ ){
+ if (word2path[l]!=null) fill++;
+ }
+ /*
+ for(int l = 0; l<word2path.length; l++ ){
+ if (word2path[l][1]!=0) fillL++;
+ if (word2path[l][1]<-1) System.out.println("lower "+word2path[l][1]);
+ }
+ */
+ System.out.println("filled "+fill+" of "+word2path.length);
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Read the cluster
+ * @param dos
+ * @throws IOException
+ */
+ public Thesaurus(DataInputStream dis) throws IOException {
+
+ word2path = new int[dis.readInt()][];
+ for(int i =0;i<word2path.length;i++) {
+ int len = dis.readInt();
+ if (len>0) {
+ word2path[i] = new int[len];
+ for(int j =0;j<len;j++) {
+ word2path[i][j] = dis.readInt();
+
+ }
+ }
+
+ word2path[i][0]=dis.readShort();
+ }
+ DB.println("Read cluster with "+word2path.length+" words ");
+ }
+
+ /**
+ * Write the cluster
+ * @param dos
+ * @throws IOException
+ */
+ public void write(DataOutputStream dos) throws IOException {
+
+ dos.writeInt(word2path.length);
+ for(int[] i : word2path) {
+ dos.writeInt(i==null?0:i.length);
+
+ if (i!=null) {
+ for(int j=0;j<i.length;j++) {
+
+ dos.writeInt(i[j]);
+
+ }
+
+ }
+ }
+
+ }
+
+ /**
+ * @param form the id of a word form
+ * @return the short path to the word form in the cluster
+
+ final public int getSP(int form) {
+ if (word2path.length<form) return -1;
+ return word2path[form][0];
+ }
+ */
+ /**
+ * get the long path to a word form in the cluster
+ * @param form the id of a word form
+ * @return the long path to the word
+ */
+ final public int get(int form, int k) {
+ if (word2path.length<form || word2path[form]==null) return -1;
+ return word2path[form][k];
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLReader04.java b/dependencyParser/mate-tools/src/is2/io/CONLLReader04.java
new file mode 100644
index 0000000..4ca5254
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/io/CONLLReader04.java
@@ -0,0 +1,272 @@
+
+
+package is2.io;
+
+import is2.data.Instances;
+import is2.data.SentenceData09;
+import is2.util.DB;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+
+
+
+/**
+ * Reads dependency trees from a 4-column, tab-separated file
+ * (form, POS tag, head index, dependency label) into {@link SentenceData09}
+ * objects with an artificial root token at index 0.
+ *
+ * @author Bernd Bohnet
+ */
+public class CONLLReader04 {
+
+ private static final String US = "_";
+ private static final String REGEX = "\t";
+ public static final String STRING = "*";
+ public static final String PIPE = "\\|";
+ public static final String NO_TYPE = "<no-type>";
+ public static final String ROOT_POS = "<root-POS>";
+ public static final String ROOT_LEMMA = "<root-LEMMA>";
+ public static final String ROOT = "<root>";
+ public static final String EMPTY_FEAT = "<ef>";
+
+ private static final String NUMBER = "[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+";
+ private static final String NUM = "<num>";
+
+ private BufferedReader inputReader;
+
+ public static final int TASK08=8;
+ public static final int TASK09=9;
+
+ // when true, normalize() replaces numeric tokens by the NUM placeholder
+ public static boolean normalizeOn =true;
+
+ // current position in the input file, used in error messages
+ private int lineNumber = 0;
+
+ public CONLLReader04(){}
+
+ /**
+ * Opens the given file for reading (UTF-8, buffered).
+ * I/O problems are only logged; a later getNext() then fails.
+ * @param file the file to read
+ */
+ public CONLLReader04(String file){
+ lineNumber=0;
+ try {
+ inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * @param task ignored; kept for interface compatibility with the other readers
+ */
+ public CONLLReader04(String file, int task){
+ this(file);
+ }
+
+
+
+ /**
+ * (Re-)opens the given file for reading, resetting the line counter.
+ */
+ public void startReading(String file ){
+ lineNumber=0;
+ try {
+ inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Read the next sentence.
+ * @return the sentence, or null at end of input (the reader is then closed)
+ * @throws Exception if the input is malformed; the original cause is attached
+ */
+ public SentenceData09 getNext() throws Exception {
+
+ try {
+
+ ArrayList<String[]> lineList = new ArrayList<String[]>();
+
+ String line = inputReader.readLine();
+
+ // NOTE(review): this skips any line shorter than 2 characters, i.e. also
+ // 1-character lines, not only empty ones -- presumably to cope with stray
+ // line-terminator residue; confirm before tightening to length()==0.
+ while(line !=null && line.length()<2) {
+ line = inputReader.readLine();
+ lineNumber++;
+ System.out.println("skip empty line at line "+lineNumber);
+ }
+
+ while (line != null && line.length()!=0 && !line.startsWith(STRING) &&!line.startsWith(REGEX)) {
+ lineList.add(line.split(REGEX));
+ line = inputReader.readLine();
+ lineNumber++;
+ }
+
+
+
+ int length = lineList.size();
+
+ if(length == 0) {
+ inputReader.close();
+ return null;
+ }
+
+ SentenceData09 it = new SentenceData09();
+
+ // column content of this 4-column format:
+ // 1 form
+ // 2 pos-tag
+ // 3 head
+ // 4 deprel
+
+ it.forms = new String[length+1];
+
+ it.plemmas = new String[length+1];
+ it.gpos = new String[length+1];
+ it.labels = new String[length+1];
+ it.heads = new int[length+1];
+ it.pheads = new int[length+1];
+ it.plabels = new String[length+1];
+
+ it.ppos = new String[length+1];
+ it.lemmas = new String[length+1];
+ it.fillp = new String[length+1];
+ it.feats = new String[length+1][];
+ it.ofeats = new String[length+1];
+ it.pfeats = new String[length+1];
+
+
+ // position 0 is the artificial root node
+ it.forms[0] = ROOT;
+ it.plemmas[0] = ROOT_LEMMA;
+ it.fillp[0] = "N";
+ it.lemmas[0] = ROOT_LEMMA;
+
+ it.gpos[0] = ROOT_POS;
+ it.ppos[0] = ROOT_POS;
+ it.labels[0] = NO_TYPE;
+ it.heads[0] = -1;
+ it.plabels[0] = NO_TYPE;
+ it.pheads[0] = -1;
+ it.ofeats[0] = NO_TYPE;
+
+ // root is 0 therefore start with 1
+
+ for(int i = 1; i <= length; i++) {
+
+ String[] info = lineList.get(i-1);
+
+ it.forms[i] = info[0];
+
+ // this format carries no lemma or morphological information
+ it.lemmas[i] = "_";
+ it.plemmas[i] ="_";
+
+ it.gpos[i] = info[1];
+ it.ppos[i] = info[1];
+
+ it.ofeats[i]="_";
+
+ it.feats[i]=null;
+ it.pfeats[i] = "_";
+
+ if (info[2].equals(US)) it.heads[i]=-1;
+ else it.heads[i] = Integer.parseInt(info[2]);// head
+
+ it.labels[i] = info[3];
+
+ }
+ return it;
+
+ } catch(Exception e) {
+ System.out.println("\n!!! Error in input file at line : "+lineNumber+" "+e.toString());
+ e.printStackTrace();
+ // BUG FIX: keep the original exception as cause instead of throwing a bare Exception
+ throw new Exception("corrupt input file near line "+lineNumber, e);
+ }
+
+ }
+
+ /**
+ * Read the next sentence and, additionally, store it in compressed form.
+ * @param is the instance store; may be null
+ * @return the sentence read, or null at end of input
+ * @throws Exception if the input is malformed
+ */
+ final public SentenceData09 getNext(Instances is) throws Exception {
+
+ SentenceData09 it = getNext();
+
+ if (is !=null) insert(is,it);
+
+ return it;
+
+ }
+
+
+
+
+ /**
+ * Store a sentence in the compressed instance representation.
+ * NOTE(review): any exception during insertion is only logged and the
+ * method still returns true; only a null sentence yields false.
+ * @return false if it is null (input exhausted), true otherwise
+ */
+ final public boolean insert(Instances is, SentenceData09 it) throws IOException {
+
+ try {
+
+ if(it == null) {
+ inputReader.close();
+ return false;
+ }
+
+ int i= is.createInstance09(it.length());
+
+ for(int p = 0; p < it.length(); p++) {
+
+ is.setForm(i, p, normalize(it.forms[p]));
+ is.setGPos(i, p, it.gpos[p]);
+
+ // fall back to the gold POS when no predicted POS is available
+ if (it.ppos[p]==null||it.ppos[p].equals(US)) {
+ is.setPPoss(i, p, it.gpos[p]);
+ } else is.setPPoss(i, p, it.ppos[p]);
+
+ // fall back to the word form when no predicted lemma is available
+ if (it.plemmas[p]==null ||it.plemmas[p].equals(US)) {
+ is.setLemma(i, p, normalize(it.forms[p]));
+ } else is.setLemma(i, p, normalize(it.plemmas[p]));
+
+ is.setFeats(i,p,it.feats[p]);
+
+ is.setFeature(i,p,it.ofeats[p]);
+
+ is.setRel(i,p,it.labels[p]);
+ if (it.plabels!=null) is.setPRel(i,p,it.plabels[p]);
+ is.setHead(i,p,it.heads[p]);
+ if (it.pheads!=null) is.setPHead(i,p,it.pheads[p]);
+
+ if (it.fillp!=null && it.fillp[p]!=null && it.fillp[p].startsWith("Y")) is.pfill[i].set(p);
+ else is.pfill[i].clear(p);
+ }
+
+ if (is.createSem(i,it)) {
+ DB.println("count "+i+" len "+it.length());
+ DB.println(it.printSem());
+ }
+ } catch(Exception e ){
+ DB.println("head "+it);
+ e.printStackTrace();
+ }
+ return true;
+
+ }
+
+ /**
+ * Map numeric tokens to the NUM placeholder if normalization is enabled.
+ */
+ public static String normalize (String s) {
+ if (!normalizeOn) return s;
+ if(s.matches(NUMBER)) return NUM;
+ return s;
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLReader06.java b/dependencyParser/mate-tools/src/is2/io/CONLLReader06.java
new file mode 100755
index 0000000..351fa04
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/io/CONLLReader06.java
@@ -0,0 +1,275 @@
+
+
+package is2.io;
+
+import is2.data.Instances;
+import is2.data.SentenceData09;
+import is2.util.DB;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+
+
+
+/**
+ * Reads dependency trees in the CoNLL-06 (CoNLL-X) tab-separated format
+ * (id, form, lemma, cpos, pos, feats, head, deprel) into
+ * {@link SentenceData09} objects with an artificial root token at index 0.
+ *
+ * @author Bernd Bohnet
+ */
+public class CONLLReader06 {
+
+ private static final String US = "_";
+ private static final String REGEX = "\t";
+ public static final String STRING = "*";
+ public static final String PIPE = "\\|";
+ public static final String NO_TYPE = "<no-type>";
+ public static final String ROOT_POS = "<root-POS>";
+ public static final String ROOT_LEMMA = "<root-LEMMA>";
+ public static final String ROOT = "<root>";
+ public static final String EMPTY_FEAT = "<ef>";
+
+ private static final String NUMBER = "[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+";
+ private static final String NUM = "<num>";
+
+ private BufferedReader inputReader;
+
+ public static final int TASK08=8;
+ public static final int TASK09=9;
+
+ // when true, normalize() replaces numeric tokens by the NUM placeholder
+ public static boolean normalizeOn =true;
+
+ // current position in the input file, used in error messages
+ private int lineNumber = 0;
+
+ public CONLLReader06(){}
+
+ /**
+ * Opens the given file for reading (UTF-8, buffered).
+ * I/O problems are only logged; a later getNext() then fails.
+ * @param file the file to read
+ */
+ public CONLLReader06(String file){
+ lineNumber=0;
+ try {
+ inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * @param task ignored; kept for interface compatibility with the other readers
+ */
+ public CONLLReader06(String file, int task){
+ this(file);
+ }
+
+
+
+ /**
+ * (Re-)opens the given file for reading, resetting the line counter.
+ */
+ public void startReading(String file ){
+ lineNumber=0;
+ try {
+ inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Read the next sentence.
+ * @return the sentence, or null at end of input (the reader is then closed)
+ * @throws Exception if the input is malformed; the original cause is attached
+ */
+ public SentenceData09 getNext() throws Exception {
+
+ try {
+
+ ArrayList<String[]> lineList = new ArrayList<String[]>();
+
+ String line = inputReader.readLine();
+
+ while(line !=null && line.length()==0) {
+ line = inputReader.readLine();
+ lineNumber++;
+ System.out.println("skip empty line at line "+lineNumber);
+ }
+
+ while (line != null && line.length()!=0 && !line.startsWith(STRING) &&!line.startsWith(REGEX)) {
+ lineList.add(line.split(REGEX));
+ line = inputReader.readLine();
+ lineNumber++;
+ }
+
+
+
+ int length = lineList.size();
+
+ if(length == 0) {
+ inputReader.close();
+ return null;
+ }
+
+ SentenceData09 it = new SentenceData09();
+
+ // column content
+ // 1 id
+ // 2 form
+ // 3 lemma
+ // 4 cpos-tag
+ // 5 pos-tag
+ // 6 feats
+ // 7 head
+ // 8 deprel
+
+ it.forms = new String[length+1];
+
+ it.plemmas = new String[length+1];
+ it.gpos = new String[length+1];
+ it.labels = new String[length+1];
+ it.heads = new int[length+1];
+ it.pheads = new int[length+1];
+ it.plabels = new String[length+1];
+
+ it.ppos = new String[length+1];
+ it.lemmas = new String[length+1];
+ it.fillp = new String[length+1];
+ it.feats = new String[length+1][];
+ it.ofeats = new String[length+1];
+ it.pfeats = new String[length+1];
+
+
+ // position 0 is the artificial root node
+ it.forms[0] = ROOT;
+ it.plemmas[0] = ROOT_LEMMA;
+ it.fillp[0] = "N";
+ it.lemmas[0] = ROOT_LEMMA;
+
+ it.gpos[0] = ROOT_POS;
+ it.ppos[0] = ROOT_POS;
+ it.labels[0] = NO_TYPE;
+ it.heads[0] = -1;
+ it.plabels[0] = NO_TYPE;
+ it.pheads[0] = -1;
+ it.ofeats[0] = NO_TYPE;
+
+ // root is 0 therefore start with 1
+
+ for(int i = 1; i <= length; i++) {
+
+ String[] info = lineList.get(i-1);
+
+ it.forms[i] = info[1];
+
+ it.lemmas[i] = info[2];
+ it.plemmas[i] =info[2];
+
+ it.gpos[i] = info[3];
+ it.ppos[i] = info[4];
+
+ // NOTE(review): an empty feature string is stored here for "_" input;
+ // CONLLReader08 remarks that "" upsets the eval09 script -- confirm.
+ it.ofeats[i]=info[5].equals(CONLLWriter09.DASH)? "": info[5];
+
+ if (info[5].equals(CONLLWriter09.DASH)) it.feats[i]=null;
+ else {
+ it.feats[i] =info[5].split(PIPE);
+ it.pfeats[i] = info[5];
+ }
+
+ if (info[6].equals(US)) it.heads[i]=-1;
+ else it.heads[i] = Integer.parseInt(info[6]);// head
+
+ it.labels[i] = info[7];
+
+ }
+ return it;
+
+ } catch(Exception e) {
+ System.out.println("\n!!! Error in input file at line : "+lineNumber+" "+e.toString());
+ e.printStackTrace();
+ // BUG FIX: keep the original exception as cause instead of throwing a bare Exception
+ throw new Exception("corrupt input file near line "+lineNumber, e);
+ }
+
+ }
+
+ /**
+ * Read the next sentence and, additionally, store it in compressed form.
+ * @param is the instance store; may be null
+ * @return the sentence read, or null at end of input
+ * @throws Exception if the input is malformed
+ */
+ final public SentenceData09 getNext(Instances is) throws Exception {
+
+ SentenceData09 it = getNext();
+
+ if (is !=null) insert(is,it);
+
+ return it;
+
+ }
+
+
+
+
+ /**
+ * Store a sentence in the compressed instance representation.
+ * NOTE(review): any exception during insertion is only logged and the
+ * method still returns true; only a null sentence yields false.
+ * @return false if it is null (input exhausted), true otherwise
+ */
+ final public boolean insert(Instances is, SentenceData09 it) throws IOException {
+
+ try {
+
+ if(it == null) {
+ inputReader.close();
+ return false;
+ }
+
+ int i= is.createInstance09(it.length());
+
+ for(int p = 0; p < it.length(); p++) {
+
+ is.setForm(i, p, normalize(it.forms[p]));
+ is.setGPos(i, p, it.gpos[p]);
+
+ // fall back to the gold POS when no predicted POS is available
+ if (it.ppos[p]==null||it.ppos[p].equals(US)) {
+ is.setPPoss(i, p, it.gpos[p]);
+ } else is.setPPoss(i, p, it.ppos[p]);
+
+ // fall back to the word form when no predicted lemma is available
+ if (it.plemmas[p]==null ||it.plemmas[p].equals(US)) {
+ is.setLemma(i, p, normalize(it.forms[p]));
+ } else is.setLemma(i, p, normalize(it.plemmas[p]));
+
+ is.setFeats(i,p,it.feats[p]);
+
+ is.setFeature(i,p,it.ofeats[p]);
+
+ is.setRel(i,p,it.labels[p]);
+ if (it.plabels!=null) is.setPRel(i,p,it.plabels[p]);
+ is.setHead(i,p,it.heads[p]);
+ if (it.pheads!=null) is.setPHead(i,p,it.pheads[p]);
+
+ if (it.fillp!=null && it.fillp[p]!=null && it.fillp[p].startsWith("Y")) is.pfill[i].set(p);
+ else is.pfill[i].clear(p);
+ }
+
+ if (is.createSem(i,it)) {
+ DB.println("count "+i+" len "+it.length());
+ DB.println(it.printSem());
+ }
+ } catch(Exception e ){
+ DB.println("head "+it);
+ e.printStackTrace();
+ }
+ return true;
+
+ }
+
+ /**
+ * Map numeric tokens to the NUM placeholder if normalization is enabled.
+ */
+ public static String normalize (String s) {
+ if (!normalizeOn) return s;
+ if(s.matches(NUMBER)) return NUM;
+ return s;
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLReader08.java b/dependencyParser/mate-tools/src/is2/io/CONLLReader08.java
new file mode 100644
index 0000000..a6194a3
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/io/CONLLReader08.java
@@ -0,0 +1,413 @@
+
+
+package is2.io;
+
+import is2.data.Instances;
+import is2.data.SentenceData09;
+import is2.util.DB;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+
+
+
+/**
+ * This class reads files in the CONLL-09 format.
+ * NOTE(review): despite the comment above, the column layout consumed in
+ * getNextCoNLL09() (form=5, lemma=6, ppos=7, head=8, deprel=9, fillpred=10)
+ * matches the CoNLL-08 shared-task format, as the class name suggests --
+ * confirm against the data files. In F_ONE_LINE mode, plain
+ * one-sentence-per-line text is read instead.
+ *
+ * @author Bernd Bohnet
+ */
+public class CONLLReader08 extends IOGenerals {
+
+
+ private BufferedReader inputReader;
+
+ public static final boolean NORMALIZE = true;
+
+ public static final boolean NO_NORMALIZE = false;
+
+ // when true, normalize() maps numeric tokens to the NUM placeholder
+ public boolean normalizeOn =true;
+
+
+
+ // input format; F_ONE_LINE selects getNextOneLine(), anything else CoNLL
+ private int format = 0;
+
+ // current position in the input file, used in error messages
+ private int lineNumber = 0;
+
+
+ public CONLLReader08(boolean normalize){
+
+ normalizeOn=normalize;
+ }
+
+ /**
+ * Opens the given file for reading (UTF-8, buffered).
+ * I/O problems are only logged; a later getNext() then fails.
+ */
+ public CONLLReader08(String file){
+ lineNumber=0;
+ try {
+ inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ public CONLLReader08(String file, boolean normalize){
+ this(file);
+ normalizeOn=normalize;
+ }
+
+ /**
+ * Sets the input format:
+ *
+ * CONLL09 is standard,
+ * ONE_LINE
+ *
+ * @param format the format (see the constants starting with F_).
+ */
+ public void setInputFormat(int format) {
+ this.format=format;
+ }
+
+
+
+ /**
+ *
+ */
+ public CONLLReader08() {}
+
+ /**
+ * @param testfile
+ * @param formatTask ignored; kept for interface compatibility
+ */
+ public CONLLReader08(String testfile, int formatTask) {
+ this(testfile);
+ }
+
+ /**
+ * (Re-)opens the given file for reading, resetting the line counter.
+ */
+ public void startReading(String file ){
+ lineNumber=0;
+ try {
+ inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Read the next sentence, dispatching on the configured input format.
+ */
+ public SentenceData09 getNext() {
+
+ if (F_ONE_LINE == format) return getNextOneLine();
+ else return getNextCoNLL09();
+ }
+
+ /**
+ * Read one space-separated sentence from a single input line.
+ * @return the sentence (forms only, plus the artificial root), or null at end of input
+ */
+ private SentenceData09 getNextOneLine() {
+
+ String line=null;
+ int i=0;
+ try {
+
+
+ line = inputReader.readLine();
+ lineNumber++;
+
+ if (line==null ) {
+ inputReader.close();
+ return null;
+ }
+
+ String[] tokens = line.split(" ");
+ int length = tokens.length;
+ if (line.isEmpty()) length=0;
+
+ SentenceData09 it = new SentenceData09();
+
+ it.forms = new String[length+1];
+
+ it.plemmas = new String[length+1];
+ // it.ppos = new String[length+1];
+ it.gpos = new String[length+1];
+ it.labels = new String[length+1];
+ it.heads = new int[length+1];
+ it.pheads = new int[length+1];
+ it.plabels = new String[length+1];
+
+ it.ppos = new String[length+1];
+ it.lemmas = new String[length+1];
+ it.fillp = new String[length+1];
+ it.feats = new String[length+1][];
+ it.ofeats = new String[length+1];
+ it.pfeats = new String[length+1];
+ it.id = new String[length+1];
+
+ // position 0 is the artificial root node
+ it.forms[0] = ROOT;
+ it.plemmas[0] = ROOT_LEMMA;
+ it.fillp[0] = "N";
+ it.lemmas[0] = ROOT_LEMMA;
+
+ it.gpos[0] = ROOT_POS;
+ it.ppos[0] = ROOT_POS;
+ it.labels[0] = NO_TYPE;
+ it.heads[0] = -1;
+ it.plabels[0] = NO_TYPE;
+ it.pheads[0] = -1;
+ it.ofeats[0] = NO_TYPE;
+ it.id[0] ="0";
+
+ // root is 0 therefore start with 1
+
+ for(i = 1; i <= length; i++) {
+
+ it.id[i] = ""+i;
+
+ it.forms[i] = this.normalizeOn?normalize(tokens[i-1]):tokens[i-1]; //normalize(
+
+
+ }
+
+ return it;
+
+ } catch(Exception e) {
+ System.out.println("\n!!! Error in input file sentence before line: "+lineNumber+" (in sentence line "+i+" ) "+e.toString());
+ e.printStackTrace();
+ // NOTE(review): terminating the VM -- and with status 0 -- from a reader
+ // is drastic; consider propagating an exception instead.
+ System.exit(0);
+
+
+
+
+ //throw new Exception();
+ return null;
+ }
+
+
+
+ }
+
+ /**
+ * Read the next sentence in CoNLL tabular format.
+ * @return the sentence, or null at end of input (the reader is then closed)
+ */
+
+ public SentenceData09 getNextCoNLL09() {
+
+ String line=null;
+ int i=0;
+ try {
+
+ ArrayList<String[]> lineList = new ArrayList<String[]>();
+
+ line = inputReader.readLine();
+ lineNumber++;
+
+ while(line !=null && line.length()==0) {
+ line = inputReader.readLine();
+ lineNumber++;
+ System.out.println("skip empty line at line "+lineNumber);
+ }
+
+ while (line != null && line.length()!=0 && !line.startsWith(STRING) &&!line.startsWith(REGEX)) {
+ lineList.add(line.split(REGEX));
+ line = inputReader.readLine();
+ lineNumber++;
+ }
+
+
+
+ int length = lineList.size();
+
+ if(length == 0) {
+ inputReader.close();
+ return null;
+ }
+
+ SentenceData09 it = new SentenceData09();
+
+ it.forms = new String[length+1];
+
+ it.plemmas = new String[length+1];
+ // it.ppos = new String[length+1];
+ it.gpos = new String[length+1];
+ it.labels = new String[length+1];
+ it.heads = new int[length+1];
+ it.pheads = new int[length+1];
+ it.plabels = new String[length+1];
+
+ it.ppos = new String[length+1];
+ it.lemmas = new String[length+1];
+ it.fillp = new String[length+1];
+ it.feats = new String[length+1][];
+ it.ofeats = new String[length+1];
+ it.pfeats = new String[length+1];
+ it.id = new String[length+1];
+
+ // position 0 is the artificial root node
+ it.forms[0] = ROOT;
+ it.plemmas[0] = ROOT_LEMMA;
+ it.fillp[0] = "N";
+ it.lemmas[0] = ROOT_LEMMA;
+
+ it.gpos[0] = ROOT_POS;
+ it.ppos[0] = ROOT_POS;
+ it.labels[0] = NO_TYPE;
+ it.heads[0] = -1;
+ it.plabels[0] = NO_TYPE;
+ it.pheads[0] = -1;
+ it.ofeats[0] = NO_TYPE;
+ it.id[0] ="0";
+
+ // root is 0 therefore start with 1
+
+ for(i = 1; i <= length; i++) {
+
+
+
+ String[] info = lineList.get(i-1);
+
+ it.id[i] = info[0];
+ it.forms[i] = info[5]; //normalize(
+ // NOTE(review): this guard is ineffective -- info[5] above already
+ // throws for lines with fewer than 6 columns; it looks copied from
+ // the CoNLL-09 reader where the form is in column 1. Confirm intent.
+ if (info.length<3) continue;
+
+ //it.lemmas[i] = info[2];
+ it.plemmas[i] =info[6];
+ it.gpos[i] = info[3];
+
+ // NOTE(review): guarded by length<5 but column 7 is read next --
+ // lines with 5-7 columns still throw; verify the intended threshold.
+ if (info.length<5) continue;
+ it.ppos[i] = info[7];//.split("\\|")[0];
+
+ // feat 6
+ // pfeat 7
+
+ // this causes trouble in the perl eval09 scirpt
+ //it.ofeats[i]=info[6].equals(CONLLWriter09.DASH)? "" : info[6];
+
+ // now we try underscore
+ it.ofeats[i]="_";
+
+
+ // it.feats[i] ="_";
+ it.pfeats[i] = "_";
+
+
+
+
+ if (info[8].equals(US)) it.heads[i]=-1;
+ else it.heads[i] = Integer.parseInt(info[8]);// head
+
+ it.pheads[i]=-1;// head
+
+ it.labels[i] = info[9];
+ it.plabels[i] = "_";
+
+ it.fillp[i]=info[10];
+
+ // columns 11+ carry the predicate and its arguments
+ if (info.length>11) {
+ if (!info[10].equals(US)) it.addPredicate(i,info[10]);
+ for(int k=11;k<info.length;k++) it.addArgument(i,k-11,info[k]);
+ }
+
+
+
+
+ }
+ return it;
+
+ } catch(Exception e) {
+ System.out.println("\n!!! Error in input file sentence before line: "+lineNumber+" (in sentence line "+i+" ) "+e.toString());
+ e.printStackTrace();
+ // NOTE(review): terminating the VM -- and with status 0 -- from a reader
+ // is drastic; consider propagating an exception instead.
+ System.exit(0);
+
+
+
+
+ //throw new Exception();
+ return null;
+ }
+
+ }
+
+ /**
+ * Read the next sentence and, additionally, store it in compressed form.
+ * @param is the instance store; may be null
+ * @return the sentence read, or null at end of input
+ */
+ final public SentenceData09 getNext(Instances is) {
+
+ SentenceData09 it = getNext();
+
+ if (is !=null) insert(is,it);
+
+ return it;
+
+ }
+
+
+
+
+ /**
+ * Store a sentence in the compressed instance representation.
+ * NOTE(review): any exception during insertion is only logged and the
+ * method still returns true; only a null sentence yields false.
+ */
+ final public boolean insert(Instances is, SentenceData09 it) {
+
+ try {
+
+ if(it == null) {
+ inputReader.close();
+ return false;
+ }
+
+ int i= is.createInstance09(it.length());
+
+ for(int p = 0; p < it.length(); p++) {
+
+ is.setForm(i, p, normalize(it.forms[p]));
+ is.setGPos(i, p, it.gpos[p]);
+
+ // System.out.println(""+is.gpos[i][p]);
+
+ // fall back to the gold POS when no predicted POS is available
+ if (it.ppos[p]==null||it.ppos[p].equals(US)) {
+
+ is.setPPoss(i, p, it.gpos[p]);
+ } else is.setPPoss(i, p, it.ppos[p]);
+
+
+ // fall back to the word form when no predicted lemma is available
+ if (it.plemmas[p]==null ||it.plemmas[p].equals(US)) {
+ is.setLemma(i, p, normalize(it.forms[p]));
+ } else is.setLemma(i, p, normalize(it.plemmas[p]));
+
+ if (it.lemmas!=null)
+ if (it.lemmas[p]==null ) { // ||it.org_lemmas[p].equals(US) that harms a lot the lemmatizer
+ is.setGLemma(i, p, it.plemmas[p]);
+ } else is.setGLemma(i, p, it.lemmas[p]);
+
+
+ if (it.feats!=null && it.feats[p]!=null) is.setFeats(i,p,it.feats[p]);
+
+ if (it.ofeats!=null) is.setFeature(i,p,it.ofeats[p]);
+
+
+ is.setRel(i,p,it.labels[p]);
+ if (it.plabels!=null) is.setPRel(i,p,it.plabels[p]);
+
+ is.setHead(i,p,it.heads[p]);
+ if (it.pheads!=null) is.setPHead(i,p,it.pheads[p]);
+
+ if (it.fillp!=null && it.fillp[p]!=null && it.fillp[p].startsWith("Y")) is.pfill[i].set(p);
+ else is.pfill[i].clear(p);
+ }
+
+ if (is.createSem(i,it)) {
+ DB.println("count "+i+" len "+it.length());
+ DB.println(it.printSem());
+ }
+ } catch(Exception e ){
+ DB.println("head "+it);
+ e.printStackTrace();
+ }
+ return true;
+
+ }
+
+ /**
+ * Map numeric tokens to the NUM placeholder if normalization is enabled.
+ */
+ public String normalize (String s) {
+ if (!normalizeOn) return s;
+ if(s.matches(NUMBER)) return NUM;
+ return s;
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLReader09.java b/dependencyParser/mate-tools/src/is2/io/CONLLReader09.java
new file mode 100755
index 0000000..c020579
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/io/CONLLReader09.java
@@ -0,0 +1,411 @@
+
+
+package is2.io;
+
+import is2.data.Instances;
+import is2.data.SentenceData09;
+import is2.util.DB;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+
+
+
+/**
+ * This class reads files in the CONLL-09 format
+ * (id, form, lemma, plemma, pos, ppos, feat, pfeat, head, phead, deprel,
+ * pdeprel, fillpred, pred, args...). In F_ONE_LINE mode, plain
+ * one-sentence-per-line text is read instead.
+ *
+ * @author Bernd Bohnet
+ */
+public class CONLLReader09 extends IOGenerals {
+
+
+ private BufferedReader inputReader;
+
+ public static final boolean NORMALIZE = true;
+
+ public static final boolean NO_NORMALIZE = false;
+
+ // when true, normalize() maps numeric tokens to the NUM placeholder
+ public boolean normalizeOn =true;
+
+ // NOTE(review): not referenced anywhere in this class; presumably set by
+ // external code -- confirm before removing.
+ static public String joint ="";
+
+ // input format; F_ONE_LINE selects getNextOneLine(), anything else CoNLL-09
+ private int format = 0;
+
+ // current position in the input file, used in error messages
+ private int lineNumber = 0;
+
+
+ public CONLLReader09(boolean normalize){
+
+ normalizeOn=normalize;
+ }
+
+ /**
+ * Opens the given file for reading (UTF-8, buffered).
+ * I/O problems are only logged; a later getNext() then fails.
+ */
+ public CONLLReader09(String file){
+ lineNumber=0;
+ try {
+ inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ public CONLLReader09(String file, boolean normalize){
+ this(file);
+ normalizeOn=normalize;
+ }
+
+ /**
+ * Sets the input format:
+ *
+ * CONLL09 is standard,
+ * ONE_LINE
+ *
+ * @param format the format (see the constants starting with F_).
+ */
+ public void setInputFormat(int format) {
+ this.format=format;
+ }
+
+
+
+ /**
+ *
+ */
+ public CONLLReader09() {}
+
+ /**
+ * @param testfile
+ * @param formatTask ignored; kept for interface compatibility
+ */
+ public CONLLReader09(String testfile, int formatTask) {
+ this(testfile);
+ }
+
+ /**
+ * (Re-)opens the given file for reading, resetting the line counter.
+ */
+ public void startReading(String file ){
+ lineNumber=0;
+ try {
+ inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Read the next sentence, dispatching on the configured input format.
+ */
+ public SentenceData09 getNext() {
+
+ if (F_ONE_LINE == format) return getNextOneLine();
+ else return getNextCoNLL09();
+ }
+
+ /**
+ * Read one space-separated sentence from a single input line.
+ * NOTE(review): unlike getNextCoNLL09(), this path returns null on error
+ * instead of calling System.exit -- the two paths should probably agree.
+ * @return the sentence (forms only, plus the artificial root), or null
+ */
+ private SentenceData09 getNextOneLine() {
+
+ String line=null;
+ int i=0;
+ try {
+
+
+ line = inputReader.readLine();
+ lineNumber++;
+
+ if (line==null ) {
+ inputReader.close();
+ return null;
+ }
+
+ String[] tokens = line.split(" ");
+ int length = tokens.length;
+ if (line.isEmpty()) length=0;
+
+ SentenceData09 it = new SentenceData09();
+
+ it.forms = new String[length+1];
+
+ it.plemmas = new String[length+1];
+ // it.ppos = new String[length+1];
+ it.gpos = new String[length+1];
+ it.labels = new String[length+1];
+ it.heads = new int[length+1];
+ it.pheads = new int[length+1];
+ it.plabels = new String[length+1];
+
+ it.ppos = new String[length+1];
+ it.lemmas = new String[length+1];
+ it.fillp = new String[length+1];
+ it.feats = new String[length+1][];
+ it.ofeats = new String[length+1];
+ it.pfeats = new String[length+1];
+ it.id = new String[length+1];
+
+ // position 0 is the artificial root node
+ it.forms[0] = ROOT;
+ it.plemmas[0] = ROOT_LEMMA;
+ it.fillp[0] = "N";
+ it.lemmas[0] = ROOT_LEMMA;
+
+ it.gpos[0] = ROOT_POS;
+ it.ppos[0] = ROOT_POS;
+ it.labels[0] = NO_TYPE;
+ it.heads[0] = -1;
+ it.plabels[0] = NO_TYPE;
+ it.pheads[0] = -1;
+ it.ofeats[0] = NO_TYPE;
+ it.id[0] ="0";
+
+ // root is 0 therefore start with 1
+
+ for(i = 1; i <= length; i++) {
+
+ it.id[i] = ""+i;
+
+ it.forms[i] = this.normalizeOn?normalize(tokens[i-1]):tokens[i-1]; //normalize(
+
+
+ }
+
+ return it;
+
+ } catch(Exception e) {
+ System.out.println("\n!!! Error in input file sentence before line: "+lineNumber+" (in sentence line "+i+" ) "+e.toString());
+ e.printStackTrace();
+
+
+
+
+
+ //throw new Exception();
+ return null;
+ }
+
+
+
+ }
+
+ /**
+ * Read the next sentence in CoNLL-09 tabular format.
+ * @return the sentence, or null at end of input (the reader is then closed)
+ */
+
+ public SentenceData09 getNextCoNLL09() {
+
+ String line=null;
+ int i=0;
+ try {
+
+ ArrayList<String[]> lineList = new ArrayList<String[]>();
+
+ line = inputReader.readLine();
+ lineNumber++;
+
+ while(line !=null && line.length()==0) {
+ line = inputReader.readLine();
+ lineNumber++;
+ System.out.println("skip empty line at line "+lineNumber);
+ }
+
+ while (line != null && line.length()!=0 && !line.startsWith(STRING) &&!line.startsWith(REGEX)) {
+ lineList.add(line.split(REGEX));
+ line = inputReader.readLine();
+ lineNumber++;
+ }
+
+
+
+ int length = lineList.size();
+
+ if(length == 0) {
+ inputReader.close();
+ return null;
+ }
+
+ SentenceData09 it = new SentenceData09();
+
+ it.forms = new String[length+1];
+
+ it.plemmas = new String[length+1];
+ // it.ppos = new String[length+1];
+ it.gpos = new String[length+1];
+ it.labels = new String[length+1];
+ it.heads = new int[length+1];
+ it.pheads = new int[length+1];
+ it.plabels = new String[length+1];
+
+ it.ppos = new String[length+1];
+ it.lemmas = new String[length+1];
+ it.fillp = new String[length+1];
+ it.feats = new String[length+1][];
+ it.ofeats = new String[length+1];
+ it.pfeats = new String[length+1];
+ it.id = new String[length+1];
+
+ // position 0 is the artificial root node
+ it.forms[0] = ROOT;
+ it.plemmas[0] = ROOT_LEMMA;
+ it.fillp[0] = "N";
+ it.lemmas[0] = ROOT_LEMMA;
+
+ it.gpos[0] = ROOT_POS;
+ it.ppos[0] = ROOT_POS;
+ it.labels[0] = NO_TYPE;
+ it.heads[0] = -1;
+ it.plabels[0] = NO_TYPE;
+ it.pheads[0] = -1;
+ it.ofeats[0] = NO_TYPE;
+ it.id[0] ="0";
+
+ // root is 0 therefore start with 1
+
+ for(i = 1; i <= length; i++) {
+
+
+
+ String[] info = lineList.get(i-1);
+
+ it.id[i] = info[0];
+ it.forms[i] = info[1]; //normalize(
+ // lines carrying only id and form are accepted as-is
+ if (info.length<3) continue;
+
+ it.lemmas[i] = info[2];
+ it.plemmas[i] =info[3];
+ it.gpos[i] = info[4];
+
+ // NOTE(review): off-by-one -- a line with exactly 5 fields passes this
+ // guard yet info[5] below throws; the check should likely be length<6
+ // (and the unconditional reads of columns 6-12 below need >=13 fields).
+ if (info.length<5) continue;
+ it.ppos[i] = info[5];//.split("\\|")[0];
+ // feat 6
+
+ // now we try underscore
+ it.ofeats[i]=info[6].equals(CONLLWriter09.DASH)? "_" : info[6];
+
+ if (info[7].equals(CONLLWriter09.DASH)) it.feats[i]=null;
+ else {
+ it.feats[i] =info[7].split(PIPE);
+ it.pfeats[i] = info[7];
+ }
+
+
+
+ if (info[8].equals(US))it.heads[i]=-1;
+ else it.heads[i] = Integer.parseInt(info[8]);// head
+
+ // NOTE(review): the nested 'it.pheads[i]=-1' is redundant; the outer
+ // assignment already stores -1.
+ it.pheads[i]=info[9].equals(US) ? it.pheads[i]=-1: Integer.parseInt(info[9]);// head
+
+ it.labels[i] = info[10];
+ it.plabels[i] = info[11];
+ it.fillp[i]=info[12];
+
+ // columns 13+ carry the predicate and its arguments
+ if (info.length>13) {
+ if (!info[13].equals(US)) it.addPredicate(i,info[13]);
+ for(int k=14;k<info.length;k++) it.addArgument(i,k-14,info[k]);
+
+ }
+
+
+
+
+ }
+ return it;
+
+ } catch(Exception e) {
+ System.out.println("\n!!! Error in input file sentence before line: "+lineNumber+" (in sentence line "+i+" ) "+e.toString());
+ e.printStackTrace();
+ // NOTE(review): terminating the VM -- and with status 0 -- from a reader
+ // is drastic; consider propagating an exception instead.
+ System.exit(0);
+
+
+
+
+ //throw new Exception();
+ return null;
+ }
+
+ }
+
+ /**
+ * Read the next sentence and, additionally, store it in compressed form.
+ * @param is the instance store; may be null
+ * @return the sentence read, or null at end of input
+ */
+ final public SentenceData09 getNext(Instances is) {
+
+ SentenceData09 it = getNext();
+
+ if (is !=null) insert(is,it);
+
+ return it;
+
+ }
+
+
+
+
+ /**
+ * Store a sentence in the compressed instance representation.
+ * NOTE(review): any exception during insertion is only logged and the
+ * method still returns true; only a null sentence yields false.
+ */
+ final public boolean insert(Instances is, SentenceData09 it) {
+
+ try {
+
+ if(it == null) {
+ inputReader.close();
+ return false;
+ }
+
+ int i= is.createInstance09(it.length());
+
+ for(int p = 0; p < it.length(); p++) {
+
+ is.setForm(i, p, normalize(it.forms[p]));
+ // is.setFormOrg(i, p, it.forms[p]);
+ is.setGPos(i, p, it.gpos[p]);
+
+ // System.out.println(""+is.gpos[i][p]);
+
+ // fall back to the gold POS when no predicted POS is available
+ if (it.ppos[p]==null||it.ppos[p].equals(US)) {
+
+ is.setPPoss(i, p, it.gpos[p]);
+ } else is.setPPoss(i, p, it.ppos[p]);
+
+
+ // fall back to the word form when no predicted lemma is available
+ if (it.plemmas[p]==null ||it.plemmas[p].equals(US)) {
+ is.setLemma(i, p, normalize(it.forms[p]));
+ } else is.setLemma(i, p, normalize(it.plemmas[p]));
+
+ if (it.lemmas!=null)
+ if (it.lemmas[p]==null ) { // ||it.org_lemmas[p].equals(US) that harms a lot the lemmatizer
+ is.setGLemma(i, p, it.plemmas[p]);
+ } else is.setGLemma(i, p, it.lemmas[p]);
+
+
+ if (it.feats!=null && it.feats[p]!=null) is.setFeats(i,p,it.feats[p]);
+
+ if (it.ofeats!=null) is.setFeature(i,p,it.ofeats[p]);
+ if (it.pfeats!=null) is.setPFeature(i,p,it.pfeats[p]);
+
+
+ is.setRel(i,p,it.labels[p]);
+ if (it.plabels!=null) is.setPRel(i,p,it.plabels[p]);
+
+ is.setHead(i,p,it.heads[p]);
+ if (it.pheads!=null) is.setPHead(i,p,it.pheads[p]);
+
+ if (it.fillp!=null && it.fillp[p]!=null && it.fillp[p].startsWith("Y")) is.pfill[i].set(p);
+ else is.pfill[i].clear(p);
+ }
+
+ if (is.createSem(i,it)) {
+ DB.println("count "+i+" len "+it.length());
+ DB.println(it.printSem());
+ }
+ } catch(Exception e ){
+ DB.println("head "+it);
+ e.printStackTrace();
+ }
+ return true;
+
+ }
+
+ /**
+ * Map numeric tokens to the NUM placeholder if normalization is enabled.
+ */
+ public String normalize (String s) {
+ if (!normalizeOn) return s;
+ if(s.matches(NUMBER)) return NUM;
+ return s;
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLWriter06.java b/dependencyParser/mate-tools/src/is2/io/CONLLWriter06.java
new file mode 100755
index 0000000..26762bc
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/io/CONLLWriter06.java
@@ -0,0 +1,193 @@
+package is2.io;
+
+import is2.data.SentenceData09;
+import is2.util.DB;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.util.StringTokenizer;
+
+
+public class CONLLWriter06 {
+
+ public static final String DASH = "_";
+
+ protected BufferedWriter writer;
+
+ public CONLLWriter06 () { }
+
+
+
+ public static void main(String args[]) throws IOException {
+
+
+ if (args.length==2) {
+ File f = new File(args[0]);
+ File f2 = new File(args[1]);
+ // BufferedReader bf = new BufferedReader(new FileInputStream(new File(args[0]),"UTF-8"),32768);
+ BufferedReader ir = new BufferedReader(new InputStreamReader(new FileInputStream(f),"ISO-8859"),32768);
+ BufferedWriter br = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f2),"UTF-8"));;
+ boolean found =false;
+ boolean tab =false;
+ while(true) {
+ String l = ir.readLine();
+ if (l==null) break;
+ String x =l.trim();
+ if (x.endsWith("\t")) tab=true;
+ br.write(x);
+ br.newLine();
+ if (!l.equals(x)) found =true;
+
+ }
+ ir.close();
+ br.flush();
+ br.close();
+
+ if (found) DB.println("found diff. found tab? "+tab);
+ } else if (args.length==3) {
+ File f1 = new File(args[1]);
+ File f2 = new File(args[2]);
+
+ BufferedReader ir1 = new BufferedReader(new InputStreamReader(new FileInputStream(f1),"ISO-8859"),32768);
+ BufferedReader ir2 = new BufferedReader(new InputStreamReader(new FileInputStream(f2),"UTF-8"),32768);
+
+ int line =0, alltabs1=0,alltabs2=0;
+ while(true) {
+ String l1 = ir1.readLine();
+ String l2 = ir2.readLine();
+
+ if (l1==null && l2!=null) DB.println("files do not end at the same line ");
+ if (l1!=null && l2==null) DB.println("files do not end at the same line ");
+ if (l1==null ) break;
+ StringTokenizer t1 = new StringTokenizer(l1,"\t");
+ StringTokenizer t2 = new StringTokenizer(l2,"\t");
+ int tabs1=0;
+ while(t1.hasMoreTokens()) {
+
+ t1.nextElement();
+ tabs1++;
+ alltabs1++;
+ }
+
+ int tabs2=0;
+ while(t2.hasMoreTokens()) {
+
+ t2.nextElement();
+ tabs2++;
+ alltabs2++;
+ }
+ line ++;
+ if (tabs1!=tabs2) {
+ DB.println("number of tabs different in line "+line+" file1-tabs "+tabs1+" file2-tabs "+tabs2);
+ System.exit(0);
+ }
+
+
+ }
+ DB.println("checked lines "+line+" with tabs in file 1 "+alltabs1+" in file2 "+alltabs2);
+
+ } else {
+ File f = new File(args[0]);
+ String[] dir =f.list();
+ for(String fx :dir) {
+ BufferedReader ir = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]+File.separatorChar+fx),"UTF-8"),32768);
+ System.out.println("check file "+fx);
+ while(true) {
+ String l = ir.readLine();
+ if (l==null) break;
+ if (l.endsWith("\t")) {
+ DB.println("found tab in file "+fx);
+ break;
+ }
+ }
+ ir.close();
+ }
+ }
+
+ }
+
+
+// public int version = CONLLReader09.TASK08;
+
+ public CONLLWriter06 (String file) {
+
+ try {
+ writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8"));
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ public CONLLWriter06(String outfile, int formatTask) {
+ this(outfile);
+ // version = formatTask;
+ }
+
+ public void write(SentenceData09 inst) throws IOException {
+
+ for (int i=0; i<inst.length(); i++) {
+
+
+ writer.write(Integer.toString(i+1)); writer.write('\t'); // id
+ writer.write(inst.forms[i]); writer.write('\t'); // form
+
+ if (inst.lemmas!=null && inst.lemmas[i]!=null) {
+ writer.write(inst.lemmas[i]);
+ }
+ else writer.write(DASH); // lemma
+ writer.write('\t');
+
+// writer.write(DASH); // cpos
+// writer.write('\t');
+
+
+ writer.write(inst.gpos[i]); // cpos has to be included
+ writer.write('\t');
+
+ writer.write(inst.gpos[i]); // gpos
+ writer.write('\t');
+
+
+ if (inst.ofeats[i].isEmpty()||inst.ofeats[i].equals(" ")) writer.write(DASH);
+ else writer.write(inst.ofeats[i]);
+ writer.write('\t');
+
+
+ //writer.write(DASH); writer.write('\t'); // pfeat
+
+ writer.write(Integer.toString(inst.heads[i])); writer.write('\t'); // head
+
+ if (inst.labels[i]!=null) writer.write(inst.labels[i]); // rel
+ else writer.write(DASH);
+ writer.write('\t');
+
+ writer.write(DASH);
+ writer.write('\t');
+
+ writer.write(DASH);
+ writer.write('\t');
+
+
+ writer.newLine();
+ }
+ writer.newLine();
+
+ }
+
+
+
+ public void finishWriting () throws IOException {
+ writer.flush();
+ writer.close();
+ }
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLWriter09.java b/dependencyParser/mate-tools/src/is2/io/CONLLWriter09.java
new file mode 100755
index 0000000..e7a92a5
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/io/CONLLWriter09.java
@@ -0,0 +1,307 @@
+package is2.io;
+
+import is2.data.SentenceData09;
+import is2.util.DB;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.StringTokenizer;
+
+
+public class CONLLWriter09 extends IOGenerals {
+
+
+ /** Output format; one of the F_xxxx constants of IOGenerals (F_CONLL09 or F_ONE_LINE). */
+ int format =0;
+
+ /** Placeholder for empty columns. */
+ public static final String DASH = "_";
+
+ /** Flags for write(SentenceData09, boolean): drop or keep the artificial root token. */
+ public static final boolean NO_ROOT = true, ROOT = false;
+
+ protected BufferedWriter writer;
+
+ public CONLLWriter09 () { }
+
+ /**
+ * Small command-line utility for checking CoNLL files.
+ * 2 args: copies args[0] to args[1] with trimmed lines and reports trailing tabs.
+ * 3 args: compares the number of tab-separated columns of args[1] and args[2] line by line.
+ * 1 arg: scans every file in directory args[0] for lines that end with a tab.
+ */
+ public static void main(String args[]) throws IOException {
+
+
+ if (args.length==2) {
+ File f = new File(args[0]);
+ File f2 = new File(args[1]);
+ BufferedReader ir = new BufferedReader(new InputStreamReader(new FileInputStream(f),"UTF-8"),32768);
+ BufferedWriter br = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f2),"UTF-8"));
+ boolean found =false;
+ boolean tab =false;
+ while(true) {
+ String l = ir.readLine();
+ if (l==null) break;
+ String x =l.trim();
+ if (x.endsWith("\t")) tab=true;
+ br.write(x);
+ br.newLine();
+ if (!l.equals(x)) found =true;
+
+ }
+ ir.close();
+ br.flush();
+ br.close();
+
+ if (found) DB.println("found diff. found tab? "+tab);
+ } else if (args.length==3) {
+ File f1 = new File(args[1]);
+ File f2 = new File(args[2]);
+
+ BufferedReader ir1 = new BufferedReader(new InputStreamReader(new FileInputStream(f1),"UTF-8"),32768);
+ BufferedReader ir2 = new BufferedReader(new InputStreamReader(new FileInputStream(f2),"UTF-8"),32768);
+
+ int line =0, alltabs1=0,alltabs2=0;
+ while(true) {
+ String l1 = ir1.readLine();
+ String l2 = ir2.readLine();
+
+ if (l1==null && l2!=null) DB.println("files do not end at the same line ");
+ if (l1!=null && l2==null) DB.println("files do not end at the same line ");
+ // stop when either file is exhausted; the original only tested l1 and
+ // crashed with a NullPointerException below when file 2 was shorter
+ if (l1==null || l2==null) break;
+ StringTokenizer t1 = new StringTokenizer(l1,"\t");
+ StringTokenizer t2 = new StringTokenizer(l2,"\t");
+ int tabs1=0;
+ while(t1.hasMoreTokens()) {
+
+ t1.nextElement();
+ tabs1++;
+ alltabs1++;
+ }
+
+ int tabs2=0;
+ while(t2.hasMoreTokens()) {
+
+ t2.nextElement();
+ tabs2++;
+ alltabs2++;
+ }
+ line ++;
+ if (tabs1!=tabs2) {
+ DB.println("number of tabs different in line "+line+" file1-tabs "+tabs1+" file2-tabs "+tabs2);
+ System.exit(0);
+ }
+
+
+ }
+ // close both readers (the original leaked them)
+ ir1.close();
+ ir2.close();
+ DB.println("checked lines "+line+" with tabs in file 1 "+alltabs1+" in file2 "+alltabs2);
+
+ } else {
+ File f = new File(args[0]);
+ String[] dir =f.list();
+ for(String fx :dir) {
+ BufferedReader ir = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]+File.separatorChar+fx),"UTF-8"),32768);
+ System.out.println("check file "+fx);
+ while(true) {
+ String l = ir.readLine();
+ if (l==null) break;
+ if (l.endsWith("\t")) {
+ DB.println("found tab in file "+fx);
+ break;
+ }
+ }
+ ir.close();
+ }
+ }
+
+ }
+
+
+ /**
+ * Creates a writer for the given file using UTF-8 encoding (the original
+ * used the alias "UTF8", which names the same charset). On failure the
+ * exception is only printed and writer stays null.
+ * @param file path of the output file
+ */
+ public CONLLWriter09 (String file) {
+
+ try {
+ writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8"));
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /** Wraps an existing writer. */
+ public CONLLWriter09 (Writer writer) {
+ this.writer = new BufferedWriter(writer);
+ }
+
+
+
+ /**
+ * Convenience constructor; formatTask is accepted for API compatibility but ignored.
+ */
+ public CONLLWriter09(String outfile, int formatTask) {
+ this(outfile);
+ }
+
+ /** Writes a sentence, dropping the artificial root token. */
+ public void write(SentenceData09 inst) throws IOException {
+ write(inst, NO_ROOT);
+ }
+
+ /**
+ * Writes one sentence in the format selected with setOutputFormat().
+ * @param inst the sentence
+ * @param root true: remove root node
+ * @throws IOException if the underlying writer fails
+ */
+ public void write(SentenceData09 inst, boolean root) throws IOException {
+
+ // i: first token to write; mod: offset added to the printed token id
+ int i, mod;
+ if(root&&(inst.forms[0].startsWith("<root")||(inst.lemmas[0]!=null&&inst.lemmas[0].startsWith("<root")))){
+ i=1; mod=0;
+ } else {
+ i=0; mod=1;
+ }
+
+ // one-line output: just the predicted lemmas, blank separated
+ if (format == F_ONE_LINE) {
+ boolean first =true;
+ for (; i<inst.length(); i++) {
+ if (first ){
+ first=false;
+ } else writer.write(" ");
+ writer.write(inst.plemmas[i]);
+ }
+ writer.newLine();
+
+ return ;
+ }
+
+ // CoNLL 2009 output, one token per line
+ for (; i<inst.length(); i++) {
+
+ if (inst.id==null|| inst.id[i]==null) {writer.write(Integer.toString(i+mod)); writer.write('\t');} // id
+ else { writer.write(inst.id[i]); writer.write('\t');}
+
+ writer.write(inst.forms[i]); writer.write('\t'); // form
+
+ if (inst.lemmas!=null && inst.lemmas[i]!=null) {
+ writer.write(inst.lemmas[i]);
+ }
+ else writer.write(DASH); // lemma
+ writer.write('\t');
+
+ if (inst.plemmas!=null && inst.plemmas[i]!=null) writer.write(inst.plemmas[i]);
+ else writer.write(DASH); // plemma
+ writer.write('\t');
+
+ if (inst.gpos[i]!=null) writer.write(inst.gpos[i]); // gpos
+ else writer.write(DASH);
+ writer.write('\t');
+
+ if (inst.ppos!=null && inst.ppos[i]!=null) writer.write(inst.ppos[i]);
+ else writer.write(DASH); // ppos
+ writer.write('\t');
+
+ if (inst.ofeats!=null&& inst.ofeats[i]!=null) writer.write(inst.ofeats[i]);
+ else writer.write(DASH); // feat
+ writer.write('\t');
+
+ if (inst.pfeats!=null&&inst.pfeats[i]!=null) writer.write(inst.pfeats[i]);
+ else writer.write(DASH); // pfeat
+ writer.write('\t');
+
+
+ writer.write(Integer.toString(inst.heads[i])); writer.write('\t'); // head
+
+ if (inst.pheads!=null ) writer.write(Integer.toString(inst.pheads[i]));
+ else writer.write(DASH);
+ writer.write('\t'); // phead
+
+ if (inst.labels[i]!=null) writer.write(inst.labels[i]); // rel
+ else writer.write(DASH);
+ writer.write('\t');
+
+ if (inst.plabels!=null &&inst.plabels[i]!=null) writer.write(inst.plabels[i]); // prel
+ else writer.write(DASH);
+ writer.write('\t');
+
+ if (inst.fillp!=null && inst.fillp[i]!=null) writer.write(inst.fillp[i]); // fill p
+ else {
+ writer.write(DASH);
+ }
+
+ // semantic layer: predicate sense plus one argument column per predicate
+ if (inst.sem==null) {
+ writer.write('\t');
+ writer.write(DASH);
+
+ } else {
+
+ boolean foundPred =false;
+ // print the predicate
+ for (int p =0;p< inst.sem.length;p++) {
+ if (inst.semposition[p]==i) {
+ foundPred=true;
+ writer.write('\t'); writer.write(inst.sem[p]);
+ }
+ }
+
+ if (!foundPred ) {
+ writer.write('\t');
+ writer.write(DASH);
+ }
+
+ // print the arguments
+ for (int p =0;p< inst.sem.length;p++) {
+
+ boolean found =false;
+ if (inst.arg!=null &&inst.arg.length>p&&inst.arg[p]!=null)
+ for(int a = 0; a<inst.arg[p].length;a++) {
+
+ if (i==inst.argposition[p][a]) {
+ writer.write('\t'); writer.write(inst.arg[p][a]);
+ found = true;
+ break;
+ }
+
+ }
+ if (!found) {
+ writer.write('\t');
+ writer.write(DASH);
+ }
+
+ }
+
+ }
+ writer.newLine();
+ }
+ writer.newLine(); // blank line terminates the sentence
+ }
+
+ /** Flushes and closes the underlying writer; the writer is unusable afterwards. */
+ public void finishWriting () throws IOException {
+ writer.flush();
+ writer.close();
+ }
+
+ /**
+ * Sets the output format such as CoNLL or one line for the lemmata of the sentence (see F_xxxx constants).
+ * @param formatTask
+ */
+ public void setOutputFormat(int formatTask) {
+ format =formatTask;
+ }
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/io/IOGenerals.java b/dependencyParser/mate-tools/src/is2/io/IOGenerals.java
new file mode 100644
index 0000000..456a17f
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/io/IOGenerals.java
@@ -0,0 +1,33 @@
+/**
+ *
+ */
+package is2.io;
+
+/**
+ * @author Dr. Bernd Bohnet, 18.08.2011
+ *
+ *
+ */
+public class IOGenerals {
+
+ // some constants
+ public static final String US = "_"; // underscore / empty-column placeholder
+ public static final String REGEX = "\t"; // column separator
+ public static final String STRING = "*";
+ public static final String PIPE = "\\|"; // regex for splitting feature lists
+ public static final String NO_TYPE = "<no-type>";
+ public static final String ROOT_POS = "<root-POS>";
+ public static final String ROOT_LEMMA = "<root-LEMMA>";
+ public static final String ROOT = "<root>"; // form of the artificial root token
+ public static final String EMPTY_FEAT = "<ef>";
+
+
+ // the different readers
+ public static final int F_CONLL09 = 0; // tab-separated CoNLL 2009 columns
+ public static final int F_ONE_LINE = 1; // whole sentence on one line
+
+ // normalization of the input
+ public static final String NUMBER = "[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+"; // what counts as a number
+ public static final String NUM = "<num>"; // token that replaces a number
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/io/PSReader.java b/dependencyParser/mate-tools/src/is2/io/PSReader.java
new file mode 100644
index 0000000..3598b3d
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/io/PSReader.java
@@ -0,0 +1,23 @@
+/**
+ *
+ */
+package is2.io;
+
+import is2.data.PSTree;
+
+/**
+ * @author Dr. Bernd Bohnet, 07.02.2011
+ *
+ *
+ */
+public interface PSReader {
+
+ /**
+ * @return the next phrase structure tree, or null when the input is exhausted
+ */
+ public PSTree getNext();
+
+ /**
+ * Opens the given source for reading.
+ * @param ps path of the phrase structure input
+ * @param filter optional sentence filter, or null for no filtering
+ */
+ public void startReading(String ps, String[] filter);
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/io/TigerReader.java b/dependencyParser/mate-tools/src/is2/io/TigerReader.java
new file mode 100644
index 0000000..2a98b72
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/io/TigerReader.java
@@ -0,0 +1,403 @@
+/**
+ *
+ */
+package is2.io;
+
+import is2.data.PSTree;
+import is2.util.DB;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Stack;
+import java.util.StringTokenizer;
+
+/**
+ * @author Dr. Bernd Bohnet, 17.01.2011
+ *
+ * Reads a sentences in Penn Tree Bank bracket style and return sentences.
+ */
+public class TigerReader implements PSReader {
+
+ BufferedReader inputReader;
+ ArrayList<File> psFiles = new ArrayList<File>(); // currently unused
+ ArrayList<PSTree> psCache = new ArrayList<PSTree>(); // currently unused
+
+ // optional sentence range filter: filter[0] = first #BOS id, filter[1] = last #EOS id
+ String filter[] = null;
+ int startFilter =-1; // -1: no filter, 1: waiting for start, 2: start seen
+ int endFilter =-1; // -1: no filter, 1: waiting for end, 2: end seen
+
+ public TigerReader() {}
+
+ /**
+ * Opens the Tiger export file; the corpus is ISO-8859-1 encoded. On failure
+ * the exception is only printed and inputReader stays null.
+ */
+ public TigerReader(String file ) {
+
+ try {
+ inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"ISO-8859-1"),32768);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Starts reading the given file; an optional filter restricts reading to the
+ * sentences between "#BOS filter[0]" and "#EOS filter[1]".
+ * @param file the Tiger export file
+ * @param filter null, or {first-sentence-id, last-sentence-id}
+ */
+ @Override
+ public void startReading(String file, String[] filter) {
+
+
+ try {
+ this.filter =filter;
+ startFilter =filter==null?-1:1;
+ endFilter =filter==null?-1:1;
+
+ inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"ISO-8859-1"),32768);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ }
+
+ /** One entry (terminal or non-terminal) of the Tiger export format. */
+ public static class Line {
+ String form;
+ String lemma;
+ String morph;
+ String pos;
+ int parent; // 0 = root; values >= 500 refer to non-terminal nodes
+ String edge;
+
+
+ }
+
+ static int stop=0; // debugging counter, see commented-out code in getNext()
+
+ /**
+ * Reads the next sentence of the export file and builds its phrase
+ * structure tree.
+ * @return the next tree, or null at the end of the file
+ */
+ public PSTree getNext() {
+
+ PSTree ps = null;
+ String l =null;
+ ArrayList<Line> lines = new ArrayList<Line>();
+ try {
+ // state 1: outside a sentence, 2: reading terminals,
+ // 3: reading non-terminals (#500...), 4: sentence complete (#EOS)
+ int state=1, terminals=0, nonterminals=0;
+ while((l = inputReader.readLine())!=null) {
+
+ if (startFilter==1 && l.startsWith("#BOS "+filter[0]) ) {
+ System.out.println("found start "+l);
+ startFilter=2;
+ }
+ if (endFilter==1 && l.startsWith("#EOS "+filter[1]) ){
+ System.out.println("found end "+l);
+
+ endFilter=2;
+ }
+
+
+ if (startFilter==1||endFilter==2) continue; // outside the filtered range
+
+ if (l.startsWith("#BOS")) {
+
+ state=2;
+ continue;
+ }
+ if (l.startsWith("#500")) state=3;
+ if (l.startsWith("#EOS")) state=4;
+ if (state<2) continue;
+
+ if ( state==4) {
+
+ // sentence complete: copy the collected lines into a PSTree
+ ps = new PSTree();
+ ps.create(terminals, nonterminals);
+ // System.out.println("terminals "+terminals);
+ //build ps tree
+
+ int cnt=0;
+ // ps.entries[0] =CONLLReader09.ROOT;
+ // ps.head[0]=-1;
+ int root=-1;
+ for(Line line : lines) {
+
+ /* if (cnt==terminals) {
+ // insert root
+ root =cnt;
+ cnt++;
+ }
+ */
+ ps.entries[cnt] = line.form;
+ if (cnt<terminals) ps.pos[cnt] = line.pos;
+ else ps.entries[cnt] =line.pos;
+ ps.lemmas[cnt] = line.lemma;
+ // map parent ids: 0 -> last node (sentence root), >=500 -> non-terminal index
+ ps.head[cnt] = line.parent==0?lines.size()-1:line.parent>=500?line.parent-500+terminals:line.parent;
+ // ps.head[cnt] = line.parent==0?lines.size()-1:line.parent>=500?line.parent-500+terminals:line.parent;
+ ps.morph[cnt]=line.morph;
+ cnt++;
+
+ }
+
+ if (root==-1) root= terminals; // NOTE(review): root is never read after this — dead assignment
+ ps.head[cnt-1]=0; // root
+ ps.terminalCount=terminals;
+ lines.clear();
+ state=1;
+
+ /*
+ for(int k=0;k<ps.head.length;k++) {
+ if (ps.head[k]<terminals && k!=root) {
+ ps.head[k]=root;
+ // DB.println("error "+k+" "+ps.head[k]);
+ }
+ }
+ */
+ // System.out.println(""+ps.toString());
+ // if (stop++ == 4)System.exit(0);
+ return ps;
+ }
+
+
+
+ // parse one tab-separated entry line into a Line record
+ StringTokenizer t = new StringTokenizer(l,"\t");
+ int tc=0;
+ Line line = new Line();
+ lines.add(line);
+ while(t.hasMoreTokens()) {
+ String token = t.nextToken();
+ if (token.equals("\t"))continue;
+ if (tc==0) {
+ if (token.startsWith("#5")||token.startsWith("#6") ) {
+ nonterminals++;
+
+ }
+ else {
+ terminals++;
+
+ //change it back to the wrong format since the conll stuff was derived from this.
+ // if (token.equals("durchblicken")) token="durchblikken";
+ line.form = token;
+ }
+
+ } else if (tc==1) {
+ line.lemma=token;
+ } else if (tc==2) {
+ line.pos=token;
+ } else if (tc==3) {
+ line.morph=token;
+ } else if (tc==4) {
+ line.edge=token;
+ } else if (tc==5) {
+ line.parent=Integer.parseInt(token);
+ }
+
+
+ if (token.length()>0)tc++;
+ }
+
+ // read till #EOS
+
+
+ }
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ return ps;
+
+ }
+
+ /**
+ * Removes trace nodes (-NONE-) and co-index suffixes (-1 .. -4) from a
+ * nested-list phrase structure tree. Not called by getNext(); apparently
+ * kept for Penn-Treebank-style input.
+ * NOTE(review): on the first loop iteration {@code last} is null; a -NONE-
+ * found there would NPE at last.size() — confirm inputs never trigger this.
+ * @param tree the tree as nested ArrayLists of Strings/ArrayLists
+ */
+ private void removeTraces(ArrayList<Object> tree) {
+
+ Stack<ArrayList<Object>> s = new Stack<ArrayList<Object>>();
+
+ s.push(tree);
+ ArrayList<Object> list =null;
+ while (!s.isEmpty()) {
+
+ ArrayList<Object> last =list;
+ list = s.pop();
+ for(int k=0;k<list.size();k++) {
+ Object o = list.get(k);
+ if(o instanceof String) {
+ String t = (String)o;
+ if ((t.endsWith("-1")||t.endsWith("-2")||t.endsWith("-3")||t.endsWith("-4")) && list.size()>(k+1)) {
+ t = t.substring(0, t.length()-2);
+ list.set(k, t);
+ }
+
+ if (t.startsWith("-NONE-")) {
+
+ // remove the bigger surrounding phrase, e.g. (NP (-NONE- *))
+ if (last.size()==2 && last.get(0) instanceof String && last.contains(list)) {
+ ArrayList<Object> rest = remove(tree, last);
+ if (rest!=null && rest.size()==1){
+ rest = remove(tree, rest);
+ }
+ }
+ // remove the phrase only, e.g. (NP (AP nice small) (-NONE- *))
+ else {
+ // there might a phrase with two empty elements (VP (-NONE- *) (-NONE- ...))
+// System.out.println("last "+last+" list "+list );
+ ArrayList<Object> rest = remove(tree, list);
+ removeTraces(rest);
+ if (rest.size()==1) {
+ rest = remove(tree, rest);
+ if (rest!=null && rest.size()==1){
+ System.out.println("rest "+rest);
+ System.exit(0);
+ }
+ }
+ }
+ continue;
+ }
+ }
+ if (o instanceof ArrayList) {
+ s.push((ArrayList<Object>)o);
+ }
+ }
+ }
+ }
+
+
+
+
+ /**
+ * Remove from tree p
+ * @param tree phrase structure tree
+ * @param p elment to remove
+ * @return the list that contained p, or null if p was not found
+ */
+ private ArrayList<Object> remove(ArrayList<Object> tree, Object p) {
+ Stack<ArrayList<Object>> s = new Stack<ArrayList<Object>>();
+
+ s.push(tree);
+
+ while (!s.isEmpty()) {
+
+ ArrayList<Object> list = s.pop();
+ for(int k=0;k<list.size();k++) {
+ Object o = list.get(k);
+ if (o == p) { // identity compare on purpose: remove this exact node
+ list.remove(p);
+ return list ;
+ }
+ if (o instanceof ArrayList) {
+ s.push((ArrayList<Object>)o);
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Count the terminals
+ * @param current subtree as nested lists
+ * @return number of lists that contain only Strings (pre-terminal nodes)
+ */
+ private int countTerminals(ArrayList<Object> current) {
+
+ int count =0;
+ boolean found =false, all =true ;
+ for(Object o : current) {
+ if (o instanceof String) found =true;
+ else {
+ all =false;
+ if (o instanceof ArrayList) count +=countTerminals((ArrayList<Object>)o);
+ }
+ }
+
+ if (found && all) {
+ // System.out.println(""+current);
+ count++;
+ }
+
+ return count;
+ }
+
+ /**
+ * Inserts the nodes of a nested-list tree into ps, filling entries, pos
+ * and head; terminals and non-terminals get separate index ranges.
+ * Bracket and slash escapes of the Penn Treebank are unescaped.
+ * @param current subtree as nested lists
+ * @return the terminal argument (unchanged)
+ */
+ private int insert(PSTree ps, ArrayList<Object> current, Integer terminal, Integer xxx, int head) {
+
+ boolean found =false, all =true;
+ String term =null;
+ String pos =null;
+ for(Object o : current) {
+ if (o instanceof String) {
+ // first String is the POS/label, second the terminal itself
+ if (found) term =(String)o;
+ if (!found) pos =(String)o;
+ found =true;
+ } else {
+ all =false;
+ // if (o instanceof ArrayList) count +=countTerminals((ArrayList<Object>)o);
+ }
+ }
+
+ if (found && all) {
+
+ if(term.equals("-LRB-")) term="(";
+ if(term.equals("-RRB-")) term=")";
+ if(term.equals("-LCB-")) term="{";
+ if(term.equals("-RCB-")) term="}";
+ if(term.contains("1\\/2-year")) term=term.replace("\\/", "/");
+ if(term.contains("1\\/2-foot-tall")) term=term.replace("\\/", "/");
+
+
+ ps.entries[ps.terminalCount] =term;
+ ps.pos[ps.terminalCount]=pos;
+ ps.head[ps.terminalCount]=head;
+ // System.out.println("terminal "+term+" "+ps.terminal+" head "+head);
+ ps.terminalCount ++;
+ } else if (found && ! all) {
+ // non-terminal: strip function-tag suffixes from a few labels
+ if(pos.startsWith("NP-SBJ")) pos="NP-SBJ";
+ if(pos.startsWith("WHNP")) pos="WHNP";
+
+ ps.entries[ps.non] =pos;
+ ps.head[ps.non]=head;
+ // System.out.println("non terminal "+pos+" "+ps.non+" head "+ head);
+ int non =ps.non ++;
+
+ for (Object o : current) {
+ if (o instanceof ArrayList) {
+ insert(ps,(ArrayList<Object>)o,terminal,ps.non, non);
+ }
+ }
+ }
+ if(!all && !found)for (Object o : current) {
+ if (o instanceof ArrayList) {
+ insert(ps,(ArrayList<Object>)o,terminal,0, ps.non-1);
+ }
+ }
+ return terminal;
+ }
+
+
+ /**
+ * Count the non-terminals
+ * @param current subtree as nested lists
+ * @return number of lists that contain a String plus at least one sublist
+ */
+ private int countNonTerminals(ArrayList<Object> current) {
+
+ int count =0;
+ boolean found =false, all =true ;
+ for(Object o : current) {
+ if (o instanceof String) found =true;
+ else {
+ all =false;
+ if (o instanceof ArrayList) count +=countNonTerminals((ArrayList<Object>)o);
+ }
+ }
+
+ if (found && !all) count++;
+
+ return count;
+ }
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/Evaluator.java b/dependencyParser/mate-tools/src/is2/lemmatizer/Evaluator.java
new file mode 100755
index 0000000..b333c62
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/lemmatizer/Evaluator.java
@@ -0,0 +1,105 @@
+package is2.lemmatizer;
+
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Hashtable;
+import java.util.Map.Entry;
+
+
+public class Evaluator {
+
+ /**
+ * Compares the gold lemmas of act_file with the predicted lemmas of
+ * pred_file and prints the lemmatization accuracy, both case sensitive
+ * and case insensitive.
+ * @param act_file CoNLL 09 file with gold-standard lemmas
+ * @param pred_file CoNLL 09 file with predicted lemmas (plemma column)
+ * @param format currently unused; both files are always read as CoNLL 09
+ * @throws Exception if one of the files cannot be read
+ */
+ public static void evaluate (String act_file, String pred_file, String format) throws Exception {
+
+ CONLLReader09 goldReader = new CONLLReader09(act_file, CONLLReader09.NO_NORMALIZE);
+ CONLLReader09 predictedReader = new CONLLReader09(pred_file,CONLLReader09.NO_NORMALIZE);
+
+ // counts how often each (gold, predicted) error pair occurred
+ Hashtable<String,Integer> errors = new Hashtable<String,Integer>();
+
+ int total = 0, corrL = 0, corrT=0;
+ int numsent = 0;
+ SentenceData09 goldInstance = goldReader.getNext();
+ SentenceData09 predInstance = predictedReader.getNext();
+
+ while(goldInstance != null) {
+
+ int instanceLength = goldInstance.length();
+
+ if (instanceLength != predInstance.length())
+ System.out.println("Lengths do not match on sentence "+numsent);
+
+ String gold[] = goldInstance.lemmas;
+ String pred[] = predInstance.plemmas;
+
+ // NOTE: the first item is the root info added during nextInstance(), so we skip it.
+ for (int i = 1; i < instanceLength; i++) {
+ if (gold[i].toLowerCase().equals(pred[i].toLowerCase())) corrT++;
+
+ if (gold[i].equals(pred[i])) corrL++;
+ else {
+ String key = "gold: '"+gold[i]+"' pred: '"+pred[i]+"'";
+ Integer cnt = errors.get(key);
+ errors.put(key, cnt==null ? 1 : cnt+1);
+ }
+ }
+ total += instanceLength - 1; // Subtract one to not score fake root token
+
+ numsent++;
+
+ goldInstance = goldReader.getNext();
+ predInstance = predictedReader.getNext();
+ }
+
+ // sort the error pairs by frequency; compareTo replaces the original's
+ // boxed-Integer '==' comparison, which is wrong outside the [-128,127] cache
+ ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>(errors.entrySet());
+
+ Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){
+
+ @Override
+ public int compare(Entry<String, Integer> o1,
+ Entry<String, Integer> o2) {
+
+ return o1.getValue().compareTo(o2.getValue());
+ }
+
+ });
+
+ // debug output of the most frequent errors is intentionally disabled
+ // for(Entry<String, Integer> e : opsl) System.out.println(e.getKey()+" "+e.getValue());
+
+ System.out.println("Tokens: " + total+" Correct: " + corrT+" "+(float)corrT/total+" correct uppercase "+(float)corrL/total);
+ }
+
+ /**
+ * Entry point: args[0] = gold file, args[1] = predicted file,
+ * optional args[2] = format (currently unused by evaluate()).
+ */
+ public static void main (String[] args) throws Exception {
+ String format = "CONLL";
+ if (args.length > 2)
+ format = args[2];
+
+ evaluate(args[0], args[1], format);
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/Lemmatizer.java b/dependencyParser/mate-tools/src/is2/lemmatizer/Lemmatizer.java
new file mode 100755
index 0000000..33756dd
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/lemmatizer/Lemmatizer.java
@@ -0,0 +1,535 @@
+package is2.lemmatizer;
+
+
+import is2.data.Cluster;
+import is2.data.F2SF;
+import is2.data.FV;
+import is2.data.Instances;
+import is2.data.InstancesTagger;
+import is2.data.Long2Int;
+import is2.data.ParametersFloat;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter09;
+import is2.tools.IPipe;
+import is2.tools.Tool;
+import is2.tools.Train;
+import is2.util.DB;
+import is2.util.OptionsSuper;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map.Entry;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+import java.util.zip.ZipOutputStream;
+
+
+
+public class Lemmatizer implements Tool, Train {
+
+ public Pipe pipe;
+ public ParametersFloat params;
+ private Long2Int li; // hashes long feature codes into the weight-vector index space
+
+ // when true, an extra binary classifier decides the casing of the lemma's first letter
+ private boolean doUppercase=false;
+
+ private long[] vs= new long[40]; // scratch buffer for the feature codes of one token
+
+
+
+ /**
+ * Creates a lemmatizer due to the model stored in modelFileName
+ * @param modelFileName the path and file name to a lemmatizer model
+ */
+ public Lemmatizer(String modelFileName) {
+
+ // tell the lemmatizer the location of the model
+ try {
+ Options m_options = new Options(new String[] {"-model", modelFileName});
+ li = new Long2Int(m_options.hsize);
+
+ // initialize the lemmatizer
+ readModel(m_options);
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+
+
+
+
+ /** Creates an untrained lemmatizer; doUppercase enables the casing classifier. */
+ public Lemmatizer(boolean doUppercase) {this.doUppercase=doUppercase; }
+
+
+
+ /**
+ * Command-line entry point: trains (-train), applies (-test) and/or
+ * evaluates (-eval) a lemmatizer according to the given options.
+ */
+ public static void main (String[] args) throws FileNotFoundException, Exception
+ {
+
+ Options options = new Options(args);
+ Lemmatizer lemmatizer = new Lemmatizer(options.upper);
+
+ long start = System.currentTimeMillis();
+
+
+ if (options.train) {
+
+
+ lemmatizer.li = new Long2Int(options.hsize);
+ lemmatizer.pipe = new Pipe (options,lemmatizer.li);
+
+ InstancesTagger is = lemmatizer.pipe.createInstances(options.trainfile);
+
+ DB.println("Features: " + lemmatizer.pipe.mf.size()+" Operations "+lemmatizer.pipe.mf.getFeatureCounter().get(Pipe.OPERATION));
+
+ ParametersFloat params = new ParametersFloat(lemmatizer.li.size());
+
+ lemmatizer.train(options,lemmatizer.pipe,params,is);
+
+ lemmatizer.writeModel(options, lemmatizer.pipe, params);
+ }
+
+ if (options.test) {
+
+ lemmatizer.readModel(options);
+
+ lemmatizer.out(options,lemmatizer.pipe, lemmatizer.params);
+ }
+
+ System.out.println();
+
+ if (options.eval) {
+ System.out.println("\nEVALUATION PERFORMANCE:");
+ Evaluator.evaluate(options.goldfile, options.outfile,options.format);
+ }
+ long end = System.currentTimeMillis();
+ System.out.println("used time "+((float)((end-start)/100)/10));
+ }
+
+ /* (non-Javadoc)
+ * @see is2.tools.Train#writeModel(is2.util.OptionsSuper, is2.tools.IPipe, is2.data.ParametersFloat)
+ */
+ @Override
+ public void writeModel(OptionsSuper options, IPipe pipe,
+ ParametersFloat params) {
+ try {
+ // store the model: feature map, weights, pipe data, casing flag — in this order;
+ // readModel() must read them back in the same order
+ ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(options.modelName)));
+ zos.putNextEntry(new ZipEntry("data"));
+ DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos));
+
+ this.pipe.mf.writeData(dos);
+
+ dos.flush();
+ params.write(dos);
+
+ pipe.write(dos);
+
+ dos.writeBoolean(this.doUppercase);
+
+ dos.flush();
+ dos.close();
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+
+ /** Loads a model written by writeModel() and rebuilds the pipe and parameters. */
+ public void readModel(OptionsSuper options) {
+
+ try {
+
+ // load the model
+ ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName)));
+ zis.getNextEntry();
+ DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));
+
+ MFO mf = new MFO();
+ mf.read(dis);
+ params = new ParametersFloat(0);
+ params.read(dis);
+ li =new Long2Int(params.size());
+ pipe = new Pipe(options, li);
+ pipe.mf =mf;
+
+ pipe.initFeatures();
+ pipe.initValues();
+
+ pipe.readMap(dis);
+
+ // rebuild the operation-id -> operation-name table
+ for(Entry<String,Integer> e : mf.getFeatureSet().get(Pipe.OPERATION).entrySet()) {
+ this.pipe.types[e.getValue()] = e.getKey();
+ // System.out.println("set pos "+e.getKey());
+ }
+
+
+ pipe.cl = new Cluster(dis);
+
+ // older models do not contain the casing flag
+ if (dis.available()>0) this.doUppercase = dis.readBoolean();
+
+
+ dis.close();
+ DB.println("Loading data finished. ");
+
+ DB.println("number of params "+params.parameters.length);
+ DB.println("number of classes "+pipe.types.length);
+
+ } catch (Exception e ) {
+ e.printStackTrace();
+ }
+
+ }
+
+
+
+ /**
+ * Trains the edit-operation classifier (and optionally the casing
+ * classifier) with perceptron-style updates over numIters iterations.
+ * @param options training options (number of iterations etc.)
+ * @param p the feature pipe (must be a Pipe; accessed via this.pipe)
+ * @param params weight vector to be trained in place
+ * @param ist training instances (must be an InstancesTagger)
+ */
+ public void train(OptionsSuper options, IPipe p, ParametersFloat params, Instances ist) {
+
+ InstancesTagger is = (InstancesTagger)ist;
+
+ int i = 0,del=0;
+ FV g = new FV(), f = new FV();
+
+ // class ids of the two casing pseudo-operations (lower case, upper case)
+ int LC = this.pipe.types.length+1, UC = LC+1;
+
+ String wds[] = MFO.reverse(pipe.mf.getFeatureSet().get(Pipe.WORD));
+
+ F2SF fs = params.getFV();
+ double upd=0;
+
+ for(i = 0; i < options.numIters; i++) {
+
+ System.out.print("Iteration "+i+": ");
+
+ long start = System.currentTimeMillis();
+ int numInstances = is.size();
+ int correct =0,count=0;
+
+ long last= System.currentTimeMillis();
+ int wrongOp=0,correctOp=0, correctUC=0, wrongUC=0;
+
+ HashMap<String,Integer> map = new HashMap<String,Integer>(); // NOTE(review): never filled, so the sorted output below is always empty
+
+ for(int n = 0; n < numInstances; n++) {
+
+ if((n+1) % 500 == 0) del= Pipe.outValueErr(n+1, (float)(count-correct),(float)correct/(float)count,del,last,upd);
+
+ // remaining updates; used as the averaging weight of this update
+ upd = (double)(options.numIters*numInstances - (numInstances*i+(n+1))+ 1);
+
+ for(int k = 0; k < is.length(n); k++) {
+
+ double best = -1000;
+ String bestOp="";
+
+
+
+ count++;
+ pipe.addCoreFeatures(is, n, k, 0,wds[is.forms[n][k]], vs);
+
+ // known word with a fixed lemma? then no operation search is needed
+ String lemma = pipe.opse.get(wds[is.forms[n][k]].toLowerCase());
+
+
+ // predict
+ if (lemma==null)
+ for(int t = 0; t < pipe.types.length; t++) {
+
+ fs.clear();
+ for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) fs.add(li.l2i(vs[l]+(t*Pipe.s_type)));
+
+ float score = (float) fs.getScore();
+ if (score >best) {
+ bestOp = pipe.types[t];
+ best =score;
+ }
+ }
+
+ // update the binary casing classifier (score > 0 means "upper case")
+ if (doUppercase) {
+ fs.clear();
+ for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) fs.add(li.l2i(vs[l]+(LC*Pipe.s_type)));
+
+ int correctOP =-1, selectedOP =-1;
+ if (wds[is.glemmas[n][k]].length()>0 &&
+ Character.isUpperCase(wds[is.glemmas[n][k]].charAt(0)) &&
+ fs.score > 0) {
+
+ correctOP = UC;
+ selectedOP =LC;
+ } else if (wds[is.glemmas[n][k]].length()>0
+ &&Character.isLowerCase(wds[is.glemmas[n][k]].charAt(0)) &&
+ fs.score <= 0) {
+
+
+ correctOP = LC;
+ selectedOP =UC;
+ }
+
+ if (correctOP!=-1 && wds[is.glemmas[n][k]].length()>0) {
+
+ wrongUC++;
+ f.clear();
+ for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) f.add(li.l2i(vs[l]+(selectedOP*Pipe.s_type)));
+
+ g.clear();
+ for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) g.add(li.l2i(vs[l]+(correctOP*Pipe.s_type)));
+
+ // passive-aggressive style margin update towards the correct casing
+ double lam_dist = params.getScore(g) - params.getScore(f);//f
+ double loss = 1 - lam_dist;
+
+ FV dist = g.getDistVector(f);
+ dist.update(params.parameters, params.total, params.update(dist,loss), upd,false);
+
+ } else {
+ correctUC++;
+ }
+ }
+ if (lemma!=null) {
+ correct++;
+ correctOp++;
+ continue;
+ }
+
+
+ // compare the predicted edit operation with the gold operation
+ String op = Pipe.getOperation(is,n, k,wds);
+ if (op.equals(bestOp) ) {
+ correct++;
+ correctOp++;
+ continue;
+ }
+ wrongOp++;
+
+ f.clear();
+ int bop =pipe.mf.getValue(Pipe.OPERATION, bestOp);
+ for(int r=vs.length-1;r>=0;r--) if (vs[r]>0)f.add(li.l2i(vs[r]+(bop*Pipe.s_type)));
+
+ g.clear();
+ int gop =pipe.mf.getValue(Pipe.OPERATION, op);
+ for(int r=vs.length-1;r>=0;r--) if (vs[r]>0)g.add(li.l2i(vs[r]+(gop*Pipe.s_type)));
+ double lam_dist = params.getScore(g) - params.getScore(f);//f
+
+ double loss = 1 - lam_dist;
+
+ FV dist = g.getDistVector(f);
+
+ dist.update(params.parameters, params.total, params.update(dist,loss), upd,false); //0.05
+
+ }
+
+ }
+ // report the most frequent confusions of this iteration (map is empty, see above)
+ ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>();
+ for(Entry<String, Integer> e : map.entrySet()) {
+ if(e.getValue()>1) {
+ opsl.add(e);
+ }
+ }
+
+ Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){
+ @Override
+ public int compare(Entry<String, Integer> o1,
+ Entry<String, Integer> o2) {
+
+ return o1.getValue()==o2.getValue()?0:o1.getValue()>o2.getValue()?1:-1;
+ }
+ });
+
+ if (opsl.size()>0) System.out.println();
+ for(Entry<String, Integer> e : opsl) {
+ System.out.println(e.getKey()+" "+e.getValue());
+ }
+ map.clear();
+
+ del= Pipe.outValueErr(numInstances, (float)(count-correct), (float)correct/(float)count,del,last,upd,
+ "time "+(System.currentTimeMillis()-start)+
+ " corr/wrong "+correctOp+" "+wrongOp+" uppercase corr/wrong "+correctUC+" "+wrongUC);
+ del=0;
+ System.out.println();
+ }
+
+ params.average(i*is.size());
+
+ }
+
+
+ /**
+ * Lemmatizes the test file and writes the result to the output file.
+ * @param options provides testfile, outfile and format settings
+ * @param pipe the feature pipe (unused; this.pipe is used instead)
+ * @param params trained weights (unused; this.params is used instead)
+ */
+ public void out (OptionsSuper options, IPipe pipe, ParametersFloat params) {
+
+ long start = System.currentTimeMillis();
+
+ CONLLReader09 depReader = new CONLLReader09(options.testfile, CONLLReader09.NO_NORMALIZE);
+ depReader.setInputFormat(options.formatTask);
+ CONLLWriter09 depWriter = new CONLLWriter09(options.outfile);
+ depWriter.setOutputFormat(options.formatTask);
+
+ System.out.print("Processing Sentence: ");
+
+ int cnt = 0;
+ int del=0;
+
+ try {
+
+ while(true) {
+
+ InstancesTagger is = new InstancesTagger();
+
+ is.init(1, new MFO());
+ SentenceData09 instance = depReader.getNext(is);//pipe.nextInstance(null, depReader);
+
+ if (instance==null) break;
+ is.fillChars(instance, 0, Pipe._CEND);
+ cnt++;
+ SentenceData09 i09 =lemmatize(is, instance, this.li);
+
+ // optionally normalize the predicted lemmas like the reader would
+ if(options.normalize) for(int k=0;k<i09.length();k++) {
+ boolean save = depReader.normalizeOn;
+ depReader.normalizeOn =true;
+ i09.plemmas[k] = depReader.normalize(i09.plemmas[k]);
+ depReader.normalizeOn = save;
+ }
+
+ if (options.overwritegold) i09.lemmas = i09.plemmas;
+
+
+
+ depWriter.write(i09);
+
+ if (cnt%100 ==0) del=Pipe.outValue(cnt, del);
+
+ }
+ depWriter.finishWriting();
+ del=Pipe.outValue(cnt, del);
+ long end = System.currentTimeMillis();
+
+ System.out.println(PipeGen.getSecondsPerInstnace(cnt,(end-start)));
+ System.out.println(PipeGen.getUsedTime(end-start));
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+
+ /**
+ * Predicts the lemma of every token of one sentence: looks the word up in
+ * the fixed-lemma table, otherwise applies the best-scoring edit operation,
+ * and optionally fixes the casing of the first letter.
+ * @return a new SentenceData09 with plemmas filled in
+ */
+ private SentenceData09 lemmatize(InstancesTagger is, SentenceData09 instance, Long2Int li) {
+
+ int LC = pipe.types.length+1; // class id of the casing classifier
+
+ is.feats[0] = new short[instance.length()][11];
+
+ is.fillChars(instance, 0, Pipe._CEND);
+
+ int length = instance.length();
+
+ F2SF fs = new F2SF(params.parameters);
+
+
+ for(int w1 = 0; w1 < length; w1++) {
+ instance.plemmas[w1]="_";
+ pipe.addCoreFeatures(is, 0, w1, 0,instance.forms[w1], vs);
+
+ // fixed lemma from the lookup table, if the word form is known
+ String f =null;
+ if (is.forms[0][w1]!=-1) {
+ f = pipe.opse.get(instance.forms[w1].toLowerCase());
+ if (f!=null) {
+ instance.plemmas[w1]=f;
+ }
+ }
+ double best = -1000.0;
+ int bestOp=0;
+
+ // otherwise: pick the edit operation with the highest score
+ for(int t = 0; t < pipe.types.length; t++) {
+
+ fs.clear();
+ for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) fs.add(li.l2i(vs[l]+(t*Pipe.s_type)));
+
+ if (fs.score >=best) {
+ best =fs.score;
+ bestOp=t;
+ }
+ }
+ //instance.ppos[w1]=""+bestOp;
+ if (f==null) instance.plemmas[w1] = StringEdit.change((doUppercase?instance.forms[w1]:instance.forms[w1].toLowerCase()),pipe.types[bestOp]);
+
+ // check for empty string
+ if(instance.plemmas[w1].length()==0) instance.plemmas[w1] = "_";
+
+ // casing classifier: score <= 0 means upper-case the first letter
+ if(doUppercase){
+ fs.clear();
+ for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) fs.add(li.l2i(vs[l]+(LC*Pipe.s_type)));
+
+
+ try {
+
+ if (fs.score<=0 && instance.plemmas[w1].length()>1) {
+ instance.plemmas[w1] = Character.toUpperCase(instance.plemmas[w1].charAt(0))+instance.plemmas[w1].substring(1);
+ } else if (fs.score<=0 && instance.plemmas[w1].length()>0) {
+ instance.plemmas[w1] = String.valueOf(Character.toUpperCase(instance.plemmas[w1].charAt(0)));
+ } else if (fs.score>0) {
+ instance.plemmas[w1] = instance.plemmas[w1].toLowerCase();
+ }
+
+ } catch(Exception e){
+ e.printStackTrace();
+ // System.out.println("error "+pipe.types[bestOp]+" "+instance.forms[w1]);
+ }
+ }
+ }
+
+
+ SentenceData09 i09 = new SentenceData09(instance);
+ i09.createSemantic(instance);
+ return i09;
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.tools.Tool#apply(is2.data.SentenceData09)
+ */
+ @Override
+ public SentenceData09 apply(SentenceData09 snt) {
+ InstancesTagger is = new InstancesTagger();
+
+ // be robust
+ if (snt.length()== 0) return snt;
+
+ // work on a copy that contains the artificial root token
+ SentenceData09 it = new SentenceData09();
+ it.createWithRoot(snt);
+
+
+ is.init(1, new MFO());
+ is.createInstance09(it.length());
+ is.fillChars(it, 0, Pipe._CEND);
+
+ for(int j = 0; j < it.length(); j++) is.setForm(0, j, it.forms[j]);
+
+ return lemmatize(is, it,li);
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/MFO.java b/dependencyParser/mate-tools/src/is2/lemmatizer/MFO.java
new file mode 100755
index 0000000..249ca42
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/lemmatizer/MFO.java
@@ -0,0 +1,257 @@
+package is2.lemmatizer;
+
+
+import is2.data.IEncoder;
+import is2.data.IEncoderPlus;
+import is2.data.IFV;
+import is2.data.Long2IntInterface;
+import is2.util.DB;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+/**
+ * Map Features, do not map long to integer
+ *
+ * @author Bernd Bohnet, 20.09.2009
+ */
+
+final public class MFO implements IEncoderPlus {
+
+ /** The features and its values */
+ static private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>();
+
+ /** The feature class and the number of values */
+ static private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>();
+
+ /** The number of bits needed to encode a feature */
+ static final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>();
+
+ /** Integer counter for long2int */
+ static private int count=0;
+
+ /** Stop growing */
+ public boolean stop=false;
+
+ final public static String NONE="<None>";
+
+
+
+
+
+
+
+ public MFO () {}
+
+
+ public int size() {return count;}
+
+
+
+ /**
+ * Register an attribute class, if it not exists and add a possible value
+ * @param type
+ * @param type2
+ */
+ final public int register(String a, String v) {
+
+ HashMap<String,Integer> fs = getFeatureSet().get(a);
+ if (fs==null) {
+ fs = new HashMap<String,Integer>();
+ getFeatureSet().put(a, fs);
+ fs.put(NONE, 0);
+ getFeatureCounter().put(a, 1);
+ }
+ Integer c = getFeatureCounter().get(a);
+
+ Integer i = fs.get(v);
+ if (i==null) {
+ fs.put(v, c);
+ c++;
+ getFeatureCounter().put(a,c);
+ return c-1;
+ } else return i;
+ }
+
+ /**
+ * Calculates the number of bits needed to encode a feature
+ */
+ public void calculateBits() {
+
+ int total=0;
+ for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+ int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2)));
+ m_featureBits.put(e.getKey(), bits);
+ total+=bits;
+ // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1));
+ }
+
+// System.out.println("total number of needed bits "+total);
+ }
+
+
+
+ public String toString() {
+
+ StringBuffer content = new StringBuffer();
+ for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+ content.append(e.getKey()+" "+e.getValue());
+ content.append(':');
+ // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey());
+ content.append(getFeatureBits(e.getKey()));
+
+ /*if (vs.size()<120)
+ for(Entry<String,Integer> e2 : vs.entrySet()) {
+ content.append(e2.getKey()+" ("+e2.getValue()+") ");
+ }*/
+ content.append('\n');
+
+ }
+ return content.toString();
+ }
+
+
+
+ static final public short getFeatureBits(String a) {
+ if(m_featureBits.get(a)==null) return 0;
+ return (short)m_featureBits.get(a).intValue();
+ }
+
+
+
+ /**
+ * Get the integer place holder of the string value v of the type a
+ *
+ * @param t the type
+ * @param v the value
+ * @return the integer place holder of v
+ */
+ final public int getValue(String t, String v) {
+
+ if (m_featureSets.get(t)==null) return -1;
+ Integer vi = m_featureSets.get(t).get(v);
+ if (vi==null) return -1; //stop &&
+ return vi.intValue();
+ }
+
+ /**
+ * Static version of getValue
+ * @see getValue
+ */
+ static final public int getValueS(String a, String v) {
+
+ if (m_featureSets.get(a)==null) return -1;
+ Integer vi = m_featureSets.get(a).get(v);
+ if (vi==null) return -1; //stop &&
+ return vi.intValue();
+ }
+
+ public int hasValue(String a, String v) {
+
+ Integer vi = m_featureSets.get(a).get(v);
+ if (vi==null) return -1;
+ return vi.intValue();
+ }
+
+
+ public static String printBits(int k) {
+ StringBuffer s = new StringBuffer();
+ for(int i =0;i<31;i++) {
+ s.append((k&0x00000001)==1?'1':'0');
+ k=k>>1;
+
+ }
+ s.reverse();
+ return s.toString();
+ }
+
+
+
+
+
+
+
+ /**
+ * Maps a long to a integer value. This is very useful to save memory for sparse data long values
+ * @param l
+ * @return the integer
+ */
+ static public int misses = 0;
+ static public int good = 0;
+
+
+
+
+ /**
+ * Write the data
+ * @param dos
+ * @throws IOException
+ */
+ static public void writeData(DataOutputStream dos) throws IOException {
+ dos.writeInt(getFeatureSet().size());
+ // DB.println("write"+getFeatureSet().size());
+ for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) {
+ dos.writeUTF(e.getKey());
+ dos.writeInt(e.getValue().size());
+
+ for(Entry<String,Integer> e2 : e.getValue().entrySet()) {
+
+ if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey());
+ dos.writeUTF(e2.getKey());
+ dos.writeInt(e2.getValue());
+
+ }
+
+ }
+ }
+ public void read(DataInputStream din) throws IOException {
+
+ int size = din.readInt();
+ for(int i=0; i<size;i++) {
+ String k = din.readUTF();
+ int size2 = din.readInt();
+
+ HashMap<String,Integer> h = new HashMap<String,Integer>();
+ getFeatureSet().put(k,h);
+ for(int j = 0;j<size2;j++) {
+ h.put(din.readUTF(), din.readInt());
+ }
+ getFeatureCounter().put(k, size2);
+ }
+
+ count =size;
+ // stop();
+ calculateBits();
+ }
+
+
+ /**
+ * Clear the data
+ */
+ static public void clearData() {
+ getFeatureSet().clear();
+ m_featureBits.clear();
+ getFeatureSet().clear();
+ }
+
+ public HashMap<String,Integer> getFeatureCounter() {
+ return m_featureCounters;
+ }
+
+ static public HashMap<String,HashMap<String,Integer>> getFeatureSet() {
+ return m_featureSets;
+ }
+
+ static public String[] reverse(HashMap<String,Integer> v){
+ String[] set = new String[v.size()];
+ for(Entry<String,Integer> e : v.entrySet()) {
+ set[e.getValue()]=e.getKey();
+ }
+ return set;
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/Options.java b/dependencyParser/mate-tools/src/is2/lemmatizer/Options.java
new file mode 100755
index 0000000..a4b9e69
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/lemmatizer/Options.java
@@ -0,0 +1,67 @@
+package is2.lemmatizer;
+
+import java.io.File;
+import java.io.IOException;
+
+import is2.util.OptionsSuper;
+
+
+public final class Options extends OptionsSuper {
+
+
+ public Options (String[] args) throws IOException {
+
+
+
+ for(int i = 0; i < args.length; i++) {
+
+ if (args[i].equals("--help")) explain();
+
+ if (args[i].equals("-normalize")) {
+ normalize=Boolean.parseBoolean(args[++i]);
+ } else if (args[i].equals("-features")) {
+ features= args[i+1]; i++;
+ } else if (args[i].equals("-hsize")) {
+ hsize= Integer.parseInt(args[i+1]); i++;
+ } else if (args[i].equals("-len")) {
+ maxLen= Integer.parseInt(args[i+1]); i++;
+ } else if (args[i].equals("-tmp")) {
+ tmp = args[i+1]; i++;
+ } else if (args[i].equals("-uc")) {
+ upper=true;
+ System.out.println("set uppercase "+upper);
+
+ } else super.addOption(args, i);
+
+ }
+
+ if (trainfile!=null) {
+
+
+ if (tmp!=null) trainforest = File.createTempFile("train", ".tmp", new File(tmp));
+ else trainforest = File.createTempFile("train", ".tmp"); //,new File("F:\\")
+ trainforest.deleteOnExit();
+ }
+
+
+
+
+ }
+
+ private void explain() {
+ System.out.println("Usage: ");
+ System.out.println("java -class mate.jar is2.lemmatizer.Lemmatizer [Options]");
+ System.out.println();
+ System.out.println("Options:");
+ System.out.println("");
+ System.out.println(" -train <file> the corpus a model is trained on; default "+this.trainfile);
+ System.out.println(" -test <file> the input corpus for testing; default "+this.testfile);
+ System.out.println(" -out <file> the output corpus (result) of a test run; default "+this.outfile);
+ System.out.println(" -model <file> the parsing model for traing the model is stored in the files");
+ System.out.println(" and for parsing the model is load from this file; default "+this.modelName);
+ System.out.println(" -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters);
+ System.out.println(" -count <number> the n first sentences of the corpus are take for the training default "+this.count);
+
+ System.exit(0);
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/Pipe.java b/dependencyParser/mate-tools/src/is2/lemmatizer/Pipe.java
new file mode 100755
index 0000000..37647ee
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/lemmatizer/Pipe.java
@@ -0,0 +1,585 @@
+package is2.lemmatizer;
+
+import is2.data.Cluster;
+import is2.data.D4;
+import is2.data.Instances;
+import is2.data.InstancesTagger;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.tools.IPipe;
+import is2.util.DB;
+import is2.util.OptionsSuper;
+import is2.data.Long2Int;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+
+
+
/**
 * Feature pipeline of the lemmatizer: reads a CoNLL 2009 corpus, derives
 * edit-script "operations" that map word forms to lemmas, registers all
 * feature parts in the {@link MFO} mapping, and computes the core feature
 * vectors used for training and tagging.
 *
 * NOTE(review): integer feature codes are assigned in registration order,
 * so the order of mf.register(...) calls in this class is load-bearing.
 */
final public class Pipe extends PipeGen implements IPipe {


	// a script is only used when it occurs more than _MIN_OCCURENT_FOR_SCRIPT_USE
	// times and maps more than _MIN_WORDS_MAPPED_BY_SCRIPT distinct word forms
	private static final int _MIN_WORDS_MAPPED_BY_SCRIPT = 1;
	private static final int _MIN_OCCURENT_FOR_SCRIPT_USE = 4;

	// names of the feature templates F0..F41
	private static final String _F0 = "F0";
	private static final String _F1 = "F1",_F2 = "F2",_F3 = "F3",_F4 = "F4",_F5 = "F5",_F6= "F6",_F7= "F7",_F8= "F8",_F9="F9",_F10 = "F10";
	private static final String _F11="F11",_F12="F12",_F13= "F13",_F14="F14",_F15="F15",_F16="F16",_F17="F17",_F18="F18",_F19="F19",_F20="F20";
	private static final String _F21="F21",_F22="F22",_F23= "F23",_F24="F24",_F25="F25",_F26="F26",_F27="F27",_F28="F28",_F29="F29",_F30="F30";
	private static final String _F31="F31",_F32="F32",_F33= "F33",_F34="F34",_F35="F35",_F36="F36",_F37="F37",_F38="F38",_F39="F39",_F40="F40";
	private static final String _F41="F41";

	// integer codes of the templates above, assigned in initFeatures()
	// NOTE(review): there is no _f40 field, and _f39/_f41 are never assigned
	private static int _f0,_f1,_f2,_f3,_f4,_f5,_f6,_f7,_f8,_f9,_f10,_f11,_f12,_f13,_f14,_f15,_f16,_f17,_f18,_f19,_f20;
	private static int _f21,_f22,_f23,_f24,_f25,_f26,_f27,_f28,_f29,_f30,_f31,_f32,_f33,_f34,_f35,_f36,_f37,_f38,_f39,_f41;
	public static int _CEND,_swrd,_ewrd;

	public static final String MID = "MID", END = "END",STR = "STR",OPERATION = "OP";

	private CONLLReader09 depReader;


	// direct form(lowercased) -> lemma map for words not covered by scripts
	public HashMap<String,String> opse = new HashMap<String, String> ();

	// operation code -> edit-script string
	public String[] types;


	public MFO mf =new MFO();
	// feature encoders: z combines character codes, x combines word codes
	private D4 z, x;


	Cluster cl;
	OptionsSuper options;
	Long2Int li;

	public Pipe (OptionsSuper options2, Long2Int l) {

		options=options2;
		li=l;
	}


	/**
	 * First pass over the training corpus: registers every feature part
	 * (relations, words, lemmas, POS tags, characters and edit scripts)
	 * in the MFO mapping; second pass builds the training instances.
	 *
	 * @param file the CoNLL 2009 training file
	 * @return the filled instance container
	 */
	public InstancesTagger createInstances(String file) {

		InstancesTagger is = new InstancesTagger();

		depReader = new CONLLReader09(CONLLReader09.NO_NORMALIZE);

		depReader.startReading(file);
		mf.register(REL,"<root-type>");
		mf.register(POS,"<root-POS>");


		System.out.print("Registering feature parts ");
		// ops: edit script -> number of occurrences in the corpus
		HashMap<String,Integer> ops = new HashMap<String, Integer> ();
		// op2form: edit script -> set of lowercased word forms it maps
		HashMap<String,HashSet<String>> op2form = new HashMap<String, HashSet<String>> ();
		int ic=0;   // sentence counter
		int del=0;  // width of the last progress output (for outValue)
		// forms to drop from the opse word map because a script covers them
		HashSet<String> rm = new HashSet<String> ();

		while(true) {
			SentenceData09 instance1 = depReader.getNext();
			if (instance1== null) break;
			ic++;
			if (ic % 100 ==0) {del = outValue(ic, del);}


			String[] labs1 = instance1.labels;
			for(int i1 = 0; i1 < labs1.length; i1++) {
				//typeAlphabet.lookupIndex(labs1[i1]);
				mf.register(REL, labs1[i1]);
			}

			String[] w = instance1.forms;
			for(int i1 = 0; i1 < w.length; i1++) {
				// saw the first time?
				if (mf.getValue(WORD, w[i1].toLowerCase())==-1)
					opse.put(instance1.forms[i1].toLowerCase(), instance1.lemmas[i1]);

				mf.register(WORD, w[i1].toLowerCase());
			}
			for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]);

			w = instance1.lemmas;
			for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]);
			for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1].toLowerCase());

			w = instance1.plemmas;
			for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]);
			for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1].toLowerCase());


			for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]);

			w = instance1.ppos;
			for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);

			w = instance1.gpos;
			for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);


			// count the edit script of every token (index 0 is the root)
			for(int i1 = 1; i1 < w.length; i1++) {
				String op = getOperation(instance1, i1);
				if (ops.get(op)==null) ops.put(op, 1);
				else {
					ops.put(op, (ops.get(op)+1));
					// NOTE(review): magic number 4 duplicates the value of
					// _MIN_OCCURENT_FOR_SCRIPT_USE; keep the two in sync
					if (ops.get(op)>4) rm.add(instance1.forms[i1].toLowerCase());
				}


				HashSet<String> forms = op2form.get(op);
				if (forms==null) {
					forms = new HashSet<String>();
					op2form.put(op, forms);
				}
				forms.add(instance1.forms[i1].toLowerCase());

			}

		}

		int countFreqSingleMappings =0;

		int sc=0;   // number of scripts that are actually used
		ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>();
		for(Entry<String, Integer> e : ops.entrySet()) {

			// do not use scripts for infrequent cases or frequent single mappings (der -> die)
			if(e.getValue()>_MIN_OCCURENT_FOR_SCRIPT_USE && op2form.get(e.getKey()).size()>_MIN_WORDS_MAPPED_BY_SCRIPT) {
				mf.register(OPERATION, e.getKey());
				sc++;
				opsl.add(e);
			} else {
				// do not remove the infrequent cases
				rm.removeAll(op2form.get(e.getKey()));

				if (op2form.get(e.getKey()).size()<=1) countFreqSingleMappings+=op2form.get(e.getKey()).size();
			}
		}
		// forms whose script made the cut do not need the direct word map
		for(String k : rm) {
			opse.remove(k);
		}

		// sort used scripts by ascending frequency (only for the debug loop below)
		Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){

			@Override
			public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {

				return o1.getValue()==o2.getValue()?0:o1.getValue()>o2.getValue()?1:-1;
			}
		});



		// NOTE(review): debugging loop, intentionally left empty
		for(Entry<String, Integer> e : opsl) {
			// System.out.println(e.getKey()+" "+e.getValue());
		}


		if (options.clusterFile==null)cl = new Cluster();
		else cl= new Cluster(options.clusterFile, mf,6);


		System.out.println("\nfound scripts "+ops.size()+" used scripts "+sc);
		System.out.println("found mappings of single words "+countFreqSingleMappings);
		System.out.println("use word maps instead of scripts "+this.opse.size());
		// System.out.println(" "+opse);
		System.out.println(""+mf.toString());

		initFeatures();

		mf.calculateBits();
		initValues();

		// second pass: build the actual training instances
		depReader.startReading(options.trainfile);

		int i = 0;
		long start1 = System.currentTimeMillis();

		System.out.print("Creating Features: ");
		is.init(ic, mf) ;
		del=0;
		while(true) {
			try {
				if (i % 100 ==0) {del = outValue(i, del);}
				SentenceData09 instance1 = depReader.getNext(is);
				if (instance1== null) break;

				is.fillChars(instance1, i, _CEND);

				if (i>options.count) break;

				i++;
			} catch(Exception e) {
				DB.println("error in sentnence "+i);
				e.printStackTrace();
			}
		}
		long end1 = System.currentTimeMillis();
		System.gc();
		long mem2 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
		System.out.print(" time "+(end1-start1)+" mem "+(mem2/1024)+" kb");

		// operation code -> script string lookup table
		types = new String[mf.getFeatureCounter().get(OPERATION)];

		for(Entry<String,Integer> e : mf.getFeatureSet().get(OPERATION).entrySet()) {
			types[e.getValue()] = e.getKey();
			// System.out.println("set pos "+e.getKey());
		}

		System.out.println("Num Features: " + mf.size());



		return is;

	}


	/**
	 * Derive the edit script for token k of sentence n.
	 *
	 * @param is instance container
	 * @param n sentence index
	 * @param k token index
	 * @param wds word lookup table (code -> string)
	 * @return the operation string mapping the form to its gold lemma
	 */
	public static String getOperation(Instances is, int n, int k, String[] wds) {


		String form = wds[is.forms[n][k]];
		String olemma = wds[is.glemmas[n][k]];

		// scripts operate on reversed (suffix-first) lowercased strings
		String s = new StringBuffer(form.toLowerCase()).reverse().toString();
		String t = new StringBuffer(olemma.toLowerCase()).reverse().toString();



		return getOperation2(s, t);
	}


	/**
	 * Derive the edit script for token i1 of a sentence.
	 *
	 * @param instance1 the sentence
	 * @param i1 token index
	 * @return the operation string mapping the form to its lemma
	 */
	public static String getOperation(SentenceData09 instance1, int i1) {
		String s = new StringBuffer(instance1.forms[i1].toLowerCase()).reverse().toString();
		String t = new StringBuffer(instance1.lemmas[i1].toLowerCase()).reverse().toString();



		return getOperation2(s, t);
	}

	/**
	 * Derive the edit script mapping form si to lemma ti.
	 *
	 * @param si the word form
	 * @param ti the lemma
	 * @return the operation string
	 */
	public static String getOperation(String si, String ti) {
		String s = new StringBuffer(si.toLowerCase()).reverse().toString();
		String t = new StringBuffer(ti.toLowerCase()).reverse().toString();



		return getOperation2(s, t);
	}


	/**
	 * Compute the edit script between two (already reversed) strings via the
	 * Levenshtein matrix; "0" encodes the no-op script for identical strings.
	 */
	private static String getOperation2(String s, String t) {
		StringBuffer po = new StringBuffer();
		String op;
		if (!s.equals(t)) {


			int[][] d =StringEdit.LD(s, t);
			StringEdit.searchPath(s,t,d, po, false);
			op = po.toString();

		} else op ="0"; // do nothing
		return op;
	}


	/** Registers every character of the word as a value of the given class. */
	private void registerChars(String type, String word) {
		for(int i=0;i<word.length();i++) mf.register(type, Character.toString(word.charAt(i)));
	}


	/**
	 * Creates the feature encoders and caches the per-class value counts
	 * after all feature parts have been registered.
	 */
	public void initValues() {

		z = new D4(li);

		x = new D4(li);
		// NOTE(review): s_type is read here before it is recomputed below and
		// x.a0 is overwritten again at the end of this method
		x.a0=s_type;

		s_pos = mf.getFeatureCounter().get(POS).intValue();//mf.getFeatureBits(POS);
		s_word = mf.getFeatureCounter().get(WORD);
		s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits(TYPE);
		s_char = mf.getFeatureCounter().get(CHAR).intValue();//mf.getFeatureBits(CHAR);
		s_oper = mf.getFeatureCounter().get(OPERATION).intValue();//mf.getFeatureBits(OPERATION);

		// NOTE(review): duplicates the types[] construction in createInstances
		types = new String[mf.getFeatureCounter().get(Pipe.OPERATION)];
		for(Entry<String,Integer> e : mf.getFeatureSet().get(Pipe.OPERATION).entrySet()) types[e.getValue()] = e.getKey();

		//wds = new String[mf.getFeatureCounter().get(Pipe.WORD)];
		//for(Entry<String,Integer> e : mf.getFeatureSet().get(Pipe.WORD).entrySet()) wds[e.getValue()] = e.getKey();


		// configure the radix of each encoder slot
		z.a0 = s_type;z.a1 = s_oper; z.a2 = s_char; z.a3 = s_char; z.a4 = s_char;z.a5 = s_char;z.a6 = s_char;z.a7 = s_char;
		x.a0 = s_type; x.a1 = s_oper;x.a2 = s_word; x.a3 = s_word; x.a4 = s_word;x.a5 = s_char;x.a6 = s_char;x.a7 = s_char;

	}

	// cached sizes of the feature classes (filled by initValues)
	public static int s_pos,s_word,s_type,s_dir,s_dist, s_char, s_oper;



	/**
	 * Initialize the features.
	 * Registers the template names and sentinel values; the registration
	 * order fixes the integer codes used by addCoreFeatures.
	 */
	public void initFeatures() {



		for(int k=0;k<50;k++) {
			mf.register(TYPE, "F"+k);
		}

		_f0 = mf.register(TYPE, _F0);
		_f1 = mf.register(TYPE, _F1);
		_f2 = mf.register(TYPE, _F2);
		_f3 = mf.register(TYPE, _F3);
		_f4 = mf.register(TYPE, _F4);
		_f5 = mf.register(TYPE, _F5);
		_f6 = mf.register(TYPE, _F6);
		_f7 = mf.register(TYPE, _F7);
		_f8 = mf.register(TYPE, _F8);
		_f9 = mf.register(TYPE, _F9);
		_f10 = mf.register(TYPE, _F10);
		_f11 = mf.register(TYPE, _F11);
		_f12 = mf.register(TYPE, _F12);
		_f13 = mf.register(TYPE, _F13);
		_f14 = mf.register(TYPE, _F14);
		_f15 = mf.register(TYPE, _F15);
		_f16 = mf.register(TYPE, _F16);
		_f17 = mf.register(TYPE, _F17);
		_f18 = mf.register(TYPE, _F18);
		_f19 = mf.register(TYPE, _F19);
		_f20 = mf.register(TYPE, _F20);
		_f21 = mf.register(TYPE, _F21);
		_f22 = mf.register(TYPE, _F22);
		_f23 = mf.register(TYPE, _F23);
		_f24 = mf.register(TYPE, _F24);
		_f25 = mf.register(TYPE, _F25);
		_f26 = mf.register(TYPE, _F26);
		_f27 = mf.register(TYPE, _F27);
		_f28 = mf.register(TYPE, _F28);
		_f29 = mf.register(TYPE, _F29);
		_f30 = mf.register(TYPE, _F30);

		_f31 = mf.register(TYPE, _F31);
		_f32 = mf.register(TYPE, _F32);
		_f33 = mf.register(TYPE, _F33);
		_f34 = mf.register(TYPE, _F34);

		_f35 = mf.register(TYPE, _F35);
		_f36 = mf.register(TYPE, _F36);
		_f37 = mf.register(TYPE, _F37);
		_f38 = mf.register(TYPE, _F38);


		// sentinel values for positions before/after the sentence
		mf.register(POS, MID);
		mf.register(POS, STR);
		mf.register(POS, END);
		mf.register(TYPE, CHAR);

		_swrd = mf.register(WORD, STR);
		_ewrd = mf.register(WORD, END);


		_CEND = mf.register(CHAR, END);


	}


	/**
	 * Fill f with the encoded core features of token i for the candidate
	 * operation oper.
	 *
	 * @param is instance container (provides cached character codes)
	 * @param ic sentence index
	 * @param i token index
	 * @param oper candidate operation (edit script) code
	 * @param form the surface form of the token
	 * @param f output array for the encoded feature values
	 */
	final public void addCoreFeatures(InstancesTagger is, int ic, int i, int oper, String form, long[] f) {

		// reset the output
		for(int l=f.length-1;l>=0;l--) f[l]=0;

		int formi =is.forms[ic][i];
		int wl =is.chars[ic][i][11];//.forms[i].length();

		// token position bucketed to 1..4
		int position = 1+(i<3?i:3);

		// c0..c5: first characters, e0..e4: last characters of the form
		int c0= is.chars[ic][i][0], c1=is.chars[ic][i][1], c2=is.chars[ic][i][2], c3=is.chars[ic][i][3], c4=is.chars[ic][i][4],c5=is.chars[ic][i][5];
		int e0 =is.chars[ic][i][6], e1 =is.chars[ic][i][7],e2 =is.chars[ic][i][8],e3 =is.chars[ic][i][9],e4 =is.chars[ic][i][10];

		int len = is.length(ic);



		x.v1=oper; x.v0 = _f0; x.v2 = formi; x.cz3(); f[0]=x.getVal(); f[1]=x.csa(3, position);
		x.v0 = _f1; x.v2 = formi; x.v3 =i+1>=len?x.v3=_ewrd:is.forms[ic][i+1];x.cz4(); f[2]=x.getVal();

		// contains upper case include again!!!

		// upper: 0 none, 1 first char, 2 inner char, 3 first + another
		// number: 1 none, 2 leading digit, 3 inner digit
		short upper =0;
		short number = 1;
		for(int k1=0;k1<wl;k1++){
			char c =form.charAt(k1);
			if (Character.isUpperCase(c)) {
				if (k1==0) upper=1;
				else {
					// first char + another
					if (upper==1)upper=3;
					// another uppercase in the word
					else if (upper==0) upper=2;
				}
			}

			if (Character.isDigit(c) && k1==0) number =2 ;
			else if (Character.isDigit(c) && number==1) number = 3 ;

		}

		// contains a number
		z.v0= _f21; z.v2=number; z.cz3();f[3]=z.getVal();

		z.v0 = _f4; z.v1 = oper; z.v2=c0; z.cz3();f[4]=z.getVal();
		z.v0 = _f5; z.v2 = e0;z.cz3();f[5]=z.getVal();

		// prefix character n-grams
		z.v2=c0; z.v3=c1; z.v4=c2; z.v5=c3; z.v6=c4;
		z.v0=_f6; z.cz4(); f[6]=z.getVal();
		z.v0=_f7; z.cz5(); f[7]=z.getVal();
		z.v0=_f8; z.cz6(); f[8]=z.getVal();
		z.v0=_f9; z.cz7(); f[9]=z.getVal();

		int c=10;
		// suffix character n-grams, each also combined with the upper flag
		z.v2=e0; z.v3=e1; z.v4=e2; z.v5=e3; z.v6=e4;
		z.v0 =_f10; z.cz4();f[c++]=z.getVal(); f[c++]= z.csa(3, upper);
		z.v0 =_f11; z.cz5();f[c++]=z.getVal(); f[c++]= z.csa(3, upper);
		z.v0 =_f12; z.cz6();f[c++]=z.getVal(); f[c++]= z.csa(3, upper);
		z.v0 =_f13; z.cz7();f[c++]=z.getVal(); f[c++]= z.csa(3, upper);

		// context features of the following tokens
		if (len>i+1) {

			z.v0 = _f14; z.v2 = is.chars[ic][i+1][0];
			z.cz3();f[c++]=z.getVal();

			z.v0 = _f15; z.v2 = is.chars[ic][i+1][5];z.cz3();f[c++]=z.getVal();

			if (is.chars[ic][i+1][11]>1 ) {
				z.v0 = _f16; z.v2 = is.chars[ic][i+1][0];
				z.v3 = is.chars[ic][i+1][2];z.cz4();f[c++]=z.getVal();

				z.v0 = _f17; z.v2 = is.chars[ic][i+1][1];
				z.v3 = is.chars[ic][i+1][6];
				z.cz4();f[c++]=z.getVal();//fv.add(li.l2i(mf.calc4(b)));
			}


			x.v0 = _f18;
			x.v2 = is.forms[ic][i+1];
			x.cz3();f[c++]=x.getVal();

			if (len>i+2) {
				x.v0 = _f32;
				x.v2 = is.forms[ic][i+2]; x.v3 = is.forms[ic][i+1]; x.cz4();f[c++]=x.getVal();
				x.cz3();f[c++]=x.getVal();//fv.add(li.l2i(mf.calc3(b)));

			}

			if (len>i+3) {
				x.v0 = _f33; x.v2 = is.forms[ic][i+3]; x.v3 = is.forms[ic][i+2];x.cz4();f[c++]=x.getVal();//fv.add(li.l2i(mf.calc4(b)));
				// NOTE(review): fixed index 27 here while the surrounding
				// features use c++; confirm this slot cannot collide
				x.cz3();f[27]=x.getVal();//fv.add(li.l2i(mf.calc3(b)));
			}
		}

		// length

		z.v0= _f19; z.v1=oper; z.v2=wl;z.cz3();f[c++]=z.getVal();//fv.add(li.l2i(mf.calc3(dl1)));

		if (i<1) return ;

		// context features of the preceding tokens
		x.v0 = _f27; x.v1=oper;
		x.v2 = is.forms[ic][i-1];x.cz3();f[c++]=x.getVal();//fv.add(li.l2i(mf.calc3(b)));


		if (i<2) return ;

		//added this before it was 99.46
		x.v0 = _f28; x.v2 = is.forms[ic][i-2];x.cz3();f[c++]=x.getVal();//fv.add(li.l2i(mf.calc3(b)));

		// result 99.484
		if (i<3) return ;

		x.v0 = _f31; x.v1=oper; x.v2 = is.forms[ic][i-3]; x.v3 = is.forms[ic][i-2]; x.cz4();f[c++]=x.getVal();//fv.add(li.l2i(mf.calc4(b)));

	}



// public String[] wds;

	/**
	 * Write the lemma that are not mapped by operations
	 * @param dos
	 */
	private void writeMap(DataOutputStream dos) {

		try {
			dos.writeInt(opse.size());
			for(Entry<String, String> e : opse.entrySet()) {
				dos.writeUTF(e.getKey());
				dos.writeUTF(e.getValue());
			}
		} catch (IOException e1) {
			e1.printStackTrace();
		}
	}



	/**
	 * Read the form-lemma mapping not read by operations
	 * @param dis
	 */
	public void readMap(DataInputStream dis) {
		try {
			int size = dis.readInt();
			for(int i =0; i<size;i++) {
				opse.put(dis.readUTF(), dis.readUTF());
			}
		} catch (IOException e1) {
			e1.printStackTrace();
		}
	}






	/* (non-Javadoc)
	 * @see is2.tools.IPipe#write(java.io.DataOutputStream)
	 */
	@Override
	public void write(DataOutputStream dos) {
		// persists the word map followed by the cluster data
		this.writeMap(dos);
		try {
			cl.write(dos);
		} catch (IOException e) {
			e.printStackTrace();
		}

	}



}
diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/StringEdit.java b/dependencyParser/mate-tools/src/is2/lemmatizer/StringEdit.java
new file mode 100755
index 0000000..8a4080e
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/lemmatizer/StringEdit.java
@@ -0,0 +1,318 @@
+package is2.lemmatizer;
+
+import is2.util.DB;
+
+import java.util.ArrayList;
+
/**
 * String edit-distance utilities for the lemmatizer.
 *
 * Lemmatization is encoded as a sequence of single-character edit
 * operations ("scripts") computed on the REVERSED form and lemma:
 * {@link #LD(String, String)} builds the Levenshtein matrix,
 * {@link #searchPath} extracts an operation string from it, and
 * {@link #change(String, String)} replays such a string on a word form.
 *
 * Operation encoding (positions are stored as raw char values):
 *   'R' pos from to - replace, 'I' pos ch - insert, 'D' pos ch - delete.
 */
public class StringEdit {

	/**
	 * Command line demo: prints the operation string mapping args[0] to args[1].
	 */
	public static void main(String args[]) {

		// the algorithm works on reversed strings (suffix-oriented edits)
		String s = new StringBuffer(args[0]).reverse().toString();
		String t = new StringBuffer(args[1]).reverse().toString();

		int d[][] = LD(s, t);

		StringBuffer operations = new StringBuffer();
		searchPath(s, t, d, operations, false);
		// fixed typo in the debug output ("resuylt")
		System.out.println("result " + operations);
	}

	//****************************
	// Get minimum of three values
	//****************************

	static private int Minimum (int a, int b, int c) {
		return Math.min(a, Math.min(b, c));
	}

	//*****************************
	// Compute Levenshtein distance
	//*****************************

	/**
	 * Compute the Levenshtein dynamic-programming table between s and t.
	 *
	 * @return a (s.length()+1) x (t.length()+1) table where d[i][j] is the
	 *         edit distance between the first i characters of s and the
	 *         first j characters of t; if either string is empty the table
	 *         is returned zero-initialized (original degenerate behavior)
	 */
	static public int[][] LD (String s, String t) {

		int n = s.length ();  // length of s
		int m = t.length ();  // length of t
		int cost;             // substitution cost (0 on match, else 1)

		int[][] d = new int[n+1][m+1];

		// degenerate cases keep the original behavior: return the all-zero
		// table instead of filling the border row/column
		if (n == 0) return d;
		if (m == 0) return d;

		// border: distance from/to the empty prefix
		for (int i = 0; i <= n; i++) d[i][0] = i;
		for (int j = 0; j <= m; j++) d[0][j] = j;

		for (int i = 1; i <= n; i++) {

			int s_i = s.charAt (i - 1);

			for (int j = 1; j <= m; j++) {

				if (s_i == t.charAt (j - 1)) cost = 0;
				else cost = 1;

				// deletion, insertion, substitution
				d[i][j] = Minimum (d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1] + cost);
			}
		}

		return d;
	}

	/**
	 * Walk the distance matrix backwards and append the edit operations that
	 * turn s into t to {@code operations}.
	 *
	 * @param s source string (already reversed by the callers)
	 * @param t target string (already reversed by the callers)
	 * @param d table computed by {@link #LD(String, String)}
	 * @param operations receives the operation string
	 * @param debug when true, also applies the operations to a copy of s
	 * @return the transformed string when debug is set, otherwise null
	 */
	static String searchPath(String s, String t, int[][] d, StringBuffer operations, boolean debug) {

		StringBuffer result = new StringBuffer(s);

		int n = d.length;
		int m = d[0].length;

		int x=n-1;
		int y=m-1;
		boolean changed =false;
		while(true) {
			if (debug && changed )System.out.println("result "+new StringBuffer(result) .reverse());

			if (d[x][y]==0)break;
			// substitution: strictly cheaper diagonal neighbor
			if (y>0&&x>0&& d[x-1][y-1]<d[x][y]) {
				if (debug) System.out.println("min d[x-1][y-1] "+d[x-1][y-1]+" d[x][y] "+d[x][y]+" rep "+s.charAt(x-1)+" with "+t.charAt(y-1)+" at "+(x-1));

				operations.append('R').append(Character.toString((char)((int)x-1))).append(s.charAt(x-1)).append(t.charAt(y-1));
				if (debug) result.setCharAt(x-1, t.charAt(y-1));
				y--;
				x--;
				changed =true;
				continue;
			}
			// insertion: strictly cheaper left neighbor
			if (y>0&& d[x][y-1]<d[x][y]) {
				if (debug) System.out.println("min d[x][y-1] "+d[x][y-1]+" d[x][y] "+d[x][y]+" ins "+t.charAt(y-1)+" at "+(x));
				operations.append('I').append(Character.toString((char)((int)x))).append(t.charAt(y-1));
				if (debug)result.insert(x, t.charAt(y-1));
				y--;
				changed =true;
				continue;
			}
			// deletion: strictly cheaper upper neighbor
			if (x>0&& d[x-1][y]<d[x][y]) {
				if (debug)System.out.println("min d[x-1][y] "+d[x-1][y]+" d[x][y] "+d[x][y]+" del "+s.charAt(x-1)+" at "+(x-1));
				operations.append('D').append(Character.toString((char)((int)x-1))).append(s.charAt(x-1));
				if (debug)result.deleteCharAt(x-1);
				x--;
				changed =true;
				continue;
			}
			changed =false;
			// no cheaper neighbor: follow an equal-cost cell (match / no-op)
			if (x>0&& y>0 && d[x-1][y-1]==d[x][y]) {
				x--; y--;
				continue ;
			}
			if (x>0&& d[x-1][y]==d[x][y]) {
				x--;
				continue;
			}
			if (y>0 && d[x][y-1]==d[x][y]) {
				y--;
				continue;
			}

		}
		if (debug) return result.reverse().toString();
		else return null;
	}

	/**
	 * Replay an operation string (computed on reversed strings) on a word form.
	 *
	 * @param s the word form (not reversed; it is reversed internally)
	 * @param operations the operation string, "0" meaning "no change"
	 * @return the transformed string, or s unchanged when an operation refers
	 *         to a position outside the current string
	 */
	public static String change(String s, String operations) {

		StringBuffer result = new StringBuffer(s).reverse();

		int pc =0;
		while(true) {
			if (operations.length()<=pc) break;
			char nextOperation = operations.charAt(pc);
			pc++;
			if (nextOperation == 'R') {
				int xm1 = (char)operations.charAt(pc);
				pc++;
				char replace = operations.charAt(pc);
				pc++;
				char with = operations.charAt(pc);

				// position no longer exists: give up and return the input
				if (result.length()<=xm1) return s;

				// replace only if the expected character is still there
				if (result.charAt(xm1)==replace) result.setCharAt(xm1, with);
				pc++;

			}else if (nextOperation == 'I') {
				int x = operations.charAt(pc);
				pc++;
				char in = operations.charAt(pc);

				if (result.length()<x) return s;

				result.insert(x, in);
				pc++;
			} else if (nextOperation == 'D' ) {
				int xm1 = operations.charAt(pc);

				if (result.length()<=xm1) return s;

				result.deleteCharAt(xm1);
				pc++;
				// skip the recorded character of the deletion
				pc++;
			}

		}
		return result.reverse().toString();
	}

	/**
	 * Find the operation stored for a given position.
	 *
	 * @param opers list of operation strings (position is stored in char 1)
	 * @param position the position to look up
	 * @return the matching operation, or "0" (no-op) when none applies
	 */
	public static String get(ArrayList<String> opers, int position) {
		for(String s : opers) {
			int p = (int)s.charAt(1);
			if (p==position) {
				return s;
			}
		}
		return "0";
	}

	/**
	 * Apply a single simplified operation at position c of the form.
	 *
	 * @param form the word form
	 * @param operation simplified operation ("0" = no-op; 'I' ch, 'R' _ ch, 'D')
	 * @param c the character position the operation applies to
	 * @return the transformed form, or form unchanged when out of range
	 */
	public static String changeSimple(String form, String operation, int c) {

		if (operation.equals("0")) return form;

		if (operation.charAt(0)=='I') {
			StringBuffer f = new StringBuffer(form);
			if (f.length()<=c) {
				// out of range: insertion impossible
				return form;
			}
			f.insert(c+1, operation.charAt(1));
			return f.toString();
		}
		if (operation.charAt(0)=='R') {
			StringBuffer f = new StringBuffer(form);
			if (f.length()<=c) {
				// out of range: replacement impossible
				return form;
			}
			f.setCharAt(c, operation.charAt(2));
			return f.toString();
		}

		if (operation.charAt(0)=='D') {
			StringBuffer f = new StringBuffer(form);
			f.delete(c, c+1);
			return f.toString();
		}
		return form;
	}

	/**
	 * Strip the position byte (char 1) from a full operation string,
	 * yielding the simplified encoding used by changeSimple.
	 *
	 * @param o the full operation string
	 * @return the operation without its position character
	 */
	public static String simple(String o) {
		StringBuffer s = new StringBuffer(o);
		s.delete(1, 2);
		return s.toString();
	}

}
diff --git a/dependencyParser/mate-tools/src/is2/mtag/Convert.java b/dependencyParser/mate-tools/src/is2/mtag/Convert.java
new file mode 100755
index 0000000..e262269
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/mtag/Convert.java
@@ -0,0 +1,98 @@
+/**
+ *
+ */
+package is2.mtag;
+
+
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.util.ArrayList;
+
+/**
+ * @author Dr. Bernd Bohnet, 20.01.2010
+ *
+ *
+ */
+public class Convert {
+
+ public static void main (String[] args) throws IOException {
+
+ Options options = new Options(args);
+
+ split(options.trainfile);
+
+ }
+
+ /**
+ * @param trainfile
+ * @throws IOException
+ */
+ private static void split(String trainfile) throws IOException {
+
+ String dir = "split";
+ boolean success = (new File("split")).mkdir();
+ if (success) System.out.println("Directory: " + dir + " created");
+
+
+ ArrayList<String> corpus = new ArrayList<String>();
+
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(trainfile),"UTF-8"),32768);
+ String l =null;
+ int sentences = 0;
+ try {
+ while( (l = reader.readLine())!=null) {
+
+ corpus.add(l);
+ if (l.length()<8) sentences++;
+
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ System.out.println("Corpus has "+sentences+" sentences.");
+
+ int partSize = sentences / 20;
+ System.out.println("Prepare corpus for cross annotations with 20 parts with part size "+partSize+" number of lines "+corpus.size());
+
+
+
+ for(int k=0;k<20;k++) {
+ BufferedWriter br = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("split/p-"+k),"UTF-8"));
+ BufferedWriter rest = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("split/r-"+k),"UTF-8"));
+ int skip=k*partSize;
+
+ int countSentences=0;
+ int countSentencesWrote=0;
+ System.out.println("skip from "+skip+" to "+(skip+partSize-1));
+ for(String x : corpus) {
+ if (countSentences>=skip && (countSentences<(skip+partSize)||k==19)){
+ rest.write(x);
+ rest.newLine();
+ if (x.length()<8) countSentencesWrote++;
+ } else {
+ br.write(x);
+ br.newLine();
+ }
+
+ if (x.length()<8) countSentences++;
+ }
+ System.out.println("wrote for this part "+countSentencesWrote);
+ br.flush();
+ br.close();
+ rest.flush();
+ rest.close();
+
+ }
+
+
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/mtag/Evaluator.java b/dependencyParser/mate-tools/src/is2/mtag/Evaluator.java
new file mode 100755
index 0000000..09d1455
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/mtag/Evaluator.java
@@ -0,0 +1,148 @@
+package is2.mtag;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Hashtable;
+import java.util.Map.Entry;
+
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+
+
+/**
+ * Compares the gold morphologic features (ofeats) of a CoNLL-09 file against
+ * the predicted features (pfeats) of a second file and prints accuracy
+ * statistics plus the most frequent error patterns.
+ */
+public class Evaluator {
+
+	/**
+	 * Evaluate predicted morphologic features against the gold standard.
+	 *
+	 * @param act_file gold-standard CoNLL-09 file
+	 * @param pred_file file with predicted features
+	 * @param format corpus format name (currently unused; kept for API compatibility)
+	 * @throws Exception if one of the files cannot be read
+	 */
+	public static void evaluate (String act_file, String pred_file, String format) throws Exception {
+
+		CONLLReader09 goldReader = new CONLLReader09(act_file);
+
+		CONLLReader09 predictedReader = new CONLLReader09();
+		predictedReader.startReading(pred_file);
+
+		int total = 0, totalP = 0, corrT = 0;
+		int totalD = 0, corrD = 0, err = 0;
+		int numsent = 0;
+		SentenceData09 goldInstance = goldReader.getNext();
+		SentenceData09 predInstance = predictedReader.getNext();
+
+		// error pattern -> frequency, and error pattern -> example word forms
+		Hashtable<String,Integer> errors = new Hashtable<String,Integer>();
+		Hashtable<String,StringBuffer> words = new Hashtable<String,StringBuffer>();
+
+		while(goldInstance != null) {
+
+			int instanceLength = goldInstance.length();
+
+			if (instanceLength != predInstance.length())
+				System.out.println("Lengths do not match on sentence "+numsent);
+
+			String gold[] = goldInstance.ofeats;
+			String pred[] = predInstance.pfeats;
+
+			// the first item is the root info added during nextInstance(), so we skip it
+			for (int i = 1; i < instanceLength; i++) {
+				if (gold[i].equals(pred[i])||(gold[i].equals("_")&&pred[i]==null)) corrT++;
+				else {
+					String key = "gold: '"+gold[i]+"' pred: '"+pred[i]+"'";
+					Integer cnt = errors.get(key);
+					StringBuffer errWrd = words.get(key);
+					if (cnt==null) {
+						errors.put(key,1);
+						words.put(key, new StringBuffer().append(goldInstance.forms[i]));
+					}
+					else {
+						errors.put(key,cnt+1);
+						errWrd.append(" "+goldInstance.forms[i]);
+					}
+					err++;
+				}
+
+				// fix: split("|") treated '|' as a regex alternation and split
+				// between every character; the feature separator must be escaped
+				String[] gf = gold[i].split("\\|");
+				int eq=0;
+
+				if (pred[i]!=null) {
+					String[] pf = pred[i].split("\\|");
+					totalP += pf.length;
+
+					// recall numerator: gold sub-features found among the predictions
+					for(String g : gf) {
+						for(String p : pf) {
+							if (g.equals(p)) {eq++;break;}
+						}
+					}
+				}
+				totalD += gf.length;
+				corrD += eq;
+			}
+			total += instanceLength - 1; // subtract one to not score the fake root token
+
+			numsent++;
+
+			goldInstance = goldReader.getNext();
+			predInstance = predictedReader.getNext();
+		}
+
+		// sort error patterns by descending frequency
+		ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>(errors.entrySet());
+
+		Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){
+
+			@Override
+			public int compare(Entry<String, Integer> o1,
+					Entry<String, Integer> o2) {
+				// fix: previous code compared boxed Integers with '==', which
+				// is only reliable inside the small-value Integer cache
+				return Integer.compare(o2.getValue(), o1.getValue());
+			}
+		});
+
+		int cnt=0;
+		System.out.println("10 top most errors:");
+		for(Entry<String, Integer> e : opsl) {
+			// fix: the header promised ten entries but the print was commented
+			// out and the loop never stopped; print the ten most frequent ones
+			if (++cnt > 10) break;
+			System.out.println(e.getKey()+" "+e.getValue()+" context: "+words.get(e.getKey()));
+		}
+
+		System.out.println("Tokens: " + total+" Correct: " + corrT+" "+(float)corrT/total+" R "+((float)corrD/totalD)+" tP "+totalP+" tG "+totalD+" P "+(float)corrD/totalP);
+		System.out.println("err: " + err+" total "+total+" corr "+corrT);
+
+	}
+
+	/**
+	 * Entry point: args[0] gold file, args[1] predicted file,
+	 * optional args[2] format name (defaults to "CONLL").
+	 */
+	public static void main (String[] args) throws Exception {
+		String format = "CONLL";
+		if (args.length > 2)
+			format = args[2];
+
+		evaluate(args[0], args[1], format);
+	}
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/mtag/ExtractorM.java b/dependencyParser/mate-tools/src/is2/mtag/ExtractorM.java
new file mode 100644
index 0000000..864b977
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/mtag/ExtractorM.java
@@ -0,0 +1,514 @@
+package is2.mtag;
+
+
+import is2.data.Cluster;
+import is2.data.F2SF;
+import is2.data.Instances;
+import is2.data.InstancesTagger;
+import is2.data.Long2Int;
+import is2.data.Long2IntInterface;
+import is2.data.ParametersFloat;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.tools.IPipe;
+import is2.util.OptionsSuper;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+
+/**
+ * Feature-extraction pipeline for the morphologic tagger: reads a CoNLL-09
+ * corpus, registers all feature parts (words, characters, POS tags and
+ * morphologic feature strings) in the MFO encoder and builds the packed
+ * feature vectors used to score candidate morphologic tags.
+ */
+final public class ExtractorM extends PipeGen implements IPipe {
+
+	public static int _CEND;
+
+
+	private static final String STWRD = "STWRD",STPOS = "STPOS",END = "END",STR = "STR";
+
+	// index -> morphologic feature string (the candidate tag set)
+	public String[] types;
+
+	Cluster cl;
+
+	final public MFO mf =new MFO();
+	public Long2IntInterface li;
+
+
+	// reusable templates for packing feature values into longs; their aN
+	// fields hold bit widths, set once in initValues()
+	final MFO.Data4 d1 = new MFO.Data4(),d2 = new MFO.Data4(),d3 = new MFO.Data4(),dw = new MFO.Data4();
+	final MFO.Data4 dwp = new MFO.Data4(),dp = new MFO.Data4();
+
+
+	private OptionsSuper options;
+	private int _ewrd;
+	static private int _mid, _strp,_endp;
+
+	public ExtractorM (Options options, Long2Int long2Int) throws IOException {
+		this.options = options;
+
+		li =long2Int;
+	}
+
+	public ExtractorM (OptionsSuper options) {
+		this.options = options;
+	}
+
+
+	// word-form id -> morphologic-feature id for unambiguous forms: forms seen
+	// more than 10 times in training with exactly one feature string
+	public HashMap<Integer,Integer> form2morph = new HashMap<Integer, Integer> ();
+
+
+	/**
+	 * Read the corpus three times: once to register all feature parts in the
+	 * encoder, once to create the instances, and once to fill the per-token
+	 * character arrays. Also derives the form2morph shortcut map and the
+	 * candidate tag array (types).
+	 *
+	 * @param file the training corpus (CoNLL-09)
+	 * @return the created instances
+	 */
+	public Instances createInstances(String file) {
+
+		CONLLReader09 depReader = new CONLLReader09(CONLLReader09.NO_NORMALIZE);
+
+		depReader.startReading(file);
+		mf.register(POS,"<root-POS>");
+
+		mf.register(FFEATS, CONLLReader09.NO_TYPE);
+		mf.register(FFEATS, "");
+
+		InstancesTagger is = new InstancesTagger();
+
+		System.out.println("Registering feature parts ");
+
+		// lower-cased form -> set of gold feature strings observed for it
+		HashMap<String,HashSet<String>> op2form = new HashMap<String, HashSet<String>> ();
+		// lower-cased form -> occurrence count
+		HashMap<String,Integer> freq = new HashMap<String, Integer> ();
+
+
+		int ic=0;
+		while(true) {
+			SentenceData09 instance1 = depReader.getNext();
+			if (instance1== null) break;
+			ic++;
+
+
+			String[] w = instance1.forms;
+			for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]);
+			for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]);
+
+			for(int i1 = 0; i1 < w.length; i1++) {
+				mf.register(WORD, w[i1].toLowerCase());
+				Integer f = freq.get(w[i1].toLowerCase());
+
+				if (f==null) freq.put(w[i1].toLowerCase(), 1);
+				else freq.put(w[i1].toLowerCase(), f+1);
+
+				HashSet<String> forms = op2form.get(w[i1].toLowerCase());
+				if (forms==null) {
+					forms = new HashSet<String>();
+					op2form.put(w[i1].toLowerCase(), forms);
+				}
+				forms.add(instance1.ofeats[i1]==null?"_":instance1.ofeats[i1]);
+			}
+			for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1].toLowerCase());
+
+			w = instance1.plemmas;
+			for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]);
+			for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]);
+
+			w = instance1.ppos;
+			for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);
+
+			w = instance1.gpos;
+			for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);
+
+			w = instance1.ofeats;
+			for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) mf.register(FEAT, w[i1]);
+			for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) mf.register(FFEATS, w[i1]);
+
+			// w = instance1.pfeats;
+			//for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) mf.register(FEAT, w[i1]);
+		}
+
+
+		// keep only frequent, unambiguous form -> feature mappings
+		for(Entry<String,HashSet<String>> e : op2form.entrySet()) {
+			if (e.getValue().size()==1 &&freq.get(e.getKey())>10) {
+				// System.out.println("found map "+e.getKey()+" "+e.getValue()+" "+freq.get(e.getKey()));
+				form2morph.put(mf.getValue(ExtractorM.WORD, e.getKey()), mf.getValue(FFEATS, (String)e.getValue().toArray()[0]));
+			}
+		}
+
+		initFeatures();
+
+		mf.calculateBits();
+		initValues();
+
+		System.out.println(""+mf.toString());
+
+		// second pass: build the instances
+		depReader.startReading(file);
+
+		int num1 = 0;
+		long start1 = System.currentTimeMillis();
+
+		System.out.print("Creating Features: ");
+		is.init(ic, mf) ;
+		int del=0;
+
+		while(true) {
+			if (num1 % 100 ==0) {del = outValue(num1, del);}
+			SentenceData09 instance1 = depReader.getNext(is);
+			if (instance1== null) break;
+
+			if (num1>options.count) break;
+
+			num1++;
+		}
+		long end1 = System.currentTimeMillis();
+		System.gc();
+		long mem2 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
+		System.out.print(" time "+(end1-start1)+" mem "+(mem2/1024)+" kb");
+
+		// build the candidate tag array from the registered feature strings
+		types = new String[mf.getFeatureCounter().get(FFEATS)];
+
+		for(Entry<String,Integer> e : mf.getFeatureSet().get(FFEATS).entrySet()) {
+			types[e.getValue()] = e.getKey();
+		}
+
+
+		if (options.clusterFile==null)cl = new Cluster();
+		else cl= new Cluster(options.clusterFile, mf,6);
+
+
+		System.out.println("Num Features: " + types.length);
+
+
+		// third pass: fill the per-token character arrays
+		depReader.startReading(file);
+
+
+
+		int num11=0;
+
+		while(true) {
+
+			SentenceData09 instance = depReader.getNext();
+			if (instance==null) break;
+
+			is.fillChars(instance, num11, _CEND);
+
+
+			if (num11>options.count) break;
+
+			num11++;
+		}
+
+		return is;//.toNativeArray();
+
+	}
+
+	// register every single character of the word as a value of the given type
+	private void registerChars(String type, String word) {
+		for(int i=0;i<word.length();i++) mf.register(type, Character.toString(word.charAt(i)));
+	}
+
+
+	// copy the encoder's computed bit widths into the Data4 packing templates;
+	// must run after mf.calculateBits()
+	public void initValues() {
+		s_feat = mf.getFeatureBits(FFEATS);
+		s_word = mf.getFeatureBits(WORD);
+		s_type = mf.getFeatureBits(TYPE);
+		s_char = mf.getFeatureBits(CHAR);
+		s_pos =mf.getFeatureBits(POS);
+		// dl1.a[0] = s_type; dl1.a[1] = s_pos;
+		// for (int k = 2; k < 7; k++) dl1.a[k] = s_pos;
+
+		d1.a0 = s_type; d1.a1 = s_feat; d1.a2= s_word;
+		d2.a0 = s_type; d2.a1 = s_feat; d2.a2= s_feat; d2.a3= s_feat; d2.a4= s_feat; d2.a5= s_feat; d2.a6= s_feat;
+		d3.a0 = s_type; d3.a1 = s_feat; d3.a2= s_char; d3.a3= s_char; d3.a4= s_char; d3.a5= s_char; d3.a6= s_char; d3.a7= s_char;
+		dp.a0 = s_type; dp.a1 = s_feat; dp.a2= s_pos; dp.a3= s_pos; dp.a4= s_feat;// dp.a5= s_char; dp.a6= s_char; dp.a7= s_char;
+		dw.a0 = s_type; dw.a1 = s_feat;dw.a2= s_word; dw.a3= s_word; dw.a4= s_word; dw.a5= s_word; dw.a6= s_word; dw.a7= s_word;
+		dwp.a0 = s_type; dwp.a1 = s_feat;dwp.a2= s_word ; dwp.a3= s_feat; dwp.a4= s_word;
+
+	}
+
+	// bit widths per feature class, shared with fillFeatureVectorsOne
+	public static short s_feat,s_word,s_type,s_dir,s_dist,s_char,s_pos;
+
+
+
+	/**
+	 * Initialize the features types.
+	 */
+	public void initFeatures() {
+
+		// feature template ids F0..F61
+		for(int t=0;t<62;t++) {
+			mf.register(TYPE,"F"+t);
+		}
+
+
+//		_mid = mf.register(POS, MID);
+		_strp = mf.register(POS, STR);
+		_endp= mf.register(POS, END);
+
+		mf.register(WORD, STR);
+		_ewrd = mf.register(WORD, END);
+
+
+		_CEND = mf.register(CHAR, END);
+
+
+
+
+		// optional features
+		mf.register(WORD,STWRD);
+		mf.register(POS,STPOS);
+
+
+	}
+
+
+	/**
+	 * Build the packed feature vector for token i of sentence ic. The vector
+	 * vs is filled with encoded feature codes (negative entries are skipped by
+	 * the scorer) and terminated with Integer.MIN_VALUE. fs is the token's
+	 * form string, used only for the case/digit shape features.
+	 * NOTE(review): mutates the shared Data4 templates — not thread-safe.
+	 */
+	final public void addCF(InstancesTagger is, int ic, String fs,int i, short pfeat[],short ppos[], int[] forms, int[] lemmas, long[] vs) {
+
+		// first six characters and last five characters of the token
+		// (precomputed in is.fillChars; index 11 holds the token length)
+		int c0= is.chars[ic][i][0], c1=is.chars[ic][i][1], c2=is.chars[ic][i][2], c3=is.chars[ic][i][3], c4=is.chars[ic][i][4],c5=is.chars[ic][i][5];
+		int e0 =is.chars[ic][i][6], e1 =is.chars[ic][i][7],e2 =is.chars[ic][i][8],e3 =is.chars[ic][i][9],e4 =is.chars[ic][i][10];
+
+		int f=1,n=0;
+		// upper: 0 none, 1 only first char, 2 inner only, 3 first plus inner;
+		// number: 1 none, 2 starts with digit, 3 contains digit elsewhere
+		short upper =0, number = 1;
+		for(int k1=0;k1<fs.length();k1++){
+			char c = fs.charAt(k1);
+			if (Character.isUpperCase(c)) {
+				if (k1==0) upper=1;
+				else {
+					// first char + another
+					if (upper==1) upper=3;
+					// another uppercase in the word
+					else if (upper==0) upper=2;
+				}
+			}
+
+			if (Character.isDigit(c) && k1==0) number =2 ;
+			else if (Character.isDigit(c) && number==1) number = 3;
+		}
+
+		int form = forms[i];
+
+		int len = forms.length;
+		long l;
+		d1.v0 = f++; d1.v2=form; l=mf.calc3(d1); vs[n++]=mf.calc3(d1);
+
+		d1.v0 = f++; d1.v2=is.formlc[ic][i]; vs[n++]=mf.calc3(d1);
+
+		// prefix character n-grams
+		d3.v2=c0; d3.v3=c1; d3.v4=c2; d3.v5=c3; d3.v6=c4;
+		d3.v0=f++; vs[n++]=mf.calc3(d3);
+		d3.v0=f++; vs[n++]=mf.calc4(d3);
+		d3.v0=f++; vs[n++]=mf.calc5(d3);
+		d3.v0=f++; vs[n++]=mf.calc6(d3);
+		d3.v0=f++; vs[n++]=mf.calc7(d3);
+
+		if (form!=-1) {
+			d3.v2=c2; d3.v3=c3; d3.v4=c4; d3.v5=c5; d3.v6=cl.getLP(form);
+			d3.v0=f; vs[n++]=mf.calc6(d3); d3.v0=f+1; vs[n++]=mf.calc7(d3);
+		}
+		f+=2;
+
+		if (form>0) {
+			d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3);
+			d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3);
+			d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3);
+		}
+		f+=3;
+
+		// suffix character n-grams, each also combined with the case shape
+		d3.v2=e0; d3.v3=e1; d3.v4=e2; d3.v5=e3; d3.v6=e4;
+		d3.v0 =f++; vs[n++]=mf.calc3(d3);
+		d3.v0 =f++; vs[n++]=l=mf.calc4(d3); vs[n++]=d3.calcs(3, upper, l);
+		d3.v0 =f++; vs[n++]=l=mf.calc5(d3); vs[n++]=d3.calcs(3, upper, l);
+		d3.v0 =f++; vs[n++]=l=mf.calc6(d3); vs[n++]=d3.calcs(3, upper, l);
+		d3.v0 =f++; vs[n++]=l=mf.calc7(d3); vs[n++]=d3.calcs(3, upper, l);
+
+		if (form>0) {
+			d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3);
+			d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3);
+			d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3);
+		}
+		f+=3;
+
+
+		// right-context features (next tokens)
+		dw.v0=f++; dw.v2=i+1<len?forms[i+1]:_ewrd;dw.v3= forms[i];vs[n++]=mf.calc4(dw);
+
+		if (len>i+1) {
+
+			dw.v0=f; dw.v2= forms[i+1]; vs[n++]=mf.calc3(dw);
+			d3.v0=f+1; d3.v2 =is.chars[ic][i+1][0];vs[n++]=mf.calc3(d3);
+			d3.v0=f+2; d3.v2 =is.chars[ic][i+1][6];vs[n++]=mf.calc3(d3);
+
+			d3.v2=e0; d3.v3=e1;
+
+			d3.v0 =f+3; d3.v4 =is.chars[ic][i+1][0];vs[n++]=mf.calc5(d3);
+			d3.v0 =f+4; d3.v4 =is.chars[ic][i+1][6];vs[n++]=mf.calc5(d3);
+
+
+
+			if (is.chars[ic][i+1][11]>1 ) { // instance.forms[i+1].length()
+
+				d3.v0=f+5; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; vs[n++]=mf.calc4(d3);
+				d3.v0=f+6; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; vs[n++]=mf.calc4(d3);
+
+				d3.v2=e0; d3.v3=e1;
+
+				d3.v0=f+7; d3.v4 = is.chars[ic][i+1][0]; d3.v5 =is.chars[ic][i+1][1]; vs[n++]=mf.calc6(d3);
+				d3.v0=f+8; d3.v4 = is.chars[ic][i+1][6]; d3.v5=is.chars[ic][i+1][7]; vs[n++]=mf.calc6(d3);
+
+				if (forms[i+1]>0) {
+					d3.v0=f+9; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3);
+					d3.v0=f+10; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3);
+				}
+			}
+
+			if (forms[i+1]>0) {
+				dw.v0=f+11; dw.v2= cl.getLP(forms[i+1]); dw.v3= forms[i];vs[n++]=mf.calc4(dw);
+			}
+
+			if (len>i+2) {
+				dw.v0=f+12; dw.v2= forms[i+2]; dw.v3 = forms[i+1];vs[n++]=mf.calc4(dw);vs[n++]=mf.calc3(dw);
+//				d2.v0=f+13; d2.v2=pfeat[i+1]; d2.v3= pfeat[i+2]; vs[n++]=mf.calc4(d2);
+				//	dp.v0= f+14; dp.v2=ppos[i+1]; dp.v3=ppos[i+2]; vs[n++]=mf.calc4(dp);
+
+			}
+
+			if (len>i+3) {
+				dw.v0=f+14; dw.v2= forms[i+3]; dw.v3 = forms[i+2]; vs[n++]=mf.calc4(dw); vs[n++]=mf.calc3(dw);
+
+			}
+		}
+		f+=16;
+
+		// length
+		d2.v0=f++; d2.v2=is.chars[ic][i][11];vs[n++]=mf.calc3(d2);
+
+
+		// contains a number
+		d2.v0=f++; d2.v2=number; vs[n++]=mf.calc3(d2);
+		d1.v0=f++; d1.v2=lemmas[i]; vs[n++]=mf.calc3(d1);
+
+		// surrounding-context features (previous tokens, already-tagged pfeat)
+		if (i!=0 &&len>i+1) {
+			dw.v0=f; dw.v2=lemmas[i-1];dw.v3=lemmas[i+1];vs[n++]=mf.calc4(dw);
+			d2.v0=f+1; d2.v2=pfeat[i-1]; d2.v3=pfeat[i+1];vs[n++]=mf.calc4(d2);
+		}
+		f+=2;
+
+		d2.v0= f++; d2.v2=i>=1? pfeat[i-1]:_strp; vs[n++]=mf.calc3(d2);
+		dp.v0= f++; dp.v2=ppos[i]; vs[n++]=mf.calc3(dp);
+
+		if (i>0) {
+			dw.v0 = f++; dw.v2 =i>=1? forms[i-1]:_strp; vs[n++]=mf.calc3(dw);
+			dw.v0 = f++; dw.v2 = i>=1? lemmas[i-1]:_strp; vs[n++]=mf.calc3(dw);
+
+			if (len>i+1) {
+//				d2.v0=f; d2.v2= pfeat[i-1];d2.v3= pfeat[i+1]; vs[n++]=mf.calc4(d2);
+				//	dp.v0= f+1; dp.v2=ppos[i-1]; dp.v3=ppos[i+1]; vs[n++]=mf.calc4(dp);
+
+			}
+			f++;
+			dp.v0= f++; dp.v2=ppos[i]; dp.v3=ppos[i-1]; vs[n++]=mf.calc4(dp);
+
+			if (i>1) {
+				d2.v0=f++; d2.v2=i<2?_strp: pfeat[i-2]; vs[n++]=mf.calc3(d2);
+				d2.v0=f++; d2.v2= pfeat[i-1]; d2.v3= pfeat[i-2]; vs[n++]=mf.calc4(d2);
+
+				dw.v0=f++; dw.v2= forms[i-2]; vs[n++]=mf.calc3(dw);
+				dwp.v0=f++; dwp.v2 = forms[i-1]; dwp.v3 = pfeat[i-2];vs[n++]=mf.calc4(dwp);
+				dwp.v0=f++; dwp.v2 = forms[i-2]; dwp.v3 = pfeat[i-1];vs[n++]=mf.calc4(dwp);
+
+				if (i>2) {
+					d2.v0=f++; d2.v2=pfeat[i-3]; vs[n++]=mf.calc3(d2);
+					d2.v0=f++; d2.v2=pfeat[i-2]; d2.v3= pfeat[i-3]; vs[n++]=mf.calc4(d2);
+					dw.v0=f++; dw.v2 = forms[i-3]; dw.v3 = forms[i-2]; vs[n++]=mf.calc4(dw);
+					// dp.v0= f++; dp.v2=ppos[i-3]; dp.v3=ppos[i-2]; vs[n++]=mf.calc4(dp);
+				}
+			}
+		}
+		// sentinel: the scorer stops at the first MIN_VALUE entry
+		vs[n] = Integer.MIN_VALUE;
+	}
+
+
+
+
+
+
+
+	/**
+	 * Score every candidate feature type for token w1 of sentence n and
+	 * return the index (into types) of the highest-scoring one, or -1 if
+	 * no candidate scores above -1.
+	 */
+	public int fillFeatureVectorsOne(ParametersFloat params, int w1, String form, Instances is, int n, short[] features, long[] vs) {
+		double best = -1;
+		int bestType=-1;
+
+		F2SF f = new F2SF(params.parameters);
+		//is.gfeats[n]
+		addCF((InstancesTagger)is, n, form, w1, features,is.pposs[n], is.forms[n], is.plemmas[n], vs);
+
+		for(int t = 0; t < types.length; t++) {
+
+			f.clear();
+			// the candidate type is encoded in the low bits of every feature code
+			int p = t<<ExtractorM.s_type;
+			for(int k=0;k<vs.length;k++) {
+				if (vs[k]==Integer.MIN_VALUE) break;
+				if (vs[k]>=0) f.add(li.l2i(vs[k]+p));
+			}
+			if (f.score >best) {
+				bestType=t;
+				best =f.score;
+			}
+
+		}
+		return bestType;
+
+	}
+
+
+
+	//static ArrayList<T> todo = new ArrayList<T>();
+	static SentenceData09 instance;
+
+
+	public static int _FC =200;
+
+
+	/**
+	 * Write the lemma that are not mapped by operations
+	 * @param dos
+	 */
+	public void writeMap(DataOutputStream dos) {
+
+		try {
+			dos.writeInt(this.form2morph.size());
+			for(Entry<Integer, Integer> e : form2morph.entrySet()) {
+				dos.writeInt(e.getKey());
+				dos.writeInt(e.getValue());
+			}
+		} catch (IOException e1) {
+			e1.printStackTrace();
+		}
+	}
+
+
+
+	/**
+	 * Read the form-lemma mapping not read by operations
+	 * @param dis
+	 */
+	public void readMap(DataInputStream dis) {
+		try {
+			int size = dis.readInt();
+			for(int i =0; i<size;i++) {
+				form2morph.put(dis.readInt(), dis.readInt());
+			}
+		} catch (IOException e1) {
+			e1.printStackTrace();
+		}
+	}
+
+
+	/* (non-Javadoc)
+	 * @see is2.tools.IPipe#write(java.io.DataOutputStream)
+	 */
+	@Override
+	public void write(DataOutputStream dos) {
+		try {
+			cl.write(dos);
+			writeMap(dos);
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+	}
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/mtag/MFO.java b/dependencyParser/mate-tools/src/is2/mtag/MFO.java
new file mode 100755
index 0000000..d91991e
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/mtag/MFO.java
@@ -0,0 +1,540 @@
+package is2.mtag;
+
+
+import is2.data.IEncoderPlus;
+import is2.util.DB;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+/**
+ * Map Features, do not map long to integer
+ *
+ * @author Bernd Bohnet, 20.09.2009
+ */
+
+final public class MFO implements IEncoderPlus {
+
+	/** The features and its values */
+	// NOTE(review): the maps below are static, so all MFO instances share one
+	// global encoder state — confirm this is intended before instantiating twice
+	static private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>();
+
+	/** The feature class and the number of values */
+	static private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>();
+
+	/** The number of bits needed to encode a feature */
+	static final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>();
+
+	/** Integer counter for long2int */
+	//private int count=0;
+
+	/** Stop growing */
+	public boolean stop=false;
+
+	final public static String NONE="<None>";
+
+	// scratch holder for up to eight (attribute, value) string pairs
+	public static class Data {
+		public final String[] a = new String[8];
+		public final String[] v = new String[8];
+		final short[] s = new short[9];
+		public void clear(int i) {
+			v[i]=null;
+		}
+	}
+
+
+	/**
+	 * Packs up to eight integer feature values into a single long.
+	 * a0..a9 hold the bit widths of each slot, v0..v9 the values; calcN packs
+	 * v0..v(N-1) and returns -1 if any of those values is unset (negative).
+	 * The running bit offset is left in {@code shift} so further values can be
+	 * appended with {@link #calcs}.
+	 */
+	final public static class Data4 {
+		public int shift;
+		public short a0,a1,a2,a3,a4,a5,a6,a7,a8,a9;
+		public int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9;
+
+		// append value v (b bits wide) onto an already packed long l
+		final public long calcs(int b, long v, long l) {
+			if (l<0) return l;
+			l |= v<<shift;
+			shift +=b;
+			return l;
+		}
+
+
+		final public long calc2() {
+
+			if (v0<0||v1<0) return -1;
+
+			long l = v0;
+			shift =a0;
+			l |= (long)v1<<shift;
+			shift +=a1;
+
+			return l;
+		}
+
+
+
+		final public long calc3() {
+
+			if (v0<0||v1<0||v2<0) return -1;
+			// if (v1<0||v2<0) return -1;
+
+			long l = v0;
+			shift =a0;
+			l |= (long)v1<<shift;
+			shift +=a1;
+			l |= (long)v2<<shift;
+			shift=(short) (shift + a2);
+
+			//shift=;
+			return l;
+		}
+
+
+		final public long calc4() {
+			if (v0<0||v1<0||v2<0||v3<0) return -1;
+
+			long l = v0;
+			shift =a0;
+			l |= (long)v1<<shift;
+			shift +=a1;
+			l |= (long)v2<<shift;
+			shift +=a2;
+			l |= (long)v3<<shift;
+			shift= shift +a3;
+
+			return l;
+		}
+
+
+
+		final public long calc5() {
+
+			if (v0<0||v1<0||v2<0||v3<0||v4<0) return -1;
+
+			long l = v0;
+			shift =a0;
+			l |= (long)v1<<shift;
+			shift +=a1;
+			l |= (long)v2<<shift;
+			shift +=a2;
+			l |= (long)v3<<shift;
+			shift +=a3;
+			l |= (long)v4<<shift;
+			shift =shift+a4;
+
+			return l;
+		}
+
+
+		final public long calc6() {
+
+			if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) return -1;
+
+			long l = v0;
+			shift =a0;
+			l |= (long)v1<<shift;
+			shift +=a1;
+			l |= (long)v2<<shift;
+			shift +=a2;
+			l |= (long)v3<<shift;
+			shift +=a3;
+			l |= (long)v4<<shift;
+			shift +=a4;
+			l |= (long)v5<<shift;
+			shift =shift+a5;
+
+			return l;
+		}
+
+		final public long calc7() {
+
+			if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) return -1;
+
+			long l = v0;
+			shift =a0;
+			l |= (long)v1<<shift;
+			shift +=a1;
+			l |= (long)v2<<shift;
+			shift +=a2;
+			l |= (long)v3<<shift;
+			shift +=a3;
+			l |= (long)v4<<shift;
+			shift +=a4;
+			l |= (long)v5<<shift;
+			shift +=a5;
+			l |= (long)v6<<shift;
+			shift =shift+a6;
+
+			return l;
+		}
+
+
+		final public long calc8() {
+
+			if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) return -1;
+
+			long l = v0;
+			shift =a0;
+			l |= (long)v1<<shift;
+			shift +=a1;
+			l |= (long)v2<<shift;
+			shift +=a2;
+			l |= (long)v3<<shift;
+			shift +=a3;
+			l |= (long)v4<<shift;
+			shift +=a4;
+			l |= (long)v5<<shift;
+			shift +=a5;
+			l |= (long)v6<<shift;
+			shift +=a6;
+			l |= (long)v7<<shift;
+			shift =shift+a7;
+
+			return l;
+		}
+
+	}
+
+	public MFO () {}
+
+
+	// public int size() {return count;}
+
+
+
+
+	/**
+	 * Register an attribute class, if it not exists and add a possible value
+	 * @param a the attribute class (e.g. WORD, POS, CHAR)
+	 * @param v the value to register
+	 * @return the integer id assigned to v within class a
+	 */
+	final public int register(String a, String v) {
+
+		HashMap<String,Integer> fs = getFeatureSet().get(a);
+		if (fs==null) {
+			fs = new HashMap<String,Integer>();
+			getFeatureSet().put(a, fs);
+			// id 0 is reserved for the NONE value in every class
+			fs.put(NONE, 0);
+			getFeatureCounter().put(a, 1);
+		}
+		Integer c = getFeatureCounter().get(a);
+
+		Integer i = fs.get(v);
+		if (i==null) {
+			fs.put(v, c);
+			c++;
+			getFeatureCounter().put(a,c);
+			return c-1;
+		} else return i;
+	}
+
+	/**
+	 * Calculates the number of bits needed to encode a feature
+	 */
+	public void calculateBits() {
+
+		int total=0;
+		for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+			int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2)));
+			m_featureBits.put(e.getKey(), bits);
+			total+=bits;
+			// System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1));
+		}
+
+		// System.out.println("total number of needed bits "+total);
+	}
+
+
+
+	@Override
+	public String toString() {
+
+		StringBuffer content = new StringBuffer();
+		for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+			content.append(e.getKey()+" "+e.getValue());
+			content.append(':');
+			// HashMap<String,Integer> vs = getFeatureSet().get(e.getKey());
+			content.append(getFeatureBits(e.getKey()));
+
+			/*if (vs.size()<120)
+			for(Entry<String,Integer> e2 : vs.entrySet()) {
+				content.append(e2.getKey()+" ("+e2.getValue()+") ");
+			}*/
+			content.append('\n');
+
+		}
+		return content.toString();
+	}
+
+
+	// static variant of Data4.calcs: append value v (b bits) onto packed long l
+	static final public long calcs(Data4 d,int b, long v, long l) {
+		if (l<0) return l;
+		l |= v<<d.shift;
+		d.shift +=b;
+		return l;
+	}
+
+
+	static final public short getFeatureBits(String a) {
+		return (short)m_featureBits.get(a).intValue();
+	}
+
+
+
+	/**
+	 * Get the integer place holder of the string value v of the type a
+	 *
+	 * @param t the type
+	 * @param v the value
+	 * @return the integer place holder of v, or -1 if unknown
+	 */
+	final public int getValue(String t, String v) {
+
+		if (m_featureSets.get(t)==null) return -1;
+		Integer vi = m_featureSets.get(t).get(v);
+		if (vi==null) return -1; //stop &&
+		return vi.intValue();
+	}
+
+	/**
+	 * Static version of getValue
+	 * @see getValue
+	 */
+	static final public int getValueS(String a, String v) {
+
+		if (m_featureSets.get(a)==null) return -1;
+		Integer vi = m_featureSets.get(a).get(v);
+		if (vi==null) return -1; //stop &&
+		return vi.intValue();
+	}
+
+	// like getValue but assumes class a exists (throws NPE otherwise)
+	public int hasValue(String a, String v) {
+
+		Integer vi = m_featureSets.get(a).get(v);
+		if (vi==null) return -1;
+		return vi.intValue();
+	}
+
+
+
+	// The calcN(Data4 d) methods below mirror Data4.calcN(): they pack
+	// d.v0..d.v(N-1) using the bit widths d.a0..d.a(N-1), return -1 when a
+	// value is unset, and store the final bit offset back into d.shift so
+	// further values can be appended via calcs().
+
+	final public long calc2(Data4 d) {
+
+		if (d.v0<0||d.v1<0) return -1;
+		// if (d.v1<0||d.v2<0) return -1;
+
+		long l = d.v0;
+		short shift =d.a0;
+		l |= (long)d.v1<<shift;
+		shift +=d.a1;
+		// l |= (long)d.v2<<shift;
+		d.shift=shift;
+
+		//d.shift=;
+		return l;
+	}
+
+
+
+	final public long calc3(Data4 d) {
+
+		if (d.v0<0||d.v1<0||d.v2<0) return -1;
+		// if (d.v1<0||d.v2<0) return -1;
+
+		long l = d.v0;
+		short shift =d.a0;
+		l |= (long)d.v1<<shift;
+		shift +=d.a1;
+		l |= (long)d.v2<<shift;
+		d.shift=shift + d.a2;
+
+		//d.shift=;
+		return l;
+	}
+
+
+	final public long calc4(Data4 d) {
+		if (d.v0<0||d.v1<0||d.v2<0||d.v3<0) return -1;
+
+		long l = d.v0;
+		int shift =d.a0;
+		l |= (long)d.v1<<shift;
+		shift +=d.a1;
+		l |= (long)d.v2<<shift;
+		shift +=d.a2;
+		l |= (long)d.v3<<shift;
+		d.shift= shift +d.a3;
+
+		return l;
+	}
+
+
+
+	final public long calc5(Data4 d) {
+
+		if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0) return -1;
+
+		long l = d.v0;
+		int shift =d.a0;
+		l |= (long)d.v1<<shift;
+		shift +=d.a1;
+		l |= (long)d.v2<<shift;
+		shift +=d.a2;
+		l |= (long)d.v3<<shift;
+		shift +=d.a3;
+		l |= (long)d.v4<<shift;
+		d.shift =shift+d.a4;
+
+		return l;
+	}
+
+
+	final public long calc6(Data4 d) {
+
+		if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0) return -1;
+
+		long l = d.v0;
+		int shift =d.a0;
+		l |= (long)d.v1<<shift;
+		shift +=d.a1;
+		l |= (long)d.v2<<shift;
+		shift +=d.a2;
+		l |= (long)d.v3<<shift;
+		shift +=d.a3;
+		l |= (long)d.v4<<shift;
+		shift +=d.a4;
+		l |= (long)d.v5<<shift;
+		d.shift =shift+d.a5;
+
+		return l;
+	}
+
+	final public long calc7(Data4 d) {
+
+		if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0) return -1;
+
+		long l = d.v0;
+		int shift =d.a0;
+		l |= (long)d.v1<<shift;
+		shift +=d.a1;
+		l |= (long)d.v2<<shift;
+		shift +=d.a2;
+		l |= (long)d.v3<<shift;
+		shift +=d.a3;
+		l |= (long)d.v4<<shift;
+		shift +=d.a4;
+		l |= (long)d.v5<<shift;
+		shift +=d.a5;
+		l |= (long)d.v6<<shift;
+		d.shift =shift+d.a6;
+
+		return l;
+	}
+
+
+	final public long calc8(Data4 d) {
+
+		if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0||d.v7<0) return -1;
+
+		long l = d.v0;
+		int shift =d.a0;
+		l |= (long)d.v1<<shift;
+		shift +=d.a1;
+		l |= (long)d.v2<<shift;
+		shift +=d.a2;
+		l |= (long)d.v3<<shift;
+		shift +=d.a3;
+		l |= (long)d.v4<<shift;
+		shift +=d.a4;
+		l |= (long)d.v5<<shift;
+		shift +=d.a5;
+		l |= (long)d.v6<<shift;
+		shift +=d.a6;
+		l |= (long)d.v7<<shift;
+		d.shift =shift+d.a7;
+
+		return l;
+	}
+
+
+
+
+
+
+
+	/**
+	 * Maps a long to a integer value. This is very useful to save memory for sparse data long values
+	 * @param node
+	 * @return the integer
+	 */
+	static public int misses = 0;
+	static public int good = 0;
+
+
+
+
+	/**
+	 * Write the data
+	 * @param dos
+	 * @throws IOException
+	 */
+	static public void writeData(DataOutputStream dos) throws IOException {
+
+		dos.writeInt(getFeatureSet().size());
+		for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) {
+			dos.writeUTF(e.getKey());
+			dos.writeInt(e.getValue().size());
+
+			for(Entry<String,Integer> e2 : e.getValue().entrySet()) {
+
+				if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey());
+				dos.writeUTF(e2.getKey());
+				dos.writeInt(e2.getValue());
+			}
+		}
+	}
+
+	// read back what writeData wrote and recompute the bit widths
+	public void read(DataInputStream din) throws IOException {
+
+		int size = din.readInt();
+		for(int i=0; i<size;i++) {
+			String k = din.readUTF();
+			int size2 = din.readInt();
+
+			HashMap<String,Integer> h = new HashMap<String,Integer>();
+			getFeatureSet().put(k,h);
+			for(int j = 0;j<size2;j++) {
+				h.put(din.readUTF(), din.readInt());
+			}
+			getFeatureCounter().put(k, size2);
+		}
+
+		calculateBits();
+	}
+
+
+	/**
+	 * Clear the data
+	 */
+	static public void clearData() {
+		getFeatureSet().clear();
+		m_featureBits.clear();
+		getFeatureSet().clear();
+	}
+
+	public HashMap<String,Integer> getFeatureCounter() {
+		return m_featureCounters;
+	}
+
+	static public HashMap<String,HashMap<String,Integer>> getFeatureSet() {
+		return m_featureSets;
+	}
+
+	// invert a value->id map into an id-indexed array of values
+	static public String[] reverse(HashMap<String,Integer> v){
+		String[] set = new String[v.size()];
+		for(Entry<String,Integer> e : v.entrySet()) {
+			set[e.getValue()]=e.getKey();
+		}
+		return set;
+	}
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/mtag/Options.java b/dependencyParser/mate-tools/src/is2/mtag/Options.java
new file mode 100755
index 0000000..6b9d806
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/mtag/Options.java
@@ -0,0 +1,45 @@
+package is2.mtag;
+
+import is2.util.OptionsSuper;
+
+public final class Options extends OptionsSuper {
+
+
+ public Options (String[] args) {
+
+ for(int i = 0; i < args.length; i++) {
+
+ if (args[i].equals("--help")) explain();
+
+ if (args[i].equals("-nonormalize")) {
+ normalize=false;
+ } else if (args[i].equals("-features")) {
+ features= args[i+1]; i++;
+ } else if (args[i].equals("-hsize")) {
+ hsize= Integer.parseInt(args[i+1]); i++;
+ } else if (args[i].equals("-len")) {
+ maxLen= Integer.parseInt(args[i+1]); i++;
+ } else super.addOption(args, i);
+ }
+ }
+
+ private void explain() {
+ System.out.println("Usage: ");
+ System.out.println("java -cp anna.jar is2.mtag.Tagger [Options]");
+ System.out.println();
+ System.out.println("Example: ");
+ System.out.println(" java -cp mate.jar is2.mtag.Tagger -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6");
+ System.out.println("");
+ System.out.println("Options:");
+ System.out.println("");
+ System.out.println(" -train <file> the corpus a model is trained on; default "+this.trainfile);
+ System.out.println(" -test <file> the input corpus for testing; default "+this.testfile);
+ System.out.println(" -out <file> the output corpus (result) of a test run; default "+this.outfile);
+ System.out.println(" -model <file> the parsing model for traing the model is stored in the files");
+ System.out.println(" and for parsing the model is load from this file; default "+this.modelName);
+ System.out.println(" -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters);
+ System.out.println(" -count <number> the n first sentences of the corpus are take for the training default "+this.count);
+
+ System.exit(0);
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/mtag/Pipe.java b/dependencyParser/mate-tools/src/is2/mtag/Pipe.java
new file mode 100755
index 0000000..b25b953
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/mtag/Pipe.java
@@ -0,0 +1,508 @@
+package is2.mtag;
+
+
+import is2.data.Cluster;
+import is2.data.F2SF;
+import is2.data.Instances;
+import is2.data.InstancesTagger;
+import is2.data.Long2Int;
+import is2.data.Long2IntInterface;
+import is2.data.ParametersFloat;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.tools.IPipe;
+import is2.util.OptionsSuper;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+
+final public class Pipe extends PipeGen implements IPipe {
+
+ public static int _CEND;
+
+
+ private static final String STWRD = "STWRD",STPOS = "STPOS",END = "END",STR = "STR";
+
+ public String[] types;
+
+ Cluster cl;
+
+ final public MFO mf =new MFO();
+ public Long2IntInterface li;
+
+
+
+ final MFO.Data4 d1 = new MFO.Data4(),d2 = new MFO.Data4(),d3 = new MFO.Data4(),dw = new MFO.Data4();
+ final MFO.Data4 dwp = new MFO.Data4(),dp = new MFO.Data4();
+
+
+ private OptionsSuper options;
+ private int _ewrd;
+ static private int _mid, _strp,_endp;
+
+ public Pipe (Options options, Long2Int long2Int) throws IOException { // training-time constructor: options plus the feature-index mapper
+ this.options = options;
+
+ li =long2Int;
+ }
+
+ public Pipe (OptionsSuper options) { // load-time constructor: li is assigned later (see Tagger.readModel)
+ this.options = options;
+ }
+
+
+ public HashMap<Integer,Integer> form2morph = new HashMap<Integer, Integer> ();
+
+
+ public Instances createInstances(String file) { // reads the CoNLL-09 training corpus in three passes: (1) register feature values, (2) build feature vectors, (3) fill character arrays
+
+ CONLLReader09 depReader = new CONLLReader09(CONLLReader09.NO_NORMALIZE);
+
+ depReader.startReading(file);
+ mf.register(POS,"<root-POS>");
+
+ mf.register(FEAT, CONLLReader09.NO_TYPE);
+ mf.register(FEAT, "");
+
+ InstancesTagger is = new InstancesTagger();
+
+ System.out.println("Registering feature parts ");
+
+ HashMap<String,HashSet<String>> op2form = new HashMap<String, HashSet<String>> (); // lower-cased form -> set of morph-feature strings observed for it
+ HashMap<String,Integer> freq = new HashMap<String, Integer> (); // lower-cased form -> corpus frequency
+
+
+ int ic=0;
+ while(true) { // pass 1: register every word, character, pos and feat value with the mapper mf
+ SentenceData09 instance1 = depReader.getNext();
+ if (instance1== null) break;
+ ic++;
+
+
+ String[] w = instance1.forms;
+ for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]);
+ for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]);
+ for(int i1 = 0; i1 < w.length; i1++) {
+ mf.register(WORD, w[i1].toLowerCase());
+ Integer f = freq.get(w[i1].toLowerCase());
+ if (f==null) freq.put(w[i1].toLowerCase(), 1);
+ else freq.put(w[i1].toLowerCase(), f+1);
+
+ HashSet<String> forms = op2form.get(w[i1].toLowerCase());
+ if (forms==null) {
+ forms = new HashSet<String>();
+ op2form.put(w[i1].toLowerCase(), forms);
+ }
+ forms.add(instance1.ofeats[i1]==null?"_":instance1.ofeats[i1]); // "_" stands in for a missing morph feature
+ }
+ for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1].toLowerCase());
+
+ w = instance1.plemmas;
+ for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]);
+ for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]);
+
+ w = instance1.ppos;
+ for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);
+
+ w = instance1.gpos;
+ for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);
+
+ w = instance1.ofeats;
+ for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) mf.register(FEAT, w[i1]);
+
+ // w = instance1.pfeats;
+ //for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) mf.register(FEAT, w[i1]);
+ }
+
+
+ for(Entry<String,HashSet<String>> e : op2form.entrySet()) { // frequent forms with exactly one observed feature get a deterministic form->feature mapping
+ if (e.getValue().size()==1 &&freq.get(e.getKey())>10) {
+ // System.out.println("found map "+e.getKey()+" "+e.getValue()+" "+freq.get(e.getKey()));
+ form2morph.put(mf.getValue(Pipe.WORD, e.getKey()), mf.getValue(FEAT, (String)e.getValue().toArray()[0]));
+ }
+ }
+
+ initFeatures();
+
+ mf.calculateBits(); // bit widths must be fixed before any feature vector is computed
+ initValues();
+
+ System.out.println(""+mf.toString());
+
+ depReader.startReading(file); // pass 2: re-read the corpus and build the instances
+
+ int num1 = 0;
+ long start1 = System.currentTimeMillis();
+
+ System.out.print("Creating Features: ");
+ is.init(ic, mf) ;
+ int del=0;
+
+ while(true) {
+ if (num1 % 100 ==0) {del = outValue(num1, del);} // progress output, overwriting the previous value
+ SentenceData09 instance1 = depReader.getNext(is);
+ if (instance1== null) break;
+
+ if (num1>options.count) break; // honor the -count training limit
+
+ num1++;
+ }
+ long end1 = System.currentTimeMillis();
+ System.gc();
+ long mem2 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory();
+ System.out.print(" time "+(end1-start1)+" mem "+(mem2/1024)+" kb");
+
+ types = new String[mf.getFeatureCounter().get(FEAT)]; // reverse index: feature id -> feature string (the tagger's label set)
+
+ for(Entry<String,Integer> e : mf.getFeatureSet().get(FEAT).entrySet()) {
+ types[e.getValue()] = e.getKey();
+ }
+
+
+ if (options.clusterFile==null)cl = new Cluster(); // empty cluster when no word-cluster file was given
+ else cl= new Cluster(options.clusterFile, mf,6);
+
+
+ System.out.println("Num Features: " + types.length);
+
+
+
+ depReader.startReading(file); // pass 3: fill the per-token character arrays
+
+
+
+ int num11=0;
+
+ while(true) {
+
+ SentenceData09 instance = depReader.getNext();
+ if (instance==null) break;
+
+ is.fillChars(instance, num11, _CEND);
+
+
+ if (num11>options.count) break;
+
+ num11++;
+ }
+
+ return is;//.toNativeArray();
+
+ }
+
+ private void registerChars(String type, String word) { // registers every single character of word as a value of the given feature type
+ for(int i=0;i<word.length();i++) mf.register(type, Character.toString(word.charAt(i)));
+ }
+
+
+
+ public void initValues() { // caches the bit widths of each feature category and wires them into the Data4 encoding templates; requires mf.calculateBits() to have run
+ s_feat = mf.getFeatureBits(FEAT);
+ s_word = mf.getFeatureBits(WORD);
+ s_type = mf.getFeatureBits(TYPE);
+ s_char = mf.getFeatureBits(CHAR);
+ s_pos =mf.getFeatureBits(POS);
+ // dl1.a[0] = s_type; dl1.a[1] = s_pos;
+ // for (int k = 2; k < 7; k++) dl1.a[k] = s_pos;
+
+ d1.a0 = s_type; d1.a1 = s_feat; d1.a2= s_word; // template: type + feat + one word
+ d2.a0 = s_type; d2.a1 = s_feat; d2.a2= s_feat; d2.a3= s_feat; d2.a4= s_feat; d2.a5= s_feat; d2.a6= s_feat; // template: type + feat n-gram
+ d3.a0 = s_type; d3.a1 = s_feat; d3.a2= s_char; d3.a3= s_char; d3.a4= s_char; d3.a5= s_char; d3.a6= s_char; d3.a7= s_char; // template: type + feat + character window
+ dp.a0 = s_type; dp.a1 = s_feat; dp.a2= s_pos; dp.a3= s_pos; dp.a4= s_feat;// dp.a5= s_char; dp.a6= s_char; dp.a7= s_char;
+ dw.a0 = s_type; dw.a1 = s_feat;dw.a2= s_word; dw.a3= s_word; dw.a4= s_word; dw.a5= s_word; dw.a6= s_word; dw.a7= s_word; // template: type + feat + word window
+ dwp.a0 = s_type; dwp.a1 = s_feat;dwp.a2= s_word ; dwp.a3= s_feat; dwp.a4= s_word; // template: type + feat + word/feat mix
+
+ }
+
+ public static short s_feat,s_word,s_type,s_dir,s_dist,s_char,s_pos;
+
+
+
+ /**
+ * Register the feature types (templates F0..F61) and the sentinel values for start/end of word and sentence.
+ */
+ public void initFeatures() {
+
+ for(int t=0;t<62;t++) {
+ mf.register(TYPE,"F"+t); // one TYPE value per feature template used in addCF
+ }
+
+
+// _mid = mf.register(POS, MID);
+ _strp = mf.register(POS, STR); // sentinel POS: before sentence start
+ _endp= mf.register(POS, END); // sentinel POS: after sentence end
+
+ mf.register(WORD, STR);
+ _ewrd = mf.register(WORD, END); // sentinel word used when looking past the last token
+
+
+ _CEND = mf.register(CHAR, END); // sentinel character; also passed to InstancesTagger.fillChars
+
+
+
+
+ // optional features
+ mf.register(WORD,STWRD);
+ mf.register(POS,STPOS);
+
+
+ }
+
+
+ final public void addCF(InstancesTagger is, int ic, String fs,int i, int pfeat[],short ppos[], int[] forms, int[] lemmas, long[] vs) { // encodes all feature values for token i of sentence ic into vs, terminated by an Integer.MIN_VALUE sentinel; f numbers the template, n indexes vs
+
+ int c0= is.chars[ic][i][0], c1=is.chars[ic][i][1], c2=is.chars[ic][i][2], c3=is.chars[ic][i][3], c4=is.chars[ic][i][4],c5=is.chars[ic][i][5]; // chars[...][0..5]: presumably leading characters of the form -- confirm in InstancesTagger.fillChars
+ int e0 =is.chars[ic][i][6], e1 =is.chars[ic][i][7],e2 =is.chars[ic][i][8],e3 =is.chars[ic][i][9],e4 =is.chars[ic][i][10]; // chars[...][6..10]: presumably trailing characters -- confirm in InstancesTagger.fillChars
+
+ int f=1,n=0;
+ short upper =0, number = 1; // capitalization / digit shape flags computed below
+ for(int k1=0;k1<fs.length();k1++){
+ char c = fs.charAt(k1);
+ if (Character.isUpperCase(c)) {
+ if (k1==0) upper=1; // upper: 0 none, 1 first char only, 2 inner only, 3 first plus another
+ else {
+ // first char + another
+ if (upper==1) upper=3;
+ // another uppercase in the word
+ else if (upper==0) upper=2;
+ }
+ }
+
+ if (Character.isDigit(c) && k1==0) number =2 ; // number: 1 none, 2 starts with digit, 3 digit elsewhere
+ else if (Character.isDigit(c) && number==1) number = 3;
+ }
+
+ int form = forms[i];
+
+ int len = forms.length;
+ long l;
+ d1.v0 = f++; d1.v2=form; l=mf.calc3(d1); vs[n++]=mf.calc3(d1); // the word form itself
+
+ d1.v0 = f++; d1.v2=is.formlc[ic][i]; vs[n++]=mf.calc3(d1); // lower-cased form
+
+ d3.v2=c0; d3.v3=c1; d3.v4=c2; d3.v5=c3; d3.v6=c4; // prefixes of increasing length
+ d3.v0=f++; vs[n++]=mf.calc3(d3);
+ d3.v0=f++; vs[n++]=mf.calc4(d3);
+ d3.v0=f++; vs[n++]=mf.calc5(d3);
+ d3.v0=f++; vs[n++]=mf.calc6(d3);
+ d3.v0=f++; vs[n++]=mf.calc7(d3);
+
+ if (form!=-1) { // combine character context with the word's cluster id
+ d3.v2=c2; d3.v3=c3; d3.v4=c4; d3.v5=c5; d3.v6=cl.getLP(form);
+ d3.v0=f; vs[n++]=mf.calc6(d3); d3.v0=f+1; vs[n++]=mf.calc7(d3);
+ }
+ f+=2; // template ids advance even when the guarded features were skipped
+
+ if (form>0) {
+ d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3);
+ d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3);
+ d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3);
+ }
+ f+=3;
+
+ d3.v2=e0; d3.v3=e1; d3.v4=e2; d3.v5=e3; d3.v6=e4; // suffixes of increasing length, each also conjoined with the capitalization flag
+ d3.v0 =f++; vs[n++]=mf.calc3(d3);
+ d3.v0 =f++; vs[n++]=l=mf.calc4(d3); vs[n++]=d3.calcs(3, upper, l);
+ d3.v0 =f++; vs[n++]=l=mf.calc5(d3); vs[n++]=d3.calcs(3, upper, l);
+ d3.v0 =f++; vs[n++]=l=mf.calc6(d3); vs[n++]=d3.calcs(3, upper, l);
+ d3.v0 =f++; vs[n++]=l=mf.calc7(d3); vs[n++]=d3.calcs(3, upper, l);
+
+ if (form>0) {
+ d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3);
+ d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3);
+ d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3);
+ }
+ f+=3;
+
+
+ dw.v0=f++; dw.v2=i+1<len?forms[i+1]:_ewrd;dw.v3= forms[i];vs[n++]=mf.calc4(dw); // bigram with the next word (or the END sentinel)
+
+ if (len>i+1) { // right-context features: next word's form and its first/last characters
+
+ dw.v0=f; dw.v2= forms[i+1]; vs[n++]=mf.calc3(dw);
+ d3.v0=f+1; d3.v2 =is.chars[ic][i+1][0];vs[n++]=mf.calc3(d3);
+ d3.v0=f+2; d3.v2 =is.chars[ic][i+1][6];vs[n++]=mf.calc3(d3);
+
+ d3.v2=e0; d3.v3=e1;
+
+ d3.v0 =f+3; d3.v4 =is.chars[ic][i+1][0];vs[n++]=mf.calc5(d3);
+ d3.v0 =f+4; d3.v4 =is.chars[ic][i+1][6];vs[n++]=mf.calc5(d3);
+
+
+
+ if (is.chars[ic][i+1][11]>1 ) { // instance.forms[i+1].length()
+
+ d3.v0=f+5; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; vs[n++]=mf.calc4(d3);
+ d3.v0=f+6; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; vs[n++]=mf.calc4(d3);
+
+ d3.v2=e0; d3.v3=e1;
+
+ d3.v0=f+7; d3.v4 = is.chars[ic][i+1][0]; d3.v5 =is.chars[ic][i+1][1]; vs[n++]=mf.calc6(d3);
+ d3.v0=f+8; d3.v4 = is.chars[ic][i+1][6]; d3.v5=is.chars[ic][i+1][7]; vs[n++]=mf.calc6(d3);
+
+ if (forms[i+1]>0) {
+ d3.v0=f+9; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3);
+ d3.v0=f+10; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3);
+ }
+ }
+
+ if (forms[i+1]>0) {
+ dw.v0=f+11; dw.v2= cl.getLP(forms[i+1]); dw.v3= forms[i];vs[n++]=mf.calc4(dw); // current word with next word's cluster
+ }
+
+ if (len>i+2) { // look-ahead bigrams two and three tokens to the right
+ dw.v0=f+12; dw.v2= forms[i+2]; dw.v3 = forms[i+1];vs[n++]=mf.calc4(dw);vs[n++]=mf.calc3(dw);
+// d2.v0=f+13; d2.v2=pfeat[i+1]; d2.v3= pfeat[i+2]; vs[n++]=mf.calc4(d2);
+ // dp.v0= f+14; dp.v2=ppos[i+1]; dp.v3=ppos[i+2]; vs[n++]=mf.calc4(dp);
+
+ }
+
+ if (len>i+3) {
+ dw.v0=f+14; dw.v2= forms[i+3]; dw.v3 = forms[i+2]; vs[n++]=mf.calc4(dw); vs[n++]=mf.calc3(dw);
+
+ }
+ }
+ f+=16;
+
+ // length
+ d2.v0=f++; d2.v2=is.chars[ic][i][11];vs[n++]=mf.calc3(d2); // chars[...][11] holds the word length (see guard above)
+
+
+ // contains a number
+ d2.v0=f++; d2.v2=number; vs[n++]=mf.calc3(d2);
+ d1.v0=f++; d1.v2=lemmas[i]; vs[n++]=mf.calc3(d1); // predicted lemma of the token
+
+ if (i!=0 &&len>i+1) { // surrounding lemma / feature context (both neighbours exist)
+ dw.v0=f; dw.v2=lemmas[i-1];dw.v3=lemmas[i+1];vs[n++]=mf.calc4(dw);
+ d2.v0=f+1; d2.v2=pfeat[i-1]; d2.v3=pfeat[i+1];vs[n++]=mf.calc4(d2);
+ }
+ f+=2;
+
+ d2.v0= f++; d2.v2=i>=1? pfeat[i-1]:_strp; vs[n++]=mf.calc3(d2); // previous predicted feature (or start sentinel)
+ dp.v0= f++; dp.v2=ppos[i]; vs[n++]=mf.calc3(dp); // predicted part-of-speech of the token
+
+ if (i>0) { // left-context features; pfeat values to the left are already predicted
+ dw.v0 = f++; dw.v2 =i>=1? forms[i-1]:_strp; vs[n++]=mf.calc3(dw);
+ dw.v0 = f++; dw.v2 = i>=1? lemmas[i-1]:_strp; vs[n++]=mf.calc3(dw);
+
+ if (len>i+1) {
+// d2.v0=f; d2.v2= pfeat[i-1];d2.v3= pfeat[i+1]; vs[n++]=mf.calc4(d2);
+ // dp.v0= f+1; dp.v2=ppos[i-1]; dp.v3=ppos[i+1]; vs[n++]=mf.calc4(dp);
+
+ }
+ f++;
+ dp.v0= f++; dp.v2=ppos[i]; dp.v3=ppos[i-1]; vs[n++]=mf.calc4(dp);
+
+ if (i>1) { // second and third token to the left
+ d2.v0=f++; d2.v2=i<2?_strp: pfeat[i-2]; vs[n++]=mf.calc3(d2);
+ d2.v0=f++; d2.v2= pfeat[i-1]; d2.v3= pfeat[i-2]; vs[n++]=mf.calc4(d2);
+
+ dw.v0=f++; dw.v2= forms[i-2]; vs[n++]=mf.calc3(dw);
+ dwp.v0=f++; dwp.v2 = forms[i-1]; dwp.v3 = pfeat[i-2];vs[n++]=mf.calc4(dwp);
+ dwp.v0=f++; dwp.v2 = forms[i-2]; dwp.v3 = pfeat[i-1];vs[n++]=mf.calc4(dwp);
+
+ if (i>2) {
+ d2.v0=f++; d2.v2=pfeat[i-3]; vs[n++]=mf.calc3(d2);
+ d2.v0=f++; d2.v2=pfeat[i-2]; d2.v3= pfeat[i-3]; vs[n++]=mf.calc4(d2);
+ dw.v0=f++; dw.v2 = forms[i-3]; dw.v3 = forms[i-2]; vs[n++]=mf.calc4(dw);
+ // dp.v0= f++; dp.v2=ppos[i-3]; dp.v3=ppos[i-2]; vs[n++]=mf.calc4(dp);
+ }
+ }
+ }
+ vs[n] = Integer.MIN_VALUE; // end-of-list sentinel; consumers must stop here
+ }
+
+
+
+
+
+
+
+ public int fillFeatureVectorsOne(ParametersFloat params, int w1, String form, Instances is, int n, int[] features, long[] vs) { // scores every morphologic tag for token w1 of sentence n and returns the index of the best-scoring one
+ double best = -1;
+ int bestType=-1;
+
+ F2SF f = new F2SF(params.parameters);
+ //is.gfeats[n]
+ addCF((InstancesTagger)is, n, form, w1, features,is.pposs[n], is.forms[n], is.plemmas[n], vs); // fills vs, terminated by an Integer.MIN_VALUE sentinel
+
+ for(int t = 0; t < types.length; t++) {
+
+ f.clear();
+ int p = t<<Pipe.s_type; // shift the candidate tag into the template-id slot of each feature
+ for(int k=vs.length-1;k>=0;k--) if (vs[k]>=0) f.add(li.l2i(vs[k]+p)); // NOTE(review): scans the whole array instead of stopping at the sentinel written by addCF; leftover positive entries from a previous, longer call on the same vs would be scored too -- confirm vs is always freshly zeroed or fully overwritten
+ if (f.score >best) {
+ bestType=t;
+ best =f.score;
+ }
+
+ }
+ return bestType;
+
+ }
+
+
+
+ //static ArrayList<T> todo = new ArrayList<T>();
+ static SentenceData09 instance;
+
+
+ public static int _FC =200;
+
+
+ /**
+ * Write the lemma that are not mapped by operations
+ * @param dos
+ */
+ public void writeMap(DataOutputStream dos) { // serializes form2morph as a size-prefixed list of (form id, feature id) int pairs
+
+ try {
+ dos.writeInt(this.form2morph.size());
+ for(Entry<Integer, Integer> e : form2morph.entrySet()) {
+ dos.writeInt(e.getKey());
+ dos.writeInt(e.getValue());
+ }
+ } catch (IOException e1) {
+ e1.printStackTrace(); // best effort: the model file will simply lack the mapping
+ }
+ }
+
+
+
+ /**
+ * Read the form-lemma mapping not read by operations
+ * @param dis
+ */
+ public void readMap(DataInputStream dis) { // counterpart of writeMap: restores the form2morph mapping from the model stream
+ try {
+ int size = dis.readInt();
+ for(int i =0; i<size;i++) {
+ form2morph.put(dis.readInt(), dis.readInt()); // evaluation order is left-to-right, so key is read before value
+ }
+ } catch (IOException e1) {
+ e1.printStackTrace(); // best effort: a partially read map is kept as-is
+ }
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.tools.IPipe#write(java.io.DataOutputStream)
+ */
+ @Override
+ public void write(DataOutputStream dos) { // IPipe hook: appends the cluster data and the form->feature map to the model stream
+ try {
+ cl.write(dos);
+ writeMap(dos);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/mtag/Tagger.java b/dependencyParser/mate-tools/src/is2/mtag/Tagger.java
new file mode 100644
index 0000000..da31a5b
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/mtag/Tagger.java
@@ -0,0 +1,371 @@
+package is2.mtag;
+
+
+import is2.data.Cluster;
+import is2.data.FV;
+import is2.data.Instances;
+import is2.data.InstancesTagger;
+import is2.data.Long2Int;
+import is2.data.ParametersFloat;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter09;
+import is2.tools.IPipe;
+import is2.tools.Train;
+import is2.tools.Tool;
+import is2.util.DB;
+import is2.util.OptionsSuper;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Map.Entry;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+import java.util.zip.ZipOutputStream;
+
+
+public class Tagger implements Tool, Train {
+
+ ExtractorM pipe;
+ ParametersFloat params;
+
+
+ /**
+ * Initialize
+ * @param options
+ */
+ public Tagger (Options options) { // loads the model named by -model immediately; failures are only logged
+
+ // load the model
+ try {
+ readModel(options);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ }
+
+ /**
+ * @param string
+ * @throws IOException
+ */
+ public Tagger(String modelFileName) { // convenience constructor: equivalent to "-model <modelFileName>"
+ this(new Options(new String[] {"-model",modelFileName}));
+ }
+
+ public Tagger() { } // no-arg constructor for the command-line entry point; model is loaded later
+
+ public static void main (String[] args) throws FileNotFoundException, Exception
+ { // command-line entry point: optionally train, then test, then evaluate, depending on the flags given
+
+ Options options = new Options(args);
+
+ Tagger tagger = new Tagger();
+
+ if (options.train) {
+
+ Long2Int li = new Long2Int(options.hsize); // feature hashing into a space of hsize
+ tagger.pipe = new ExtractorM (options,li);
+ InstancesTagger is = (InstancesTagger)tagger.pipe.createInstances(options.trainfile);
+ ParametersFloat params = new ParametersFloat(li.size());
+
+ tagger.train(options, tagger.pipe,params,is);
+ tagger.writeModel(options, tagger.pipe, params);
+ }
+
+ if (options.test) {
+
+ tagger.readModel(options); // re-read even after training so testing uses the stored model
+ tagger.out(options,tagger.pipe, tagger.params);
+ }
+
+ if (options.eval) {
+
+ System.out.println("\nEvaluate:");
+ Evaluator.evaluate(options.goldfile, options.outfile,options.format);
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see is2.mtag2.Learn#writeModel(is2.mtag2.Options, is2.mtag2.Pipe, is2.data.ParametersFloat)
+ */
+ public void writeModel(OptionsSuper options, IPipe pipe,ParametersFloat params) { // stores mapping data, parameters and pipe extras into a single zip entry "data"
+
+ try {
+ ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(options.modelName)));
+ zos.putNextEntry(new ZipEntry("data"));
+ DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos));
+
+ MFO.writeData(dos); // feature-string mappings first; readModel reads in the same order
+
+ MFO.clearData(); // the string data is no longer needed once written
+
+ DB.println("number of parameters "+params.parameters.length);
+ dos.flush();
+ params.write(dos);
+ pipe.write(dos); // cluster + form2morph map (see Pipe.write)
+ dos.flush();
+ dos.close(); // closing the DataOutputStream also finishes the zip stream
+ } catch (Exception e){
+ e.printStackTrace();
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see is2.mtag2.Learn#readModel(is2.mtag2.Options)
+ */
+ public void readModel(OptionsSuper options) { // restores pipe and parameters from the zip model; read order mirrors writeModel
+
+ try {
+ pipe = new ExtractorM(options);
+ params = new ParametersFloat(0); // size is set by params.read below
+
+ // load the model
+ ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName)));
+ zis.getNextEntry(); // single entry "data"
+ DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));
+ pipe.mf.read(dis);
+ pipe.initValues();
+ pipe.initFeatures();
+
+ params.read(dis);
+ pipe.li = new Long2Int(params.parameters.length); // hash space must match the trained parameter count
+ pipe.cl = new Cluster(dis);
+ pipe.readMap(dis);
+ dis.close();
+
+ this.pipe.types = new String[pipe.mf.getFeatureCounter().get(ExtractorM.FFEATS)]; // rebuild id -> feature-string label table
+ for(Entry<String,Integer> e :pipe.mf.getFeatureSet().get(ExtractorM.FFEATS).entrySet())
+ this.pipe.types[e.getValue()] = e.getKey();
+
+
+ DB.println("Loading data finished. ");
+
+ DB.println("number of parameter "+params.parameters.length);
+ DB.println("number of classes "+this.pipe.types.length);
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see is2.mtag2.Learn#train(is2.mtag2.Options, is2.mtag2.Pipe, is2.data.ParametersFloat, is2.data.InstancesTagger)
+ */
+ public void train(OptionsSuper options, IPipe pipe, ParametersFloat params, Instances is) { // perceptron-style training: per token, predict the morph tag and update the weights toward the gold features
+
+ int i = 0;
+ int del=0;
+
+ String[] wds = this.pipe.mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorM.WORD)); // id -> word-form strings
+ int numInstances = is.size();
+
+ float upd = (options.numIters*numInstances + 1); // decreasing update weight, used for parameter averaging
+
+
+ for(i = 0; i < options.numIters; i++) {
+
+ long start = System.currentTimeMillis();
+
+
+ long last= System.currentTimeMillis();
+
+ FV pred = new FV(), gold = new FV();
+ int correct =0,count=0;
+
+ for(int n = 0; n < numInstances; n++) {
+
+ upd--;
+
+ if((n+1) % 500 == 0) del= PipeGen.outValueErr(n+1, (count-correct),(float)correct/(float)count,del,last,upd); // progress + running error rate
+
+ int length = is.length(n);
+
+ int feats[] = new int[length];
+ long[] vs = new long[ExtractorM._FC];
+
+
+ for(int w1 = 0; w1 < length; w1++) {
+
+
+ count++;
+
+ if (this.pipe.form2morph.get(is.forms[n][w1])!=null){ // deterministic form->feature mapping: counted correct, no update needed
+ correct++;
+ continue;
+ }
+
+ int bestType = this.pipe.fillFeatureVectorsOne(params, w1, wds[is.forms[n][w1]],is, n, is.gfeats[n],vs); // gold features of neighbours serve as context during training
+ feats[w1]=bestType;
+
+
+ if (bestType == is.gfeats[n][w1] ) { // prediction matches gold: no update
+ correct++;
+ continue;
+ }
+
+ pred.clear();
+ int p = bestType << ExtractorM.s_type;
+ // System.out.println("test type "+bestType+" ex type "+ExtractorM.s_type);
+ for(int k=0;k<vs.length;k++) {
+ if (vs[k]==Integer.MIN_VALUE) break; // stop at the sentinel written by addCF
+ if (vs[k]>=0) pred.add(this.pipe.li.l2i(vs[k]+p));
+ }
+
+ gold.clear();
+ p = is.gfeats[n][w1] << ExtractorM.s_type; // same feature list, shifted by the gold tag
+ for(int k=0;k<vs.length;k++) {
+ if (vs[k]==Integer.MIN_VALUE) break;
+ if (vs[k]>=0) gold.add(this.pipe.li.l2i(vs[k]+p));
+ }
+ params.update(pred,gold, (float)upd, 1.0f); // penalize predicted, reward gold features
+ }
+
+ }
+
+ long end = System.currentTimeMillis();
+ String info = "time "+(end-start);
+ del= PipeGen.outValueErr(numInstances, (count-correct),(float)correct/(float)count,del,last,0,info);
+
+ System.out.println();
+ }
+
+ params.average(i*is.size()); // averaged perceptron: finalize the weights
+
+ }
+
+
+ public void out (OptionsSuper options, IPipe pipe, ParametersFloat params) { // tags the test corpus sentence by sentence and writes the result to options.outfile
+
+
+ try {
+ long start = System.currentTimeMillis();
+
+ CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask);
+ CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask);
+
+ depReader.normalizeOn=false; // keep the surface forms untouched, matching training (NO_NORMALIZE)
+
+ System.out.print("Processing Sentence: ");
+ pipe.initValues();
+
+ int cnt = 0;
+ int del=0;
+ while(true) {
+
+ InstancesTagger is = new InstancesTagger(); // one fresh single-sentence container per iteration
+ is.init(1, this.pipe.mf);
+ cnt++;
+
+ SentenceData09 instance = depReader.getNext(is);
+ if (instance == null || instance.forms == null) break;
+ is.fillChars(instance, 0, ExtractorM._CEND);
+
+ instance = exec(instance, this.pipe, params,(InstancesTagger)is); // predict morph features in place
+
+ SentenceData09 i09 = new SentenceData09(instance);
+ i09.createSemantic(instance);
+
+ if (options.overwritegold) i09.ofeats = i09.pfeats; // replace gold features by predictions in the output
+
+ depWriter.write(i09);
+
+ if (cnt%100==0) del=PipeGen.outValue(cnt, del);
+
+ }
+ depWriter.finishWriting();
+
+ del=PipeGen.outValue(cnt, del);
+
+ long end = System.currentTimeMillis();
+ System.out.println(PipeGen.getSecondsPerInstnace(cnt,(end-start)));
+ System.out.println(PipeGen.getUsedTime((end-start)));
+ } catch(Exception e){
+ e.printStackTrace();
+ }
+ }
+
+
+ private SentenceData09 exec(SentenceData09 instance, ExtractorM pipe, ParametersFloat params, InstancesTagger is) { // predicts pfeats for every token; runs two identical passes so that the second pass can use the first pass's predictions as left/right context (feats is passed into fillFeatureVectorsOne)
+
+ int length = instance.ppos.length;
+
+ short[] feats = new short[instance.gpos.length];
+
+ long vs[] = new long[ExtractorM._FC];
+
+ String[] forms = instance.forms;
+
+ instance.pfeats = new String[instance.gpos.length];
+
+
+ for(int j = 0; j < length; j++) { // pass 1: initial predictions, context feats still mostly 0
+ if (pipe.form2morph.get(is.forms[0][j])!=null) { // deterministic mapping learned in training wins over the classifier
+ feats[j] = (short)pipe.form2morph.get(is.forms[0][j]).intValue();
+ instance.pfeats[j] = this.pipe.types[feats[j]];
+ } else {
+
+ int bestType = pipe.fillFeatureVectorsOne(params,j, forms[j], is, 0,feats,vs);
+ feats[j] = (short)bestType;
+ instance.pfeats[j]= this.pipe.types[bestType];
+ }
+ }
+ for(int j = 0; j < length; j++) { // pass 2: re-predict with the pass-1 tags available as context
+ if (pipe.form2morph.get(is.forms[0][j])!=null) {
+ feats[j] =(short)pipe.form2morph.get(is.forms[0][j]).intValue();
+ instance.pfeats[j] = this.pipe.types[feats[j]];
+ } else {
+
+ int bestType = pipe.fillFeatureVectorsOne(params,j, forms[j], is, 0,feats,vs);
+ feats[j] = (short)bestType;
+ instance.pfeats[j]= this.pipe.types[bestType];
+ }
+ }
+ return instance; // same object, pfeats filled in
+ }
+
+
+
+ /* (non-Javadoc)
+ * @see is2.tools.Tool#apply(is2.data.SentenceData09)
+ */
+ @Override
+ public SentenceData09 apply(SentenceData09 snt) { // Tool interface: tags a single pre-tokenized sentence; returns null on failure
+
+ try {
+ SentenceData09 it = new SentenceData09();
+ it.createWithRoot(snt); // prepend the artificial root token
+
+ InstancesTagger is = new InstancesTagger();
+ is.init(1, pipe.mf);
+ is.createInstance09(it.forms.length);
+
+ String[] forms = it.forms;
+
+
+ int length = forms.length;
+
+ // is.setForm(0, 0, CONLLReader09.ROOT);
+ for(int i=0;i<length;i++) is.setForm(0, i, forms[i]);
+ for(int i=0;i<length;i++) is.setLemma(0, i, it.plemmas[i]);
+ for(int i=0;i<length;i++) is.setPPoss(0, i, it.ppos[i]);
+
+ is.fillChars(it, 0, ExtractorM._CEND);
+
+ exec(it,pipe,params,is); // fills it.pfeats
+ SentenceData09 i09 = new SentenceData09(it);
+ i09.createSemantic(it);
+ return i09;
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+
+ return null; // only reached after an exception
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/Closed.java b/dependencyParser/mate-tools/src/is2/parser/Closed.java
new file mode 100755
index 0000000..ed61657
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/Closed.java
@@ -0,0 +1,32 @@
+package is2.parser;
+
+import is2.data.Parse;
+
+
+final public class Closed { // chart item of the Eisner-style decoder: a closed (complete) span with back-pointers to the items it was built from
+
+ public float p; // score of this item
+// short b,e,m;
+ byte dir; // span direction (0/1 as passed in)
+
+ Closed d; // closed sub-item back-pointer (may be null)
+ Open u; // open sub-item back-pointer (may be null)
+
+ public Closed(short s, short t, int m, int dir,Open u, Closed d, float score) {
+ // this.b = s;
+ // this.e = t;
+ // this.m = (short)m;
+ this.dir = (byte)dir; // note: s, t and m are currently unused (kept only in the commented-out fields)
+ this.u=u;
+ this.d =d;
+ p=score;
+ }
+
+
+ public void create(Parse parse) { // recursively follows the back-pointers to materialize the parse tree
+ if (u != null) u.create(parse);
+ if (d != null) d.create(parse);
+ }
+}
+
+
diff --git a/dependencyParser/mate-tools/src/is2/parser/D5.java b/dependencyParser/mate-tools/src/is2/parser/D5.java
new file mode 100644
index 0000000..58adc0a
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/D5.java
@@ -0,0 +1,254 @@
+/**
+ *
+ */
+package is2.parser;
+
+import is2.data.DX;
+
+import is2.data.IFV;
+import is2.data.Long2IntInterface;
+
+/**
+ * @author Dr. Bernd Bohnet, 30.10.2010
+ *
+ *
+ */
+final public class D5 extends DX { // feature encoder: packs the values v0..vN into a single long using the per-slot bit widths a0..aN; h is the running hash, shift the bits consumed so far
+
+
+
+
+ public long shift; // number of bits used in h so far
+ private long h; // packed feature value; -1 marks an invalid feature (some component was negative)
+
+
+
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#cz2()
+ */
+ final public void cz2() { // pack v0,v1; any negative component invalidates the feature
+
+ if (v0<0||v1<0) {
+ shift=0;
+ h=-1;
+ return ;
+ }
+
+ h = v0 | v1<<(shift=a0);
+ shift +=a1;
+
+ }
+
+
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#cz3()
+ */
+ final public void cz3() { // pack v0..v2
+
+ if (v0<0||v1<0||v2<0) {
+ shift=0;
+ h=-1;
+ return ;
+
+ }
+
+ h = v0 | v1<<(shift=a0) | v2<<(shift +=a1);
+ shift= shift + a2;
+
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#cz4()
+ */
+ final public void cz4() { // pack v0..v3
+ if (v0<0||v1<0||v2<0||v3<0) {
+ shift=0;
+ h=-1;
+ return ;
+ }
+
+ h = v0 | v1<<(shift=a0) | v2<<(shift +=a1) | v3<<(shift +=a2);
+ shift= shift +a3;
+
+ }
+
+
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#cz5()
+ */
+ final public void cz5() { // pack v0..v4
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0) {
+ shift=0;
+ h=-1;
+ return ;
+ }
+
+ h = v0 | v1<<(shift=a0) | v2<<(shift +=a1) | v3<<(shift +=a2) | v4<<(shift +=a3);
+ shift =shift+a4;
+
+
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#cz6()
+ */
+ final public void cz6() { // pack v0..v5
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {
+ shift=0;
+ h=-1;
+ return ;
+ }
+
+ h = v0 | v1<<(shift=a0) | v2<<(shift +=a1) | v3<<(shift +=a2) | v4<<(shift +=a3) | v5<<(shift +=a4);
+ shift =shift+a5;
+
+ }
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#cz7()
+ */
+ final public void cz7() { // pack v0..v6
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {
+ shift=0;
+ h=-1;
+ return ;
+ }
+
+ h = v0 | v1<<(shift=a0) | v2<<(shift +=a1) | v3<<(shift +=a2) | v4<<(shift +=a3) | v5<<(shift +=a4) | v6<<(shift +=a5);
+ shift =shift+a6;
+
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#cz8()
+ */
+ final public void cz8() { // pack v0..v7
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {
+ h=-1;
+ shift=0;
+ return ;
+ }
+
+ h = v0 | v1<<(shift=a0) | v2<<(shift +=a1) | v3<<(shift +=a2) | v4<<(shift +=a3) | v5<<(shift +=a4) | v6<<(shift +=a5) | v7<<(shift +=a6);
+ shift =shift+a7;
+
+
+ }
+
+
+
+
+
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#clean()
+ */
+ final public void clean() { // reset all components and the packed state
+ v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0;
+ shift=0;h =0;
+ }
+
+ public final Long2IntInterface _li; // maps packed longs into the parameter index space
+ public D5(Long2IntInterface li) {
+ _li=li;
+ }
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#cs(int, int)
+ */
+ final public long cs(int b, int v) { // append value v using b bits; returns the new packed value or -1 if invalid
+ if (h<0) {
+ h=-1; shift=0;
+ return -1;
+ }
+
+ h |= (long)v<<shift;
+ shift +=b;
+ if (shift>64) { // overflowing 64 bits is logged but not thrown: the feature is silently corrupted
+ System.out.println("shift too large "+shift);
+ new Exception().printStackTrace();
+ }
+
+ return h;
+
+ }
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#csa(int, int)
+ */
+ final public long csa(int b, int v) { // identical to cs(b, v); kept to satisfy the DX interface
+ if (h<0) {
+ h=-1; shift=0; return -1;
+ }
+
+ h |= (long)v<<shift;
+ shift +=b;
+ if (shift>64) {
+ System.out.println("shift too large "+shift);
+ new Exception().printStackTrace();
+ }
+
+ return h;
+
+ }
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#csa(int, int, is2.data.IFV)
+ */
+ final public void csa(int b, int v, IFV f ) { // append value v and immediately add the mapped feature index to the feature vector f
+ if (h<0) {
+ h=-1; shift=0; return;
+ }
+
+ h |= (long)v<<shift;
+ shift +=b;
+ if (shift>64) {
+ System.out.println("shift too large "+shift);
+ new Exception().printStackTrace();
+ }
+
+ f.add((int)_li.l2i(h));
+ }
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#getVal()
+ */
+ public long getVal() { // current packed value; -1 when invalid
+ if (h<0) {
+ h=-1; shift=0; return h;
+ }
+ return h;
+ }
+
+ /* (non-Javadoc)
+ * @see is2.parser52L.DX#map(is2.data.IFV, long)
+ */
+ public void map(IFV f, long l) { // add a previously computed packed value to f; non-positive values are dropped
+ if (l>0) f.add(_li.l2i(l));
+ }
+
+
+
+ /* (non-Javadoc)
+ * @see is2.data.DX#computeLabeValue(short, short)
+ */
+ @Override
+ public int computeLabeValue(int label, int shift) { // shift the edge label into its bit slot
+ return label<<shift;
+ }
+
+ public void fix() { // no-op for this encoder
+
+ }
+
+
+}
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/parser/Decoder.java b/dependencyParser/mate-tools/src/is2/parser/Decoder.java
new file mode 100755
index 0000000..9fe833a
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/Decoder.java
@@ -0,0 +1,161 @@
+package is2.parser;
+
+import java.util.ArrayList;
+import java.util.concurrent.ExecutorService;
+
+
+import is2.data.DataFES;
+import is2.data.Parse;
+import is2.util.DB;
+
+
+/**
+ * @author Bernd Bohnet, 01.09.2009
+ *
+ * This methods do the actual work and they build the dependency trees.
+ */
+final public class Decoder { // builds dependency trees from edge scores: projective CKY-style decoding plus an optional non-projective rearrangement step
+
+ public static final boolean TRAINING = true;
+ public static long timeDecotder; // accumulated ns spent in projective decoding
+ public static long timeRearrange; // accumulated ns spent in rearranging
+
+ /**
+ * Threshold for rearrange edges non-projective
+ */
+ public static float NON_PROJECTIVITY_THRESHOLD = 0.3F;
+
+
+ static ExecutorService executerService =java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS);
+
+ // do not initialize
+ private Decoder() {};
+
+
+ /**
+ * Build a dependency tree based on the data
+ * @param pos part-of-speech tags
+ * @param x the data
+ * @param projective projective or non-projective
+ * @param edges the edges
+ * @return a parse tree
+ * @throws InterruptedException
+ */
+ public static Parse decode(short[] pos, DataFES x, boolean projective, boolean training) throws InterruptedException {
+
+ long ts = System.nanoTime();
+
+ if (executerService.isShutdown()) executerService = java.util.concurrent.Executors.newCachedThreadPool(); // recreate the pool if a previous caller shut it down
+ final int n = pos.length;
+
+ final Open O[][][][] = new Open[n][n][2][]; // chart of open (incomplete) items, indexed [start][end][direction]
+ final Closed C[][][][] = new Closed[n][n][2][]; // chart of closed (complete) items
+
+ ArrayList<ParallelDecoder> pe = new ArrayList<ParallelDecoder>();
+
+ for(int i=0;i<Parser.THREADS ;i++) pe.add(new ParallelDecoder(pos, x, O, C, n));
+
+ for (short k = 1; k < n; k++) { // bottom-up over span widths k; spans of one width are filled in parallel
+
+ // provide the threads the data
+ for (short s = 0; s < n; s++) {
+ short t = (short) (s + k);
+ if (t >= n) break;
+
+ ParallelDecoder.add(s,t);
+ }
+
+ executerService.invokeAll(pe); // blocks until the whole width-k layer is done
+ }
+
+ float bestSpanScore = (-1.0F / 0.0F); // negative infinity
+ Closed bestSpan = null;
+ for (int m = 1; m < n; m++)
+ if (C[0][n - 1][1][m].p > bestSpanScore) {
+ bestSpanScore = C[0][n - 1][1][m].p;
+ bestSpan = C[0][n - 1][1][m];
+ }
+
+ // build the dependency tree from the chart
+ Parse out= new Parse(pos.length);
+
+ bestSpan.create(out); // NOTE(review): NPE if no full span was built (n<2); presumably callers guarantee at least one token plus root -- confirm
+
+ out.heads[0]=-1; // token 0 is the artificial root
+ out.labels[0]=0;
+
+ timeDecotder += (System.nanoTime()-ts);
+
+ ts = System.nanoTime();
+
+ if (!projective) rearrange(pos, out.heads, out.labels,x,training);
+
+ timeRearrange += (System.nanoTime()-ts);
+
+ return out;
+ }
+
+
+ /**
+ * This is the parallel non-projective edge re-arranger
+ *
+ * @param pos part-of-speech tags
+ * @param heads parent child relation
+ * @param labs edge labels
+ * @param x the data
+ * @param edges the existing edges defined by part-of-speech tags
+ * @throws InterruptedException
+ */
+ public static void rearrange(short[] pos, short[] heads, short[] labs, DataFES x, boolean training) throws InterruptedException {
+
+ int threads =(pos.length>Parser.THREADS)? Parser.THREADS: pos.length;
+
+
+
+ // wh what to change, nPar - new parent, nType - new type
+ short wh = -1, nPar = -1,nType = -1;
+ ArrayList<ParallelRearrange> pe = new ArrayList<ParallelRearrange>();
+
+ while(true) { // greedy hill climbing: apply the single best re-attachment until no change exceeds the threshold
+ boolean[][] isChild = new boolean[heads.length][heads.length];
+ for(int i = 1, l1=1; i < heads.length; i++,l1=i)
+ while((l1= heads[l1]) != -1) isChild[l1][i] = true; // isChild[a][i]: a is an ancestor of i (prevents creating cycles)
+
+ float max = Float.NEGATIVE_INFINITY;
+ float p = Extractor.encode3(pos, heads, labs, x); // score of the current tree
+
+ pe.clear();
+ for(int i=0;i<threads;i++) pe.add(new ParallelRearrange( isChild, pos,x,heads,labs));
+
+ for(int ch = 1; ch < heads.length; ch++) {
+
+ for(short pa = 0; pa < heads.length; pa++) {
+ if(ch == pa || pa == heads[ch] || isChild[ch][pa]) continue; // skip self, current parent, and descendants of ch
+
+ ParallelRearrange.add(p,(short) ch, pa);
+ }
+ }
+ executerService.invokeAll(pe); // score all candidate re-attachments in parallel
+
+ for(ParallelRearrange.PA rp :ParallelRearrange.order)
+ if(max < rp.max ) {
+ max = rp.max; wh = rp.wh;
+ nPar = rp.nPar; nType = rp.nType ;
+ }
+ ParallelRearrange.order.clear();
+
+ if(max <= NON_PROJECTIVITY_THRESHOLD) break; // bb: changed from 0.0
+
+ heads[wh] = nPar; // apply the best re-attachment and iterate
+ labs[wh] = nType;
+
+ }
+ }
+
+ public static String getInfo() {
+
+ return "Decoder non-projectivity threshold: "+NON_PROJECTIVITY_THRESHOLD;
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/Edges.java b/dependencyParser/mate-tools/src/is2/parser/Edges.java
new file mode 100644
index 0000000..5ad892b
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/Edges.java
@@ -0,0 +1,206 @@
+/**
+ *
+ */
+package is2.parser;
+
+import is2.data.PipeGen;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+/**
+ * Static registry of the dependency-edge labels observed between pairs of
+ * POS tags during training. Used at parse time to restrict the candidate
+ * labels for an edge to those actually seen for its (head POS, dependent POS)
+ * pair, falling back to the globally most frequent label.
+ *
+ * NOTE(review): all state is static and mutated without synchronization —
+ * presumably populated single-threaded during training; verify before
+ * concurrent use.
+ *
+ * @author Dr. Bernd Bohnet, 13.05.2009;
+ *
+ *
+ */
+public final class Edges {
+
+
+ // edges[posHead][posDep] -> array of labels seen for that POS pair (null if none)
+ private static short[][][] edges;
+ // global frequency of each label, used to pick the default label
+ private static HashMap<Short,Integer> labelCount = new HashMap<Short,Integer>();
+
+ // frequency of each (posHead, posDep, label) triple, used by comparator C
+ private static HashMap<String,Integer> slabelCount = new HashMap<String,Integer>();
+
+
+ // single-element fallback returned by get() for unseen POS pairs;
+ // NOTE(review): this array is shared — callers must not mutate it
+ static short[] def = new short[1];
+
+ private Edges () {}
+
+ /**
+ * Allocates the POS-pair label table.
+ * @param length the number of distinct POS tags
+ */
+ public static void init(int length) {
+ edges = new short[length][length][];
+ }
+
+
+ /** Sets def[0] to the globally most frequent label seen so far. */
+ public static void findDefault(){
+
+ int best =0;
+
+
+
+ for(Entry<Short,Integer> e : labelCount.entrySet()) {
+
+
+ if (best<e.getValue()) {
+ best = e.getValue();
+ def[0]=e.getKey();
+ }
+ }
+
+
+ // labelCount=null;
+ // String[] types = new String[mf.getFeatureCounter().get(PipeGen.REL)];
+ // for (Entry<String, Integer> e : MFO.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey();
+
+ is2.util.DB.println("set default label to "+def[0]+" " );
+
+ // System.out.println("found default "+def[0]);
+
+ }
+
+
+ /** Records that label was seen on an edge from POS pos1 to POS pos2. */
+ final static public void put(int pos1, int pos2, short label) {
+ putD(pos1, pos2,label);
+ // putD(pos2, pos1,!dir, label);
+ }
+
+
+ /** Updates the frequency counters and appends label to edges[pos1][pos2] if new. */
+ final static public void putD(int pos1, int pos2, short label) {
+
+ Integer lc = labelCount.get(label);
+ if (lc==null) labelCount.put(label, 1);
+ else labelCount.put(label, lc+1);
+
+ // key format matches the lookups done by comparator C below
+ String key = pos1+"-"+pos2+label;
+ Integer lcs = slabelCount.get(key);
+ if (lcs==null) slabelCount.put(key, 1);
+ else slabelCount.put(key, lcs+1);
+
+ if (edges[pos1][pos2]==null) {
+ edges[pos1][pos2]=new short[1];
+ edges[pos1][pos2][0]=label;
+
+// edgesh[pos1][pos2][dir?0:1] = new TIntHashSet(2);
+// edgesh[pos1][pos2][dir?0:1].add(label);
+ } else {
+ short labels[] = edges[pos1][pos2];
+ for(short l : labels) {
+ //contains label already?
+ if(l==label) return;
+ }
+
+ // grow the label array by one (arrays kept minimal on purpose)
+ short[] nlabels = new short[labels.length+1];
+ System.arraycopy(labels, 0, nlabels, 0, labels.length);
+ nlabels[labels.length]=label;
+ edges[pos1][pos2]=nlabels;
+
+ // edgesh[pos1][pos2][dir?0:1].add(label);
+ }
+ }
+
+ /**
+ * Returns the labels seen for this POS pair, or the shared one-element
+ * default array when the pair is unseen or an index is negative.
+ */
+ final static public short[] get(int pos1, int pos2) {
+
+ if (pos1<0 || pos2<0 || edges[pos1][pos2]==null) return def;
+ return edges[pos1][pos2];
+ }
+
+
+ /**
+ * Serializes the table: the dimension, then for each POS pair the label
+ * count followed by the labels, then the default label.
+ * NOTE(review): lengths are written as shorts — assumes < 32768 POS tags.
+ * @param d the stream to write to
+ */
+ static public void write(DataOutputStream d) throws IOException {
+
+ int len = edges.length;
+ d.writeShort(len);
+
+ for(int p1 =0;p1<len;p1++) {
+ for(int p2 =0;p2<len;p2++) {
+ if (edges[p1][p2]==null) d.writeShort(0);
+ else {
+ d.writeShort(edges[p1][p2].length);
+ for(int l =0;l<edges[p1][p2].length;l++) {
+ d.writeShort(edges[p1][p2][l]);
+ }
+
+ }
+ }
+ }
+
+ d.writeShort(def[0]);
+
+ }
+
+
+ /**
+ * Deserializes the table in the exact format produced by write().
+ * @param d the stream to read from
+ */
+ public static void read(DataInputStream d) throws IOException {
+ int len = d.readShort();
+
+ edges = new short[len][len][];
+ for(int p1 =0;p1<len;p1++) {
+ for(int p2 =0;p2<len;p2++) {
+ int ll = d.readShort();
+ if (ll==0) {
+ edges[p1][p2]=null;
+ } else {
+ edges[p1][p2] = new short[ll];
+ for(int l =0;l<ll;l++) {
+ edges[p1][p2][l]=d.readShort();
+ }
+ }
+ }
+ }
+
+ def[0]= d.readShort();
+
+ }
+
+ /**
+ * Orders labels by descending frequency of the (posHead, posDep, label)
+ * triple identified by the key passed to the constructor. The key must use
+ * the same "pos1-pos2" format as putD().
+ */
+ public static class C implements Comparator<Short> {
+
+ public C() {
+ super();
+ }
+
+ // "pos1-pos2" prefix; the label is appended for each lookup
+ String _key;
+
+ public C(String key) {
+ super();
+ _key=key;
+ }
+
+ /* (non-Javadoc)
+ * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
+ */
+ @Override
+ public int compare(Short l1, Short l2) {
+
+ // int c1 = labelCount.get(l1);
+ // int c2 = labelCount.get(l2);
+ // if (true) return c1==c2?0:c1>c2?-1:1;
+
+ // NOTE(review): throws NPE if a label was never counted for this key
+ int x1 = slabelCount.get(_key+l1.shortValue());
+ int x2 = slabelCount.get(_key+l2.shortValue());
+ // System.out.println(x1+" "+x2);
+
+
+ // more frequent labels sort first
+ return x1==x2?0:x1>x2?-1:1;
+
+
+
+ }
+
+
+
+
+
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/Evaluator.java b/dependencyParser/mate-tools/src/is2/parser/Evaluator.java
new file mode 100755
index 0000000..c764cc6
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/Evaluator.java
@@ -0,0 +1,94 @@
+package is2.parser;
+
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+
+
+/**
+ * Compares a gold-standard CoNLL-09 file against a predicted one and reports
+ * labeled (LAS) and unlabeled (ULA) attachment scores.
+ */
+public class Evaluator {
+
+
+
+ // NOTE(review): declared but unused in this version — punctuation is NOT
+ // excluded from scoring (see the always-zero punc counter below)
+ public static final String PUNCT ="!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
+
+ /** Aggregate scores returned by evaluate(). */
+ public static class Results {
+
+ public int total; // scored tokens (root excluded)
+ public int corr; // tokens with the correct head
+ public float las; // labeled attachment score, percent with 3 decimals
+ public float ula; // unlabeled attachment score, percent with 3 decimals
+
+ }
+
+ /**
+ * Reads both files sentence by sentence and accumulates attachment scores.
+ * Prints a summary to stdout and returns the aggregate Results.
+ *
+ * NOTE(review): if the predicted file has fewer sentences this throws an
+ * NPE; an empty gold file would divide by zero — confirm inputs upstream.
+ *
+ * @param act_file path to the gold-standard CoNLL-09 file
+ * @param pred_file path to the predicted CoNLL-09 file
+ */
+ public static Results evaluate (String act_file, String pred_file) throws Exception {
+
+ CONLLReader09 goldReader = new CONLLReader09(act_file, -1);
+ CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1);
+
+ int total = 0, corr = 0, corrL = 0;
+ int numsent = 0, corrsent = 0, corrsentL = 0;
+ SentenceData09 goldInstance = goldReader.getNext();
+ SentenceData09 predInstance = predictedReader.getNext();
+
+ while(goldInstance != null) {
+
+ int instanceLength = goldInstance.length();
+
+ if (instanceLength != predInstance.length())
+ System.out.println("Lengths do not match on sentence "+numsent);
+
+ int[] goldHeads = goldInstance.heads;
+ String[] goldLabels = goldInstance.labels;
+ int[] predHeads = predInstance.heads;
+ String[] predLabels = predInstance.labels;
+
+ // whole/wholeL track whether the entire sentence is correct (un)labeled
+ boolean whole = true;
+ boolean wholeL = true;
+
+ // NOTE: the first item is the root info added during nextInstance(), so we skip it.
+
+ // NOTE(review): punc is never incremented, so punctuation counts as scored
+ int punc=0;
+ for (int i = 1; i < instanceLength; i++) {
+ if (predHeads[i] == goldHeads[i]) {
+ corr++;
+
+ if (goldLabels[i].equals(predLabels[i])) corrL++;
+ else {
+ // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ wholeL = false;
+ }
+ }
+ else {
+ // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ whole = false; wholeL = false;
+ }
+ }
+ total += ((instanceLength - 1) - punc); // Subtract one to not score fake root token
+
+ if(whole) corrsent++;
+ if(wholeL) corrsentL++;
+ numsent++;
+
+ goldInstance = goldReader.getNext();
+ predInstance = predictedReader.getNext();
+ }
+
+ Results r = new Results();
+
+ r.total = total;
+ r.corr = corr;
+ // percentages rounded to three decimal places
+ r.las =(float)Math.round(((double)corrL/total)*100000)/1000;
+ r.ula =(float)Math.round(((double)corr /total)*100000)/1000;
+ System.out.print("Total: " + total+" \tCorrect: " + corr+" ");
+ System.out.println("LAS: " + (double)Math.round(((double)corrL/total)*100000)/1000+" \tTotal: " + (double)Math.round(((double)corrsentL/numsent)*100000)/1000+
+ " \tULA: " + (double)Math.round(((double)corr /total)*100000)/1000+" \tTotal: " + (double)Math.round(((double)corrsent /numsent)*100000)/1000);
+
+ return r;
+ }
+
+
+ /** Rounds to four decimal places. NOTE(review): not used by evaluate(). */
+ public static float round (double v){
+
+ return Math.round(v*10000F)/10000F;
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/Extractor.java b/dependencyParser/mate-tools/src/is2/parser/Extractor.java
new file mode 100755
index 0000000..35c90f2
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/Extractor.java
@@ -0,0 +1,973 @@
+package is2.parser;
+
+
+
+import java.util.concurrent.atomic.AtomicInteger;
+
+import is2.data.Cluster;
+import is2.data.D4;
+import is2.data.D6;
+import is2.data.DX;
+
+import is2.data.DataFES;
+import is2.data.F2SF;
+import is2.data.FV;
+import is2.data.IFV;
+import is2.data.Instances;
+import is2.data.Long2IntInterface;
+
+
+import is2.util.DB;
+import is2.util.OptionsSuper;
+
+
+
+
+final public class Extractor {
+
+ static final int _SIB = 85;
+ public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos,s_rel1;
+ public final DX d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ;
+
+ public final Long2IntInterface li;
+
+ public boolean s_stack=false;
+
+ /**
+ * Creates a feature extractor.
+ *
+ * @param li maps 64-bit feature codes to weight-vector indices
+ * @param stack true to include stacking features (predicted heads/labels)
+ * @param what encoding scheme; MULTIPLICATIVE selects D6 encoders, otherwise D5
+ */
+ public Extractor(Long2IntInterface li, boolean stack, int what) {
+
+ s_stack=stack;
+
+ this.li=li;
+
+ // all encoder instances share the same index mapper; the two branches
+ // differ only in the concrete DX implementation used
+ if (what == OptionsSuper.MULTIPLICATIVE) {
+ d0 = new D6(li);dl1 = new D6(li);dl2 = new D6(li);dwr = new D6(li);dr = new D6(li);dwwp = new D6(li);
+ dw = new D6(li);dwp = new D6(li);dlf = new D6(li);d3lp = new D6(li); d2lp = new D6(li); d2pw = new D6(li); d2pp = new D6(li);
+ } else {
+ d0 = new D5(li);dl1 = new D5(li);dl2 = new D5(li);dwr = new D5(li);dr = new D5(li);dwwp = new D5(li);
+ dw = new D5(li);dwp = new D5(li);dlf = new D5(li);d3lp = new D5(li); d2lp = new D5(li); d2pw = new D5(li); d2pp = new D5(li);
+ }
+
+ }
+
+ /**
+ * Initializes the static alphabet sizes (s_rel, s_pos, ...) used to encode
+ * features, from the feature maps collected during training.
+ * NOTE(review): REL, POS, WORD, DIR, ... are presumably constants inherited
+ * from a pipe/feature-name superclass outside this view — verify.
+ *
+ * @param what encoding scheme; MULTIPLICATIVE uses raw counter values,
+ * otherwise bit widths from getFeatureBits()
+ */
+ public static void initStat(int what ) {
+ MFO mf = new MFO();
+ if (what == OptionsSuper.MULTIPLICATIVE) {
+
+ DB.println("mult (d4) ");
+
+ s_rel = mf.getFeatureCounter().get(REL).intValue()*16;
+ s_rel1 =mf.getFeatureCounter().get(REL).intValue()+1;
+ s_pos = mf.getFeatureCounter().get(POS).intValue();
+ s_word = mf.getFeatureCounter().get(WORD).intValue();
+ s_type = mf.getFeatureCounter().get(TYPE).intValue();
+ s_dir = (int)(mf.getFeatureCounter().get(DIR));
+ la = (mf.getValue(DIR, LA));
+ ra = (mf.getValue(DIR, RA));
+ s_dist = (int)(mf.getFeatureCounter().get(DIST));//mf.getFeatureBits(DIST);
+ s_feat = (int)(mf.getFeatureCounter().get(Pipe.FEAT));//mf.getFeatureBits(Pipe.FEAT);
+ // SPATH/LPATH may be absent when no cluster file was used
+ s_spath = (mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH));//mf.getFeatureBits(Cluster.SPATH);
+ s_lpath = (mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH));//mf.getFeatureBits(Cluster.LPATH);
+
+ } else {
+
+ s_rel = mf.getFeatureBits(REL);
+ s_pos = mf.getFeatureBits(POS);
+ s_word = mf.getFeatureBits(WORD);
+ s_type = mf.getFeatureBits(TYPE);
+ s_dir = mf.getFeatureBits(DIR);
+ la = mf.getValue(DIR, LA);
+ ra = mf.getValue(DIR, RA);
+ s_dist = mf.getFeatureBits(DIST);
+ s_feat = mf.getFeatureBits(Pipe.FEAT);
+ s_spath = mf.getFeatureBits(Cluster.SPATH);
+ s_lpath = mf.getFeatureBits(Cluster.LPATH);
+
+ DB.println("shift init (d5) ");
+ }
+
+
+
+ }
+
+ /**
+ * Configures the field widths (a0..a7) of every encoder from the alphabet
+ * sizes set up by initStat(), then freezes each encoder with fix().
+ * Must be called after initStat() and before any feature extraction.
+ */
+ public void init(){
+
+
+
+ d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos; d0.fix();
+ dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; dl1.fix();
+ dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos; dl2.fix();
+ dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word;dwp.fix();
+ dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word; dwwp.fix();
+ dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos; dlf.fix();
+ d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath;d3lp.fix();
+ d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; d2lp.fix(); //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
+ d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word;d2pw.fix(); //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
+ d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; d2pp.fix(); //d3lp.a6 = s_spath; d3lp.a7 = s_spath;
+ }
+
+
+
+
+ /**
+ * Emits the basic "POS between head and dependent" features for the edge
+ * (p, d) into f, tagged with the edge direction, and returns the next
+ * feature template id.
+ */
+ public int basic(short[] pposs, int p, int d, IFV f)
+ {
+ // reset every encoder before this sentence's edge is scored
+ d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean();
+ d3lp.clean(); d2lp.clean(); d2pw.clean(); d2pp.clean();
+
+ int n = 1;
+ final int dir = (p < d) ? ra : la;
+ d0.v0 = n++; d0.v1 = pposs[p]; d0.v2 = pposs[d]; //d0.stop=4;
+
+ // walk the POS tags strictly between the two endpoints
+ final int lo = Math.min(p, d);
+ final int hi = Math.max(p, d);
+ for (int k = lo + 1; k < hi; k++) {
+ d0.v3 = pposs[k];
+ d0.cz4();
+ d0.csa(s_dir, dir, f);
+ }
+ return n;
+ }
+
+
+ /**
+ * Extracts the first-order (single edge) feature codes for the edge
+ * prnt -> dpnt of sentence i into f, and returns the number of codes
+ * written. Covers form/lemma/POS combinations, context POS windows,
+ * cluster features, stacking features and morphological cross-products.
+ * The template counter n assigns each feature group a stable id, so the
+ * statement ORDER below is load-bearing — do not reorder.
+ *
+ * @param is the training/parsing instances
+ * @param i sentence index within is
+ * @param prnt head token index
+ * @param dpnt dependent token index
+ * @param label edge label (already combined with direction by the caller)
+ * @param cluster word-cluster lexicon (used only if it has > 10 entries)
+ * @param f output array of feature codes; cleared on entry
+ * @return the number of feature codes written to f
+ */
+ public int firstm(Instances is, int i, int prnt, int dpnt, int label, Cluster cluster, long[] f)
+ {
+
+
+ for(int k=0;k<f.length;k++) f[k]=0;
+
+ short[] pposs = is.pposs[i];
+ int[] form =is.forms[i];
+ short[][] feats = is.feats[i];
+
+
+ // head/dependent form, lemma and POS
+ int pF = form[prnt],dF = form[dpnt];
+ int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt];
+ int pP = pposs[prnt],dP = pposs[dpnt];
+
+ int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF);
+
+ // final int dir= (prnt < dpnt)? ra:la;
+
+ // rare forms/lemmas beyond maxForm are mapped to -1 (unknown)
+ if (pF>maxForm) pF=-1;
+ if (pL>maxForm) pL=-1;
+
+ if (dF>maxForm) dF=-1;
+ if (dL>maxForm) dL=-1;
+
+
+ int n=3,c=0;
+
+ // form/POS combinations
+ dl2.v1=label;
+ dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.getVal();
+
+
+ dwwp.v1=label;
+ dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.getVal();
+
+ dl1.v1=label;
+ dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.getVal();
+
+ // context POS/forms at offsets +-1 and +-2 (s_str/s_end at boundaries)
+ int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str;
+ int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end;
+
+ int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str;
+ int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end;
+
+ int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd;
+ int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd;
+
+
+ // neutralize context positions that coincide with the other endpoint
+ if (prnt-1 == dpnt) pPm1 =-1;
+ if (prnt == dpnt-1) dPm1 =-1;
+
+ if (prnt+1 == dpnt) pPp1 =-1;
+ if (prnt == dpnt+1) dPp1 =-1;
+
+ if (prnt-2 == dpnt) pPm2 =-1;
+ if (prnt == dpnt-2) dPm2 =-1;
+
+ if (prnt+2 == dpnt) pPp2 =-1;
+ if (prnt == dpnt+2) dPp2 =-1;
+
+
+ dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.v6= (prnt+1==dpnt?4:prnt==dpnt+1?5:6) ; dl1.cz7(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v5=dPm1; dl1.v6= (prnt+1==dpnt?4:prnt==dpnt-1?5:6) ; dl1.cz7(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v3=pPm1; dl1.v6= (prnt-1==dpnt?4:prnt==dpnt-1?5:6) ; dl1.cz7(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v5=dPp1; dl1.v6= (prnt-1==dpnt?4:prnt==dpnt+1?5:6) ; dl1.cz7(); f[c++]=dl1.getVal();
+
+
+ dl1.v0= n++; dl1.v3=pPm1; dl1.v5= (prnt-1==dpnt?4:5) ; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v3=dPm1; dl1.v5= (prnt==dpnt-1?4:5) ; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v3=dPp1; dl1.v5= (prnt==dpnt+1?4:5) ; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v3=pPp1; dl1.v5= (prnt+1==dpnt?4:5) ; dl1.cz6(); f[c++]=dl1.getVal();
+
+ dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.v6= (prnt+2==dpnt?4:prnt==dpnt+2?5:6) ; dl1.cz7(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v5=dPm2; dl1.v6= (prnt+2==dpnt?4:prnt==dpnt-2?5:6) ; dl1.cz7(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v3=pPm2; dl1.v6= (prnt-2==dpnt?4:prnt==dpnt-2?5:6) ; dl1.cz7(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v5=dPp2; dl1.v6= (prnt-2==dpnt?4:prnt==dpnt+2?5:6) ; dl1.cz7(); f[c++]=dl1.getVal();
+
+
+ // remove this again
+ dl1.v0= n++; dl1.v3=pPm2; dl1.v5= (prnt-2==dpnt?4:5); dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v3=dPm2; dl1.v5= (prnt==dpnt-2?4:5); dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v3=dPp2; dl1.v5= (prnt==dpnt+2?4:5); dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v3=pPp2; dl1.v5= (prnt+2==dpnt?4:5); dl1.cz6(); f[c++]=dl1.getVal();
+
+
+
+ // NOTE(review): in the next 8 lines dl2.v3 is assigned twice per line, so
+ // the first value (a form) is overwritten by a POS — looks like v2 was
+ // intended for the form; kept as-is since trained models depend on it
+ dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.v5= (prnt+1==dpnt?4:prnt==dpnt-1?5:6) ; dl2.cz6(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.v5= (prnt-1==dpnt?4:prnt==dpnt+1?5:6) ; dl2.cz6(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.v5= (prnt-1==dpnt?4:prnt==dpnt+1?5:6) ; dl2.cz6(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.v5= (prnt+1==dpnt?4:prnt==dpnt-1?5:6) ; dl2.cz6(); f[c++]=dl2.getVal();
+
+
+ // maybe without dir
+ dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.v5= (prnt==dpnt-1?4:prnt==dpnt-2?5:6) ; dl2.cz6(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.v5= (prnt==dpnt+1?4:prnt==dpnt+2?5:6) ; dl2.cz6(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.v5= (prnt-1==dpnt?4:prnt-2==dpnt?5:6) ; dl2.cz6(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.v5= (prnt+1==dpnt?4:prnt+2==dpnt?5:6) ; dl2.cz6(); f[c++]=dl2.getVal();
+
+
+ dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.getVal();
+ dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.getVal();
+// dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[c++]=dwwp.getVal();
+
+
+ // until here
+
+
+ // lemmas
+
+ dl2.v1=label;
+ dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.getVal();
+ dl2.v0= n++; dl2.cz3(); f[c++]=dl2.getVal();
+
+
+ dwwp.v1=label;
+ dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.getVal();
+
+ dwp.v1= label;
+ // NOTE(review): this template is set up but commented out — n is still
+ // consumed so the numbering of later templates is preserved
+ dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; //dwp.cz6(); f[c++]=dwp.getVal();
+
+ dwp.v0=n++;dwp.v2=pL; dwp.v3=pP;dwp.v4=dP; dwp.v0=n++;dwp.cz5(); f[c++]=dwp.getVal();
+
+
+
+ dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.getVal();
+ dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.getVal();
+ dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.getVal();
+
+
+ // cluster
+ if (cluster.size()>10) {
+ d2pw.v1=label;
+ d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.getVal();
+ d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.getVal();
+ d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.getVal();
+ // d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.getVal();
+
+
+ d2pp.v1=label;
+ d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.getVal();
+ d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.getVal();
+ d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.getVal();
+ d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.getVal();
+ }
+
+ if (s_stack) {
+
+ short[] prel = is.plabels[i];
+ short[] phead = is.pheads[i];
+
+ //take those in for stacking
+ dl2.v1=label;
+ dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.getVal();
+ dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.getVal();
+ }
+
+
+
+ // morphological feature cross-product (head x dependent)
+ if (feats==null) return c;
+
+ short[] featsP =feats[prnt], featsD =feats[dpnt];
+ dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP;
+ c =extractFeat(f, c, featsP, featsD);
+
+
+ return c;
+ }
+
+
+ /**
+ * Extracts the second-order (edge + sibling/grandchild x) feature codes for
+ * the edge p -> d of sentence i into f, and returns the number of codes
+ * written. Template ids start at _SIB; the statement order is load-bearing.
+ *
+ * @param is the training/parsing instances
+ * @param i sentence index within is
+ * @param p head token index
+ * @param d dependent token index
+ * @param x sibling/grandchild token index, or -1 for none
+ * @param label edge label combined with direction/configuration bits
+ * @param cluster word-cluster lexicon
+ * @param f output array of feature codes (appended from index 0)
+ * @return the number of feature codes written to f
+ */
+ public int second(Instances is , int i,int p, int d, int x, int label, Cluster cluster, long[] f)
+ {
+
+ //for(int k=0;k<f.length;k++) f[k]=0;
+
+ dl1.clean(); dwp.clean();dlf.clean(); dwwp.clean();
+
+ short[] pos= is.pposs[i];
+ int[] forms=is.forms[i],lemmas=is.plemmas[i];
+
+
+ int pP = pos[p], dP = pos[d];
+ int pF = forms[p],dF = forms[d];
+ int pL = lemmas[p], cL = lemmas[d];
+
+ // sibling values default to start-of-sentence sentinels when x == -1
+ int sP = x!=-1 ? pos[x] : s_str, sF = x!=-1 ? forms[x] : s_stwrd, sL = x!=-1 ? lemmas[x] : s_stwrd;
+
+ int n=_SIB;
+ // rare forms/lemmas beyond maxForm are mapped to -1 (unknown)
+ if (pF>maxForm) pF=-1;
+ if (pL>maxForm) pL=-1;
+
+ if (dF>maxForm) dF=-1;
+ if (cL>maxForm) cL=-1;
+
+ if (sF>maxForm) sF=-1;
+ if (sL>maxForm) sL=-1;
+
+ int c =0;
+
+ dl1.v1=label;dwwp.v1=label;dwp.v1=label;
+
+ dl1.v0= n++;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[c++]=dl1.getVal() ; // f[c++]=dl1.csa(s_dist,dist);
+ dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[c++]=dl1.getVal(); //f[c++]=dl1.csa(s_dist,dist);
+ dl1.v0= n++; dl1.v2=dP; dl1.cz4(); f[c++]=dl1.getVal(); //f[c++]=dl1.csa(s_dist,dist);
+
+ // sibling only could be tried
+
+ dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=sF; dwwp.cz4(); f[c++]=dwwp.getVal(); //f[c++]=dwwp.csa(s_dist,dist);
+ dwwp.v0= n++; dwwp.v2=dF; dwwp.cz4(); f[c++]=dwwp.getVal(); //f[c++]=dwwp.csa(s_dist,dist);
+
+ // 154
+ dwp.v0= n++; dwp.v2=sF; dwp.v3=pP; dwp.cz4(); f[c++]=dwp.getVal(); //f[c++]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[c++]=dwp.getVal(); //f[c++]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=pF; dwp.v3=sP; dwp.cz4(); f[c++]=dwp.getVal(); //f[c++]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=dF; dwp.cz4(); f[c++]=dwp.getVal();// f[c++]=dwp.csa(s_dist,dist);
+
+ // 158
+ //lemmas
+
+ dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=sL; dwwp.cz4(); f[c++]=dwwp.getVal();
+ dwwp.v0= n++; dwwp.v2=cL; dwwp.cz4(); f[c++]=dwwp.getVal(); //f[c++]=dwwp.csa(s_dist,dist);
+ dwp.v0= n++; dwp.v2=sL; dwp.v3=pP; dwp.cz4(); f[c++]=dwp.getVal();// f[c++]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; dwp.v3=dP; dwp.cz4(); f[c++]=dwp.getVal(); // f[c++]=dwp.csa(s_dist,dist);
+
+ // 162
+ dwp.v0= n++; dwp.v2=pL; dwp.v3=sP; dwp.cz4(); f[c++]=dwp.getVal(); //f[c++]=dwp.csa(s_dist,dist);
+ dwp.v0= n++; dwp.v2=cL; dwp.cz4(); f[c++]=dwp.getVal();// f[c++]=dwp.csa(s_dist,dist);
+
+ // clusters
+ // NOTE(review): this cluster branch is intentionally empty here; cluster
+ // features are added separately via addClusterFeatures()
+ if (cluster.size()>10) {
+
+ }
+
+ // context POS at +-1 around head, dependent and sibling
+ int pPm1 = p!=0 ? pos[p-1] : s_str;
+ int chldPm1 = d-1>=0 ? pos[d-1] : s_str;
+ int prntPp1 = p!=pos.length-1 ? pos[p+1] : s_end;
+ int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end;
+
+ // sibling part of speech minus and plus 1
+ int sPm1 = x>0 ? pos[x-1]:s_str;
+ int sPp1 = x<pos.length-1 ? pos[x + 1]:s_end;
+
+ // neutralize overlapping context positions
+ // NOTE(review): sub-terms like x+1==x, p+1==p, d+1==d are always false —
+ // presumably copy-paste remnants; harmless but dead
+ if (x+1==x|| x+1==p || x+1==d) sPp1=-1;
+ if (p+1==x|| p+1==p || p+1==d) prntPp1=-1;
+ if (d+1==x|| d+1==p || d+1==d) chldPp1=-1;
+
+ if (x-1==x|| x-1==p || x-1==d) sPm1=-1;
+ if (d-1==x|| d-1==p || d-1==d) chldPm1=-1;
+ if (p-1==x|| p-1==p || p-1==d) pPm1=-1;
+
+
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.getVal();
+ // 165
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sPm1;dl1.v4=pP; dl1.v5= (x-1==p?3:x-1==d?4:5); dl1.cz6(); f[c++]= dl1.getVal(); //dl1.getVal();// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.v5= (x==p+1?3:4); dl1.cz6(); f[c++]=dl1.getVal();// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pPm1;dl1.v5= (x==p-1?3:4); dl1.cz6(); f[c++]=dl1.getVal();// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sPp1;dl1.v4=pPm1;dl1.v5=pP;dl1.v6= (x==p-1?3:x==p+1?4:5); dl1.cz7(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sPm1; dl1.v3=sP;dl1.v4=pPm1;dl1.v5=pP;dl1.v6= (x==p-1?3:x-1==p?4:5); dl1.cz7(); f[c++]=dl1.getVal();// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sPp1;dl1.v4=pP;dl1.v5=prntPp1;dl1.v6= (x+1==p?3:x==p+1?4:5); dl1.cz7(); f[c++]=dl1.getVal();// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1;dl1.v6= (x==p-1?3:x==p+1?4:5); dl1.cz7(); f[c++]=dl1.getVal();// f.add(li.l2i(l));
+
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sPp1;dl1.v4=dP; dl1.v5= (x+1==d?3:x+1==p?4:5); dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sPm1;dl1.v4=dP; dl1.v5= (x-1==d?3:x-1==p?4:5); dl1.cz6(); f[c++]=dl1.getVal();
+
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.v5= (x==d+1?3:d+1==p?4:5); dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.v5= (x==d-1?3:d-1==p?4:5); dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.v6= (x==d-1?3:x+1==d?4:5); dl1.cz7(); f[c++]=dl1.getVal();// f.add(li.l2i(l));
+ dl1.v0=n++; dl1.v2=sPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP; dl1.v6= (x-1==d?3:d-1==x?4:5); dl1.cz7(); f[c++]=dl1.getVal();
+ dl1.v0= n++;dl1.v2=sP; dl1.v3=sPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.v6= (x==d+1?3:x+1==d?4:5); dl1.cz7();f[c++]=dl1.getVal();// f.add(li.l2i(l));
+ dl1.v0= n++; dl1.v2=sPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.v6= (x-1==d?3:d+1==x?4:5);dl1.cz7(); f[c++]=dl1.getVal();
+
+ // c=61;
+ /*
+ if (cluster.size()>10) {
+ AtomicInteger N = new AtomicInteger(n);
+ c = addClusterFeatures(d, p, x, pos, forms, cluster, N, c, f,label);
+ n = N.get();
+ }
+ */
+ // take those in for stacking
+
+ if (s_stack) {
+ short[] prel = is.plabels[i],phead=is.pheads[i];
+
+ // g encodes which of d and x the predicted tree attaches to p
+ int g = p==phead[d]?1:2 ;
+ if (x>=0) g += p==phead[x]?4:8;
+
+ int gr = x==-1?s_relend:prel[x];
+
+
+ dl2.v1 = label;
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.getVal();
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.getVal();
+ dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.getVal();
+
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.getVal();
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.getVal();
+ dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.getVal();
+ }
+
+ short[][] feats=is.feats[i];
+
+ if (feats==null) return c;
+
+
+ // morphological cross-products: dependent x sibling, then head x sibling
+ short[] featsP =feats[d];
+ short[] featsSbl =x!=-1?feats[x]:null;
+ dlf.v1=label;
+ dlf.v0= n++; dlf.v2=sP; dlf.v3=dP;
+ c = extractFeat(f, c ,featsP, featsSbl);
+
+ featsP =feats[p];
+
+
+ dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=pP;
+ c = extractFeat(f, c ,featsP, featsSbl);
+
+
+ return c;
+ }
+
+
+ /**
+ * Appends cluster-based second-order features for the edge p -> d with
+ * sibling/grandchild x of sentence i into f, continuing at index c.
+ * Separated from second() to speed up parsing when no clusters are loaded.
+ *
+ * @param is the training/parsing instances
+ * @param i sentence index within is
+ * @param d dependent token index
+ * @param p head token index
+ * @param x sibling/grandchild token index, or -1 for none
+ * @param cluster word-cluster lexicon providing getLP() lookups
+ * @param c next free index in f
+ * @param f output array of feature codes
+ * @param label edge label combined with direction/configuration bits
+ * @return the number of feature codes now in f
+ */
+ int addClusterFeatures(Instances is, int i, int d, int p, int x, Cluster cluster, int c, long[] f, int label) {
+
+ // int n= N.get();
+
+ short[] pos= is.pposs[i];
+ int[] forms=is.forms[i];
+
+ // template ids for cluster features start at a fixed offset
+ int n=190;
+ int pP = pos[p], dP = pos[d];
+ int sP = x!=-1 ? pos[x] : s_str;
+
+
+ // cluster labels of the +-1 neighbours (boundary sentinels _cstr/_cend)
+ int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend;
+ int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend;
+ int sLSp1 = x < pos.length -1 ? forms[x + 1] ==-1?-1:cluster.getLP(forms[x + 1]) : _cend;
+
+ int pLSm1 = p!=0 ? forms[p - 1]==-1?-1:cluster.getLP(forms[p - 1]): _cstr;
+ int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr;
+ int sLSm1 = x>0 ? forms[x - 1] ==-1?-1:cluster.getLP(forms[x - 1]):_cstr;
+
+ //int c=61;
+ int pF = forms[p],dF = forms[d], sF = x!=-1 ? forms[x] : s_stwrd;
+ int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF);
+
+ int sblLS = (x != -1)&&(sF!=-1) ? cluster.getLP(sF) : s_stwrd;
+
+
+ d2lp.v1=label;
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[c++]=d2lp.getVal();
+ d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[c++]=d2lp.getVal();// f[c++]=d2lp.csa(s_dist,dist);
+
+ d3lp.v1= label;
+ d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[c++]=d3lp.getVal();
+
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sF; d2lp.cz5(); f[c++]=d2lp.getVal(); //f[c++]=d2lp.csa(s_dist,dist);
+ d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=dF; d2lp.cz5(); f[c++]=d2lp.getVal(); //f[c++]=d2lp.csa(s_dist,dist);
+ d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=pF; d2lp.cz5(); f[c++]=d2lp.getVal(); //f[c++]=d2lp.csa(s_dist,dist);
+
+ d2pp.v1=label;
+ d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[c++]=d2pp.getVal(); //f[c++]=d2pp.csa(s_dist,dist);
+ d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.getVal(); //f[c++]=d2pp.csa(s_dist,dist);
+ d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.getVal(); //f[c++]=d2pp.csa(s_dist,dist);
+
+
+ // neutralize overlapping context positions (x+1==x etc. are always false)
+ if (x+1==x|| x+1==p || x+1==d) sLSp1=-1;
+ if (p+1==x|| p+1==p || p+1==d) pLSp1=-1;
+ if (d+1==x|| d+1==p || d+1==d) cLSp1=-1;
+
+ if (x-1==x|| x-1==p || x-1==d) sLSm1=-1;
+ if (d-1==x|| d-1==p || d-1==d) cLSm1=-1;
+ if (p-1==x|| p-1==p || p-1==d) pLSm1=-1;
+
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.getVal();
+
+
+
+ // NOTE(review): the following 16 lines duplicate the 16 above with fresh
+ // template ids — presumably an accidental copy-paste, but trained models
+ // depend on the resulting feature count, so it is kept as-is
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.getVal();
+ dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.getVal();
+ dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.getVal();
+
+
+ return c;
+ }
+
+ private int extractFeat(long[] f, int cnt, short[] featsP, short[] featsD) {
+ if (featsP!=null && featsD!=null) {
+ for(short i1=0;i1<featsP.length;i1++) {
+ for(short i2=0;i2<featsD.length;i2++) {
+ dlf.v4=featsP[i1]; dlf.v5=featsD[i2];
+ dlf.cz6(); f[cnt++]=dlf.getVal();
+ }
+ }
+ } else if (featsP==null && featsD!=null) {
+
+ for(short i2=0;i2<featsD.length;i2++) {
+ dlf.v4=nofeat; dlf.v5=featsD[i2];
+ dlf.cz6(); f[cnt++]=dlf.getVal();
+
+ }
+ } else if (featsP!=null && featsD==null) {
+
+ for(short i1=0;i1<featsP.length;i1++) {
+ dlf.v4=featsP[i1]; dlf.v5=nofeat;
+ dlf.cz6(); f[cnt++]=dlf.getVal();
+
+ }
+ }
+ return cnt;
+ }
+
+
+
+ /**
+ * Encodes the full feature vector of a complete dependency tree: for every
+ * token, the basic, first-order and second-order features of its edge, plus
+ * cluster features for the relevant sibling/grandchild positions.
+ *
+ * @param is the training/parsing instances
+ * @param ic sentence index within is
+ * @param pposs POS tags, forms, lemmas as stored in is (passed separately)
+ * @param heads head index per token (index 0 is the artificial root)
+ * @param types edge label per token
+ * @param feats morphological features (unused here — consumed via is)
+ * @param cluster word-cluster lexicon
+ * @param f feature vector accumulating all mapped feature codes
+ * @return f, for chaining
+ */
+ public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) {
+
+
+ // scratch buffer; 250 must cover the largest per-edge feature count
+ long[] svs = new long[250];
+
+ for (int i = 1; i < heads.length; i++) {
+
+
+ basic(pposs, heads[i], i, f);
+
+ // normalize the edge to (left token, right token)
+ int w1 = heads[i]<i?heads[i]:i;
+ int w2 = heads[i]<i?i:heads[i];
+
+ // encode the direction into the label id
+ int dir =heads[i]<i?0:s_rel1;
+ int label = types[i] + dir;
+
+ int c = firstm(is, ic, w1, w2, label, cluster,svs);
+ for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+ // ch: innermost sibling between head and dependent;
+ // cmi/cmo: extreme children of the dependent inside/outside the span
+ int ch,cmi,cmo;
+ if (heads[i] < i) {
+ ch = rightmostRight(heads, heads[i], i);
+ cmi = leftmostLeft(heads, i, heads[i]);
+ cmo = rightmostRight(heads, i, heads.length);
+
+ } else {
+ ch = leftmostLeft(heads, heads[i], i);
+ cmi = rightmostRight(heads, i, heads[i]);
+ cmo = leftmostLeft(heads, i, 0);
+ }
+
+ // second-order features for each of the three companion positions,
+ // with configuration bits folded into the label id
+ int lx =types[i] + s_rel1*((heads[i]<i?0:1) +8);
+ c =second(is,ic,w1, w2,ch, lx, cluster, svs);
+ for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+ c = addClusterFeatures(is,ic, w1, w2, ch, cluster, c, svs,lx);
+ for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+ lx =types[i]+s_rel1*((heads[i]<i?0:1) + ((cmi < i)?0:2) );
+ c =second(is, ic,w1,w2,cmi, lx, cluster, svs);
+ for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+ c = addClusterFeatures(is,ic, w1, w2, cmi, cluster, c, svs,lx);
+ for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+ lx =types[i]+s_rel1*((heads[i]<i?0:1) + ((cmo < i)?0:2) );
+ c =second(is, ic, w1,w2,cmo, lx, cluster, svs);
+ for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+ c = addClusterFeatures(is,ic, w1, w2, cmo, cluster, c, svs,lx);
+ for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+ }
+
+ return f;
+ }
+
+	/**
+	 * Debugging aid: recomputes the basic, first-order, sibling and grandchild
+	 * scores of every edge of the given tree and compares them against the
+	 * precomputed score tables in x, printing a diagnostic line for every
+	 * mismatch. Has no return value and no effect besides console output.
+	 *
+	 * @param pos   part-of-speech tags
+	 * @param heads head of each word
+	 * @param types edge label of each word
+	 * @param f     scratch score accumulator (cleared repeatedly)
+	 * @param x     precomputed score tables to compare against
+	 */
+	public void compare(Instances is, int ic, short pos[], short[] heads, short[] types, Cluster cluster, F2SF f, DataFES x) {
+
+		long[] svs = new long[250];
+
+		float fx =0.0F;
+
+		for (int i = 1; i < heads.length; i++) {
+
+			f.clear();
+			basic(pos, heads[i], i, f);
+
+			if (x.pl[heads[i]][i] != f.getScore()) {
+				DB.println("basic diff "+x.pl[heads[i]][i] +" fg "+f.getScore());
+			}
+
+			int w1 = heads[i]<i?heads[i]:i;
+			int w2 = heads[i]<i?i:heads[i];
+
+			int dir =heads[i]<i?0:s_rel1;
+			int label = types[i] + dir;
+			f.clear();
+			int c = firstm(is, ic, w1, w2, label, cluster,svs);
+			for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+			if (x.lab[heads[i]][i][types[i]] != f.getScore()) {
+				DB.println("first diff "+x.lab[heads[i]][i][types[i]] +" fg "+f.getScore());
+			}
+
+			// lid: index of types[i] within the allowed labels of this POS pair.
+			// NOTE(review): stays -1 if the label is not listed; the table
+			// accesses below would then throw — presumably trees passed here
+			// always carry licensed labels. TODO confirm.
+			short[] labels = Edges.get(pos[heads[i]], pos[i]);
+			int lid=-1;
+			for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;}
+
+			// ch: closest sibling; cmi/cmo: inner/outer grandchildren.
+			int ch,cmi,cmo;
+			if (heads[i] < i) {
+				ch = rightmostRight(heads, heads[i], i);
+				cmi = leftmostLeft(heads, i, heads[i]);
+				cmo = rightmostRight(heads, i, heads.length);
+
+			} else {
+				ch = leftmostLeft(heads, heads[i], i);
+				cmi = rightmostRight(heads, i, heads[i]);
+				cmo = leftmostLeft(heads, i, 0);
+			}
+
+			f.clear();
+
+			int lx =types[i] + s_rel1*((heads[i]<i?0:1) +8);
+			c =second(is,ic,w1, w2,ch, lx, cluster, svs);
+			for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+			// NOTE(review): the condition falls back to heads[i] for ch==-1 but
+			// the print falls back to i — one of the two is likely a slip;
+			// encode3 uses heads[i] or i depending on direction. TODO confirm.
+			if (x.sib[heads[i]][i][ch==-1?heads[i]:ch][lid] != f.getScore()) {
+				DB.println("sib diff "+x.sib[heads[i]][i][ch==-1?i:ch][lid] +" fg "+f.getScore());
+			}
+
+			f.clear();
+
+			lx =types[i]+s_rel1*((heads[i]<i?0:1) + ((cmi < i)?0:2) );
+			c =second(is, ic,w1,w2,cmi, lx, cluster, svs);
+			for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+			if (x.gra[heads[i]][i][cmi==-1?i:cmi][lid] != f.getScore() ) {
+				DB.println("gcm diff "+x.gra[heads[i]][i][cmi==-1?i:cmi][lid] +" fg "+f.getScore()+" cmi "+cmi+" i "+i+
+						" head "+heads[i]+" w1 "+w1+" w2 "+w2+" label "+lx+" "+((heads[i]<i?0:1) + ((cmi < i)?0:2) ) );
+
+				System.out.println("w1 "+w1+" w2 "+w2+" cmi "+cmi+" label "+label+" ");
+
+				for (long k : svs) System.out.print(k+" ");
+				System.out.println();
+
+			}
+			f.clear();
+			lx =types[i]+s_rel1*((heads[i]<i?0:1) + ((cmo < i)?0:2) );
+			c =second(is, ic, w1,w2,cmo, lx, cluster, svs);
+			for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+			if (x.gra[heads[i]][i][cmo==-1?i:cmo][lid] != f.getScore() ) {
+				DB.println("gcm diff "+x.gra[heads[i]][i][cmo==-1?i:cmo][lid] +" fg "+f.getScore()+" cmo "+cmo+" i "+i+
+						" head "+heads[i]+" w1 "+w1+" w2 "+w2+" label "+lx+" "+((heads[i]<i?0:1) + ((cmi < i)?0:2) ) );
+
+				System.out.println("w1 "+w1+" w2 "+w2+" cmi "+cmi+" label "+label+" ");
+
+				for (long k : svs) System.out.print(k+" ");
+				System.out.println();
+
+			}
+		}
+
+	}
+
+
+	/**
+	 * For every word of a fixed tree, re-scores all labels licensed by the
+	 * POS pair of its edge (first-order + sibling + grandchild features) and
+	 * returns the best-scoring label per word.
+	 *
+	 * @param heads fixed head of each word
+	 * @param types current labels (only used for sizing the result)
+	 * @param f     scratch score accumulator (cleared per label)
+	 * @return a new array with the best label for each word (index 0 unused)
+	 */
+	public short[] searchLabel(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, IFV f) {
+
+		// Scratch buffer for the raw feature values produced by firstm/second.
+		long[] svs = new long[250];
+
+		short[] newLabels = new short[types.length];
+
+		for (int i = 1; i < heads.length; i++) {
+
+			// ch: closest sibling; cmi/cmo: inner/outer grandchildren.
+			int ch,cmi,cmo;
+			if (heads[i] < i) {
+				ch = rightmostRight(heads, heads[i], i);
+				cmi = leftmostLeft(heads, i, heads[i]);
+				cmo = rightmostRight(heads, i, heads.length);
+
+			} else {
+				ch = leftmostLeft(heads, heads[i], i);
+				cmi = rightmostRight(heads, i, heads[i]);
+				cmo = leftmostLeft(heads, i, 0);
+			}
+
+			short labels[] = Edges.get(pposs[is.heads[ic][i]],pposs[i]);
+
+			float best = -1000;
+			for(int j=0; j< labels.length;j++) {
+
+				f.clear();
+				// BUG FIX: the old code ignored the counts returned by
+				// firstm/second and mapped all 250 svs slots, including stale
+				// values from previous iterations; encodeCat/compare bound the
+				// loop by the returned count c, which is done here as well.
+				int c = firstm(is, ic, heads[i], i, labels[j], cluster,svs);
+				for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+				c = second(is,ic,heads[i], i,ch, labels[j], cluster, svs);
+				for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+				c = second(is, ic,heads[i],i,cmi, labels[j], cluster, svs);
+				for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+				c = second(is, ic, heads[i],i,cmo, labels[j], cluster, svs);
+				for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+				// Keep the best-scoring label seen so far for word i.
+				if (best < f.getScore()) {
+					best= (float)f.getScore();
+					newLabels[i]= labels[j];
+				}
+			}
+		}
+		return newLabels;
+	}
+
+
+	/**
+	 * Sums the precomputed scores of a complete tree: for every word the
+	 * basic arc score, the labeled first-order score, the sibling score and
+	 * the two grandchild scores, looked up in the tables of d2.
+	 *
+	 * @param pos   part-of-speech tags
+	 * @param heads head of each word
+	 * @param types edge label of each word
+	 * @param d2    precomputed score tables
+	 * @return the total score of the tree
+	 */
+	public static float encode3(short[] pos, short heads[] , short[] types, DataFES d2) {
+
+		float v = 0F;
+		for (int i = 1; i < heads.length; i++) {
+
+			// int dir= (heads[i] < i)? 0:1;
+
+			v += d2.pl[heads[i]][i];
+			v += d2.lab[heads[i]][i][types[i]];
+
+			// lid: index of types[i] among the labels licensed for this POS pair.
+			// NOTE(review): stays -1 if the label is not licensed, which would
+			// make the sib/gra lookups below throw — TODO confirm callers only
+			// pass licensed labels.
+			// boolean left = i<heads[i];
+			short[] labels = Edges.get(pos[heads[i]], pos[i]);
+			int lid=-1;
+			for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;}
+
+			// ch: closest sibling; cmi/cmo: inner/outer grandchildren.
+			// A missing position (-1) is mapped to the table's fallback index:
+			// the head for left attachments, the dependent for right ones.
+			int ch,cmi,cmo;
+			if (heads[i] < i) {
+				ch = rightmostRight(heads, heads[i], i);
+				cmi = leftmostLeft(heads, i, heads[i]);
+				cmo = rightmostRight(heads, i, heads.length);
+
+				if (ch==-1) ch=heads[i];
+				if (cmi==-1) cmi=heads[i];
+				if (cmo==-1) cmo=heads[i];
+
+			} else {
+				ch = leftmostLeft(heads, heads[i], i);
+				cmi = rightmostRight(heads, i, heads[i]);
+				cmo = leftmostLeft(heads, i, 0);
+
+				if (ch==-1) ch=i;
+				if (cmi==-1) cmi=i;
+				if (cmo==-1) cmo=i;
+			}
+			v += d2.sib[heads[i]][i][ch][lid];
+			v += d2.gra[heads[i]][i][cmi][lid];
+			v += d2.gra[heads[i]][i][cmo][lid];
+		}
+		return v;
+	}
+
+	/**
+	 * Like {@link #encode3(short[], short[], short[], DataFES)} but writes the
+	 * per-word edge score into scores[i] instead of summing.
+	 *
+	 * NOTE(review): v is never accumulated, so this method always returns 0F;
+	 * the per-word scores array is the actual output. TODO confirm no caller
+	 * relies on the return value.
+	 *
+	 * @param scores output: scores[i] receives the total score of edge heads[i]->i
+	 * @return always 0F (see note above)
+	 */
+	public static float encode3(short[] pos, short heads[] , short[] types, DataFES d2, float[] scores) {
+
+		float v = 0F;
+		for (int i = 1; i < heads.length; i++) {
+
+			scores[i]= d2.pl[heads[i]][i];
+			scores[i] += d2.lab[heads[i]][i][types[i]];
+
+			// lid: index of types[i] among the labels licensed for this POS pair
+			// (-1 if unlicensed, which would break the lookups below).
+			short[] labels = Edges.get(pos[heads[i]], pos[i]);
+			int lid=-1;
+			for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;}
+
+			// ch: closest sibling; cmi/cmo: inner/outer grandchildren, with -1
+			// mapped to the table's fallback index (head or dependent).
+			int ch,cmi,cmo;
+			if (heads[i] < i) {
+				ch = rightmostRight(heads, heads[i], i);
+				cmi = leftmostLeft(heads, i, heads[i]);
+				cmo = rightmostRight(heads, i, heads.length);
+
+				if (ch==-1) ch=heads[i];
+				if (cmi==-1) cmi=heads[i];
+				if (cmo==-1) cmo=heads[i];
+
+			} else {
+				ch = leftmostLeft(heads, heads[i], i);
+				cmi = rightmostRight(heads, i, heads[i]);
+				cmo = leftmostLeft(heads, i, 0);
+
+				if (ch==-1) ch=i;
+				if (cmi==-1) cmi=i;
+				if (cmo==-1) cmo=i;
+			}
+			scores[i] += d2.sib[heads[i]][i][ch][lid];
+			scores[i] += d2.gra[heads[i]][i][cmi][lid];
+			scores[i] += d2.gra[heads[i]][i][cmo][lid];
+		}
+		return v;
+	}
+
+
+	/**
+	 * Finds the rightmost child of head that lies strictly between head and
+	 * max (exclusive).
+	 *
+	 * @param heads the head of each word
+	 * @param head  the parent whose child is sought
+	 * @param max   exclusive right bound of the search window
+	 * @return the position of the rightmost such child, or -1 if none exists
+	 */
+	public static int rightmostRight(short[] heads, int head, int max) {
+		// Scan from the right edge towards the head; the first hit is the
+		// rightmost child, so we can return immediately.
+		for (int i = max - 1; i > head; i--) {
+			if (heads[i] == head) return i;
+		}
+		return -1;
+	}
+
+	/**
+	 * Finds the leftmost child of head that lies strictly between min
+	 * (exclusive) and head.
+	 *
+	 * @param heads the head of each word
+	 * @param head  the parent whose child is sought
+	 * @param min   exclusive left bound of the search window
+	 * @return the position of the leftmost such child, or -1 if none exists
+	 */
+	public static int leftmostLeft(short[] heads, int head, int min) {
+		// Scan from the left edge towards the head; the first hit is the
+		// leftmost child, so we can return immediately.
+		for (int i = min + 1; i < head; i++) {
+			if (heads[i] == head) return i;
+		}
+		return -1;
+	}
+
+	// Feature-class name constants used when registering values with MFO.
+	public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA";
+
+	// Integer ids assigned by initFeatures() via MFO.register().
+	private static int ra,la;
+	private static int s_str;
+	private static int s_end, _cend,_cstr, s_stwrd,s_relend;
+
+	protected static final String TYPE = "TYPE",DIR = "D";
+	public static final String POS = "POS";
+	protected static final String DIST = "DIST",MID = "MID";
+
+	// Distance-bucket value names (registered under the DIST class).
+	private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10";
+
+	// Ids of the distance buckets.
+	private static int di0, d4,d3,d2,d1,d5,d10;
+
+
+	private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS";
+
+
+
+	// Id of the "NOFEAT" placeholder used when a word has no morphologic features.
+	private static int nofeat;
+
+
+	// Highest form id encountered; written by code outside this excerpt.
+	public static int maxForm;
+
+
+	/**
+	 * Registers all feature classes and values with MFO and stores the
+	 * assigned integer ids in the static fields above.
+	 *
+	 * Note: MFO assigns ids in registration order (per feature class), so the
+	 * order of the register() calls below is significant and must not change.
+	 */
+	static public void initFeatures() {
+
+		// MFO's maps are static, so a throwaway instance is sufficient here.
+		MFO mf = new MFO();
+		mf.register(POS, MID);
+		s_str = mf.register(POS, STR);
+		s_end = mf.register(POS, END);
+
+		s_relend = mf.register(REL, END);
+
+		_cstr= mf.register(Cluster.SPATH,STR);
+		_cend=mf.register(Cluster.SPATH,END);
+
+
+		mf.register(TYPE, POS);
+
+		s_stwrd=mf.register(WORD,STWRD);
+		mf.register(POS,STPOS);
+
+		la = mf.register(DIR, LA);
+		ra = mf.register(DIR, RA);
+
+		// mf.register(TYPE, CHAR);
+
+		mf.register(TYPE, Pipe.FEAT);
+		nofeat=mf.register(Pipe.FEAT, "NOFEAT");
+
+		// Reserve 150 generic feature-template type names F0..F149.
+		for(int k=0;k<150;k++) mf.register(TYPE, "F"+k);
+
+
+		// Distance buckets.
+		di0=mf.register(DIST, _0);
+		d1=mf.register(DIST, _1);
+		d2=mf.register(DIST, _2);
+		d3=mf.register(DIST, _3);
+		d4=mf.register(DIST, _4);
+		d5=mf.register(DIST, _5);
+		// d5l=mf.register(DIST, _5l);
+		d10=mf.register(DIST, _10);
+
+
+	}
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/MFO.java b/dependencyParser/mate-tools/src/is2/parser/MFO.java
new file mode 100755
index 0000000..519ea06
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/MFO.java
@@ -0,0 +1,257 @@
+package is2.parser;
+
+
+import is2.data.IEncoder;
+import is2.data.IEncoderPlus;
+import is2.data.IFV;
+import is2.data.Long2IntInterface;
+import is2.util.DB;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+/**
+ * Map Features, do not map long to integer
+ *
+ * @author Bernd Bohnet, 20.09.2009
+ */
+
+final public class MFO implements IEncoderPlus {
+
+	/** Per feature class: value string -> integer id (id 0 is reserved for NONE). */
+	static private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>();
+
+	/** Per feature class: the next free integer id (== number of registered values). */
+	static private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>();
+
+	/** Per feature class: number of bits needed to encode its values. */
+	static final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>();
+
+	/** Integer counter for long2int */
+	static private int count=0;
+
+	/** Stop growing */
+	public boolean stop=false;
+
+	final public static String NONE="<None>";
+
+	public MFO () {}
+
+	public int size() {return count;}
+
+	/**
+	 * Registers the value v for the feature class a, creating the class on
+	 * first use (id 0 is reserved for NONE).
+	 *
+	 * @param a the feature class (attribute) name
+	 * @param v the value to register
+	 * @return the integer id of v (stable across repeated registrations)
+	 */
+	final public int register(String a, String v) {
+
+		HashMap<String,Integer> fs = getFeatureSet().get(a);
+		if (fs==null) {
+			// New feature class: reserve id 0 for the NONE placeholder.
+			fs = new HashMap<String,Integer>();
+			getFeatureSet().put(a, fs);
+			fs.put(NONE, 0);
+			getFeatureCounter().put(a, 1);
+		}
+		Integer c = getFeatureCounter().get(a);
+
+		Integer i = fs.get(v);
+		if (i==null) {
+			// Unseen value: assign the next free id and bump the counter.
+			fs.put(v, c);
+			c++;
+			getFeatureCounter().put(a,c);
+			return c-1;
+		} else return i;
+	}
+
+	/**
+	 * Calculates, for every feature class, the number of bits needed to
+	 * encode its values and stores it in m_featureBits.
+	 */
+	public void calculateBits() {
+
+		int total=0;
+		for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+			// +1 so the id range 0..count fits; ceil(log2) gives the bit width.
+			int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2)));
+			m_featureBits.put(e.getKey(), bits);
+			total+=bits;
+			// System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1));
+		}
+
+//		System.out.println("total number of needed bits "+total);
+	}
+
+	/** Human-readable summary: one line per feature class with count and bit width. */
+	public String toString() {
+
+		StringBuffer content = new StringBuffer();
+		for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+			content.append(e.getKey()+" "+e.getValue());
+			content.append(':');
+			// HashMap<String,Integer> vs = getFeatureSet().get(e.getKey());
+			content.append(getFeatureBits(e.getKey()));
+
+			/*if (vs.size()<120)
+			for(Entry<String,Integer> e2 : vs.entrySet()) {
+				content.append(e2.getKey()+" ("+e2.getValue()+") ");
+			}*/
+			content.append('\n');
+
+		}
+		return content.toString();
+	}
+
+	/**
+	 * @param a the feature class name
+	 * @return the bit width of the class, or 0 if unknown
+	 */
+	static final public short getFeatureBits(String a) {
+		// Single map lookup (the original performed the lookup twice).
+		Integer bits = m_featureBits.get(a);
+		return bits==null ? 0 : bits.shortValue();
+	}
+
+	/**
+	 * Get the integer place holder of the string value v of the type t.
+	 *
+	 * @param t the type
+	 * @param v the value
+	 * @return the integer place holder of v, or -1 if type or value is unknown
+	 */
+	final public int getValue(String t, String v) {
+
+		if (m_featureSets.get(t)==null) return -1;
+		Integer vi = m_featureSets.get(t).get(v);
+		if (vi==null) return -1; //stop &&
+		return vi.intValue();
+	}
+
+	/**
+	 * Static version of getValue
+	 * @see #getValue(String, String)
+	 */
+	static final public int getValueS(String a, String v) {
+
+		if (m_featureSets.get(a)==null) return -1;
+		Integer vi = m_featureSets.get(a).get(v);
+		if (vi==null) return -1; //stop &&
+		return vi.intValue();
+	}
+
+	/**
+	 * Looks up v within the (assumed existing) feature class a.
+	 * NOTE(review): throws NullPointerException if a was never registered —
+	 * unlike getValue, which returns -1. TODO confirm callers guarantee a exists.
+	 *
+	 * @return the id of v, or -1 if v is unknown
+	 */
+	public int hasValue(String a, String v) {
+
+		Integer vi = m_featureSets.get(a).get(v);
+		if (vi==null) return -1;
+		return vi.intValue();
+	}
+
+	/**
+	 * Renders the low 31 bits of k as a binary string (most significant first;
+	 * the sign bit is not included).
+	 */
+	public static String printBits(int k) {
+		StringBuffer s = new StringBuffer();
+		for(int i =0;i<31;i++) {
+			s.append((k&0x00000001)==1?'1':'0');
+			k=k>>1;
+
+		}
+		s.reverse();
+		return s.toString();
+	}
+
+	/** Long2int mapping statistics; maintained by code outside this class. */
+	static public int misses = 0;
+	static public int good = 0;
+
+	/**
+	 * Writes all feature classes and their value-to-id maps to the stream.
+	 *
+	 * @param dos the output stream
+	 * @throws IOException on write failure
+	 */
+	static public void writeData(DataOutputStream dos) throws IOException {
+		dos.writeInt(getFeatureSet().size());
+		// DB.println("write"+getFeatureSet().size());
+		for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) {
+			dos.writeUTF(e.getKey());
+			dos.writeInt(e.getValue().size());
+
+			for(Entry<String,Integer> e2 : e.getValue().entrySet()) {
+
+				if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey());
+				dos.writeUTF(e2.getKey());
+				dos.writeInt(e2.getValue());
+
+			}
+
+		}
+	}
+
+	/**
+	 * Reads the feature classes written by {@link #writeData(DataOutputStream)}
+	 * and recomputes the per-class bit widths.
+	 *
+	 * @param din the input stream
+	 * @throws IOException on read failure
+	 */
+	public void read(DataInputStream din) throws IOException {
+
+		int size = din.readInt();
+		for(int i=0; i<size;i++) {
+			String k = din.readUTF();
+			int size2 = din.readInt();
+
+			HashMap<String,Integer> h = new HashMap<String,Integer>();
+			getFeatureSet().put(k,h);
+			for(int j = 0;j<size2;j++) {
+				h.put(din.readUTF(), din.readInt());
+			}
+			getFeatureCounter().put(k, size2);
+		}
+
+		// NOTE(review): this sets count to the number of feature CLASSES, not
+		// the number of values; looks suspicious but is kept as-is. TODO confirm.
+		count =size;
+		// stop();
+		calculateBits();
+	}
+
+	/**
+	 * Clears all registered feature data.
+	 */
+	static public void clearData() {
+		getFeatureSet().clear();
+		m_featureBits.clear();
+		// BUG FIX: the original cleared the feature set twice and left the
+		// counters stale; the counters must be reset together with the sets.
+		m_featureCounters.clear();
+	}
+
+	public HashMap<String,Integer> getFeatureCounter() {
+		return m_featureCounters;
+	}
+
+	static public HashMap<String,HashMap<String,Integer>> getFeatureSet() {
+		return m_featureSets;
+	}
+
+	/**
+	 * Inverts a value->id map into an id-indexed array of value strings.
+	 * Assumes the ids form the dense range 0..size-1, as produced by register().
+	 */
+	static public String[] reverse(HashMap<String,Integer> v){
+		String[] set = new String[v.size()];
+		for(Entry<String,Integer> e : v.entrySet()) {
+			set[e.getValue()]=e.getKey();
+		}
+		return set;
+	}
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/Open.java b/dependencyParser/mate-tools/src/is2/parser/Open.java
new file mode 100755
index 0000000..35f14a7
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/Open.java
@@ -0,0 +1,38 @@
+package is2.parser;
+
+import is2.data.Parse;
+
+
+/**
+ * An open span of the second-order decoder: an edge between s and e plus the
+ * two best closed sub-spans it was built from. dir==0 means e is the head of
+ * s; dir!=0 means s is the head of e.
+ */
+final public class Open {
+
+	public float p;
+	short s, e, label;
+	byte dir;
+
+	Closed left;
+	Closed right;
+
+	/**
+	 * @param s     left end of the span
+	 * @param t     right end of the span
+	 * @param dir   0 if the right word heads the left one, otherwise 1
+	 * @param label edge label (-1 = unlabeled)
+	 * @param left  left closed sub-span (may be null)
+	 * @param right right closed sub-span (may be null)
+	 * @param p     score of this span
+	 */
+	public Open(short s, short t, short dir, short label,Closed left, Closed right, float p) {
+		this.s = s;
+		this.e = t;
+		this.label = label;
+		this.dir = (byte)dir;
+		this.left =left;
+		this.right=right;
+		this.p=p;
+	}
+
+	/** Writes this span's edge (and, recursively, its sub-spans) into the parse. */
+	void create(Parse parse) {
+		// Resolve which end is the dependent and which the head.
+		final short child = (dir == 0) ? s : e;
+		final short parent = (dir == 0) ? e : s;
+		parse.heads[child] = parent;
+		if (label != -1) parse.labels[child] = label;
+		if (left != null) left.create(parse);
+		if (right != null) right.create(parse);
+	}
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/Options.java b/dependencyParser/mate-tools/src/is2/parser/Options.java
new file mode 100755
index 0000000..3c8b551
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/Options.java
@@ -0,0 +1,63 @@
+package is2.parser;
+
+import is2.util.OptionsSuper;
+
+
+/**
+ * Parser-specific command-line options; unrecognized flags are delegated to
+ * OptionsSuper.addOption. Flags taking a value consume args[i+1], so a flag
+ * given as the last argument without its value would raise an
+ * ArrayIndexOutOfBoundsException.
+ */
+public final class Options extends OptionsSuper {
+
+
+	public Options (String[] args) {
+
+
+
+		for(int i = 0; i < args.length; i++) {
+
+			// explain() prints usage and terminates the JVM.
+			if (args[i].equals("--help")) explain();
+
+			if (args[i].equals("-decode")) {
+				decodeProjective = args[i+1].equals("proj"); i++;
+			} else if (args[i].equals("-decodeTH")) {
+				decodeTH = Double.parseDouble(args[i+1]); i++;
+			} else if (args[i].equals("-nonormalize")) {
+				normalize=false;
+			} else if (args[i].equals("-features")) {
+				features= args[i+1]; i++;
+			} else if (args[i].equals("-hsize")) {
+				hsize= Integer.parseInt(args[i+1]); i++;
+			} else if (args[i].equals("-len")) {
+				maxLen= Integer.parseInt(args[i+1]); i++;
+			} else if (args[i].equals("-cores")) {
+				cores= Integer.parseInt(args[i+1]); i++;
+			} else if (args[i].equals("-no2nd")) {
+				no2nd= true;
+			} else if (args[i].equals("-few2nd")) {
+				few2nd= true;
+			} else super.addOption(args, i);
+
+		}
+
+
+
+	}
+
+	// Prints usage information and exits the JVM with status 0.
+	private void explain() {
+		System.out.println("Usage: ");
+		System.out.println("java -class mate.jar is2.parser.Parser [Options]");
+		System.out.println();
+		System.out.println("Example: ");
+		System.out.println(" java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed  -count 2000 -i 6");
+		System.out.println("");
+		System.out.println("Options:");
+		System.out.println("");
+		System.out.println(" -train  <file>    the corpus a model is trained on; default "+this.trainfile);
+		System.out.println(" -test   <file>    the input corpus for testing; default "+this.testfile);
+		System.out.println(" -out    <file>    the output corpus (result) of a test run; default "+this.outfile);
+		System.out.println(" -model  <file>    the parsing model for traing the model is stored in the files");
+		System.out.println("                   and for parsing the model is load from this file; default "+this.modelName);
+		System.out.println(" -i      <number>  the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters);
+		System.out.println(" -count  <number>  the n first sentences of the corpus are take for the training default "+this.count);
+		System.out.println(" -format <number>  conll format of the year 8 or 9; default "+this.formatTask);
+
+		System.exit(0);
+	}
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/ParallelDecoder.java b/dependencyParser/mate-tools/src/is2/parser/ParallelDecoder.java
new file mode 100755
index 0000000..dd18f5f
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/ParallelDecoder.java
@@ -0,0 +1,170 @@
+package is2.parser;
+
+
+import is2.data.DataFES;
+
+
+import java.util.ArrayList;
+import java.util.concurrent.Callable;
+
+/**
+ * @author Bernd Bohnet, 30.08.2009
+ *
+ * This class implements a parallel feature extractor.
+ */
+/**
+ * Worker of the second-order projective decoder: repeatedly takes a span
+ * (w1,w2) from the shared work list and fills the open (O) and closed (C)
+ * chart cells for that span in both attachment directions.
+ *
+ * Threading: get() is synchronized on the shared list; add() is not — the
+ * work list is presumably filled before the workers start. TODO confirm.
+ */
+final public class ParallelDecoder implements Callable<Object>
+{
+	// some constants
+	private static final float INIT_BEST = (-1.0F / 0.0F); // -Infinity sentinel
+	private static final boolean[] DIR ={false,true};
+
+	// the data space of the weights for a dependency tree
+	final private DataFES x;
+
+	private short[] pos;
+
+	// shared chart cells: O[s][t][dir][label], C[s][t][dir][mid]
+	private Open O[][][][];
+	private Closed C[][][][] ;
+
+	private int length;
+
+	boolean done=false;
+	public boolean waiting =false;
+
+	/**
+	 * Initialize the parallel decoder.
+	 *
+	 * @param pos part-of-speech
+	 * @param d data
+	 * @param o open spans
+	 * @param c closed spans
+	 * @param length number of words
+	 */
+	public ParallelDecoder(short[] pos, DataFES d, Open o[][][][], Closed c[][][][], int length) {
+
+		this.pos =pos;
+		this.x =d;
+
+		this.O=o;
+		this.C=c;
+		this.length=length;
+	}
+
+
+	// A unit of work: the span between words w1 and w2.
+	private static class DSet { short w1,w2;}
+
+	@Override
+	public Object call() {
+
+
+		try {
+
+			while (true){
+
+				DSet set = get();
+//				if (done && set==null) break;
+
+				if (set ==null) return null;
+
+				short s=set.w1, t=set.w2;
+
+				// dir==1: s heads t (right attachment); dir==0: t heads s.
+				for(short dir =0;dir<2;dir++) {
+
+					short[] labs = (dir==1) ? Edges.get(pos[s],pos[t]):Edges.get(pos[t],pos[s]);
+
+					O[s][t][dir] = new Open[labs.length];
+
+					// Build the best open span for every licensed label by
+					// choosing the split point r and the best left/right
+					// closed sub-spans.
+					for (int l = 0; l <labs.length; l++) {
+
+
+						double tRP = INIT_BEST;
+
+						Closed tL = null, tR = null;
+
+						for (int r = s; r < t; r++) {
+
+							// The artificial root (s==0) may only split at r==0.
+							if (s == 0 && r != 0) continue;
+
+							double tLPr = INIT_BEST,tRPr = INIT_BEST;
+							Closed tLCld = null, tRCld = null;
+
+							if (r == s) tLPr = dir==1 ? x.sib[s][t][s][l] :
+								x.gra[t][s][s][l];
+							else
+								for (int i = s + 1; i <= r; i++)
+									if (((dir==1 ? x.sib[s][t][i][l] : x.gra[t][s][i][l]) + C[s][r][1][i].p) > tLPr) {
+										tLPr = ((dir==1 ? x.sib[s][t][i][l] : x.gra[t][s][i][l]) + C[s][r][1][i].p);tLCld = C[s][r][1][i];}
+
+							if (r == t-1) tRPr = dir==1 ? x.gra[s][t][s][l] : x.sib[t][s][s][l];
+							else
+								for (int i = r + 1; i < t; i++)
+									if (((dir == 1 ? x.gra[s][t][i][l] :
+										x.sib[t][s][i][l]) +
+											C[r+1][t][0][i].p) > tRPr) {
+										tRPr = ((dir==1?x.gra[s][t][i][l]:x.sib[t][s][i][l]) + C[r+1][t][0][i].p); tRCld=C[r + 1][t][0][i];
+									}
+
+							if (tLPr + tRPr > tRP) {tRP = tLPr + tRPr; tL = tLCld;tR = tRCld;}
+						}
+						// Add the arc score and the label score of this edge.
+						O[s][t][dir][l] = new Open(s, t, dir, labs[l],tL, tR,
+								(float) ( tRP+((dir==1)?x.pl[s][t]: x.pl[t][s]) + ((dir==1)? x.lab[s][t][labs[l]]:x.lab[t][s][labs[l]])));
+					}
+				}
+				C[s][t][1] = new Closed[length]; C[s][t][0] = new Closed[length];
+
+				// Build the closed spans: combine an open span with a closed
+				// sub-span at every admissible midpoint m.
+				for (int m = s ; m <= t; m++) {
+					for(boolean d : DIR) {
+						if ((d && m!=s)||!d && (m!=t && s!=0)) {
+
+							// create closed structure
+
+							double top = INIT_BEST;
+
+							Open tU = null; Closed tL = null;
+							int numLabels =O[(d ? s : m)][(d ? m : t)][d?1:0].length;
+
+							//for (int l = numLabels-1; l >=0; l--) {
+							for (int l = 0; l < numLabels; l++) {
+
+								Open hi = O[(d ? s : m)][(d ? m : t)][d?1:0][l];
+								for (int amb = m + (d?1:-1); amb != (d?t:s) + (d?1:-1); amb += (d?1:-1)) {
+
+									if ((hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][l]) > top) {
+										top = (hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][l]); tU = hi; tL=C[d?m:s][d?t:m][d?1:0][amb];}
+
+								}
+
+								if ((m == (d ? t : s)) && (hi.p + x.gra[d?s:t][d?t:s][m][l]) > top) {
+									top = (hi.p + x.gra[d ? s : t][d?t:s][m][l]); tU = hi; tL = null;}
+							}
+							C[s][t][d?1:0][m] = new Closed(s, t, m, d?1:0,tU,tL,(float) top);
+
+
+						}
+					}
+				}
+			}
+		} catch (Exception e ) {
+			e.printStackTrace();
+			System.exit(0);
+		}
+		return null;
+	}
+
+	// Shared work list; filled via add() and drained by the workers via get().
+	public static ArrayList<DSet> sets = new ArrayList<DSet>();
+
+	static synchronized private DSet get() {
+		synchronized (sets) {
+			if (sets.size()==0) return null;
+			return sets.remove(sets.size()-1);
+		}
+	}
+
+	// NOTE(review): not synchronized — assumed to be called single-threaded
+	// before the workers run. TODO confirm.
+	public static void add(short w1, short w2){
+		DSet ds =new DSet();
+		ds.w1=w1;
+		ds.w2=w2;
+		sets.add(ds);
+	}
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/ParallelExtract.java b/dependencyParser/mate-tools/src/is2/parser/ParallelExtract.java
new file mode 100755
index 0000000..4313bfd
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/ParallelExtract.java
@@ -0,0 +1,246 @@
+package is2.parser;
+
+import is2.data.Cluster;
+import is2.data.DataFES;
+import is2.data.F2SF;
+import is2.data.FV;
+import is2.data.Instances;
+import is2.data.Long2IntInterface;
+import is2.util.DB;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.concurrent.Callable;
+
+
+
+
+
+/**
+ * @author Bernd Bohnet, 30.08.2009
+ *
+ * This class implements a parallel feature extractor.
+ */
+/**
+ * Worker of the parallel feature extractor: takes word pairs (w1,w2) from a
+ * shared list and fills the precomputed score tables of d (pl, lab, gra, sib)
+ * for both directions of the pair.
+ */
+final public class ParallelExtract implements Callable<Object>
+{
+	// the data space of the weights for a dependency tree
+	final DataFES d;
+
+	// the data extractor does the actual work
+	final Extractor extractor;
+
+	private Instances is;
+	private int i;  // index of the sentence within is
+
+	private F2SF para;  // per-thread score accumulator
+
+	private Cluster cluster;
+
+
+	public ParallelExtract(Extractor e, Instances is, int i, DataFES d, F2SF para,Cluster cluster) {
+
+		this.is =is;
+		extractor=e;
+		this.d =d;
+		this.i=i;
+		this.para=para;
+		this.cluster = cluster;
+	}
+
+
+	// A unit of work: the word pair (w1, w2).
+	public static class DSet {
+		int w1,w2;
+	}
+
+	public Object call() {
+
+		try {
+
+			F2SF f= para;
+
+
+			short[] pos=is.pposs[i];
+			int length = pos.length;
+
+			// Scratch buffers for cluster (gvs) and standard (svs) feature values.
+			long[] gvs = new long[50];
+			long[] svs = new long[220];
+
+			while (true) {
+
+				DSet set = get();
+				if (set ==null) break;
+
+				int w1=set.w1;
+				int w2=set.w2;
+
+				// Basic arc scores for both directions.
+				f.clear();
+				extractor.basic(pos, w1, w2, f);
+				d.pl[w1][w2]=f.getScoreF();
+
+
+				f.clear();
+
+				extractor.basic(pos, w2, w1, f);
+				d.pl[w2][w1]=f.getScoreF();
+
+				// Labeled first-order scores (w1 -> w2): the label-independent
+				// feature values (svs) are computed once and re-offset per label.
+				short[] labels = Edges.get(pos[w1], pos[w2]);
+				float[] lab = d.lab[w1][w2];
+
+				final Long2IntInterface li = extractor.li;
+
+				int c = extractor.firstm(is, i, w1, w2, 0, cluster, svs);
+
+				// -100 marks labels not licensed for this POS pair.
+				for (int l = 0; l <lab.length ; l++) lab[l]=-100 ;
+
+				for (int l = 0; l <labels.length ; l++) {
+					short label = labels[l];
+
+					f.clear();
+					int lv = extractor.d0.computeLabeValue(label,Extractor.s_type);
+					for(int k=0;k<c;k++)if (svs[k]>0) f.add(li.l2i(svs[k]+lv));
+
+
+					lab[label]=f.getScoreF();
+				}
+
+				// Labeled first-order scores (w2 -> w1); s_rel1 offsets the
+				// label into the opposite-direction feature space.
+				labels = Edges.get(pos[w2], pos[w1]);
+				lab = d.lab[w2][w1];
+
+				for (int l = 0; l <lab.length ; l++) lab[l]=-100 ;
+
+
+				for (int l = 0; l <labels.length ; l++) {
+					int label = labels[l];
+
+					f.clear();
+					int lv = extractor.d0.computeLabeValue(label + Extractor.s_rel1 ,Extractor.s_type);
+					for(int k=0;k<c;k++)if (svs[k]>0) f.add(li.l2i(svs[k]+lv));
+
+					lab[label]=f.getScoreF();
+				}
+
+				int s = w1<w2 ? w1 : w2;
+				int e = w1<w2 ? w2 : w1;
+
+				// Second-order scores for every possible third word m:
+				// grandchild (gra) and sibling (sib) tables.
+				for(int m=0;m<length;m++) {
+
+					// g==-1 encodes "no third word" (m coincides with the pair).
+					int g = (m==s||e==m) ? -1 : m;
+
+					int cn =extractor.second(is, i, w1,w2,g, 0, cluster, svs);
+					int cc = extractor.addClusterFeatures(is,i, w1, w2, g, cluster, 0, gvs,0);
+					//for(int k=0;k<c;k++) dl1.map(f,svs[k]);
+
+
+					if(m>=w1) {
+						labels = Edges.get(pos[w1], pos[w2]);
+						float[] lab2 = new float[labels.length];
+						for (int l = 0; l <labels.length ; l++) {
+
+							short label = labels[l];
+
+							// +2 selects the "grandchild right of w2" feature space.
+							int lx =label+Extractor.s_rel1*( g < w2?0:2 );
+
+							f.clear();
+							int lv = extractor.d0.computeLabeValue(lx,Extractor.s_type);
+							for(int k=0;k<cn;k++)if (svs[k]>0) f.add(li.l2i(svs[k]+lv));
+							for(int k=0;k<cc;k++)if (gvs[k]>0) f.add(li.l2i(gvs[k]+lv));
+
+							lab2[l] = f.getScoreF();
+						}
+						d.gra[w1][w2][m] =lab2;
+					}
+
+
+					if (m<=w2) {
+						labels = Edges.get(pos[w2], pos[w1]);
+						float lab2[];
+						d.gra[w2][w1][m] = lab2 = new float[labels.length];
+						for (int l = 0; l <labels.length ; l++) {
+
+							int label = labels[l] ;
+							int lx =label+Extractor.s_rel1*(1 + (g < w1?0:2) );
+
+							f.clear();
+							int lv = extractor.d0.computeLabeValue(lx,Extractor.s_type);
+							for(int k=0;k<cn;k++)if (svs[k]>0) f.add(li.l2i(svs[k]+lv));
+							for(int k=0;k<cc;k++)if (gvs[k]>0) f.add(li.l2i(gvs[k]+lv));
+
+							lab2[l] = f.getScoreF();
+
+						}
+					}
+
+
+					g = (m==s||e==m) ? -1 : m;
+
+					// int cn = extractor.second(is,i,w1,w2,g,0, cluster, svs,Extractor._SIB);
+					if (m >=w1 && m<=w2) {
+						labels = Edges.get(pos[w1], pos[w2]);
+						float lab2[]= new float[labels.length];
+						d.sib[w1][w2][m] = lab2;
+
+						for (int l = 0; l <labels.length ; l++) {
+
+							short label = labels[l];
+
+							// +8/+9 select the sibling feature spaces (cf. encodeCat).
+							int lx =label+Extractor.s_rel1*( 8);
+							f.clear();
+							int lv = extractor.d0.computeLabeValue(lx,Extractor.s_type);
+							for(int k=0;k<cn;k++) if (svs[k]>0) f.add(li.l2i(svs[k]+lv));
+							for(int k=0;k<cc;k++) if (gvs[k]>0) f.add(li.l2i(gvs[k]+lv));
+
+
+							lab2[l] = (float)f.score;//f.getScoreF();
+						}
+					}
+					if (m >=w1 && m <=w2) {
+						labels = Edges.get(pos[w2], pos[w1]);
+						float[] lab2 = new float[labels.length];
+						d.sib[w2][w1][m]=lab2;
+						for (int l = 0; l <labels.length ; l++) {
+
+							int label = labels[l] ;
+
+							int lx =label+Extractor.s_rel1*(9);
+
+							f.clear();
+							int lv = extractor.d0.computeLabeValue(lx,Extractor.s_type);
+							for(int k=0;k<cn;k++) if (svs[k]>0) f.add(li.l2i(svs[k]+lv));
+							for(int k=0;k<cc;k++) if (gvs[k]>0) f.add(li.l2i(gvs[k]+lv));
+
+							lab2[l] = f.score;//f.getScoreF();
+						}
+					}
+				}
+			}
+
+		} catch(Exception e ) {
+			e.printStackTrace();
+		}
+		return null;
+	}
+
+
+	// Shared work list; filled via add() and drained by the workers via get().
+	static ArrayList<DSet> sets = new ArrayList<DSet>();
+
+	private DSet get() {
+
+		synchronized (sets) {
+			if (sets.size()==0) return null;
+			return sets.remove(sets.size()-1);
+		}
+	}
+
+	// NOTE(review): not synchronized — assumed to be called single-threaded
+	// before the workers run. TODO confirm.
+	static public void add(int w1, int w2){
+		DSet ds =new DSet();
+		ds.w1=w1;
+		ds.w2=w2;
+		sets.add(ds);
+	}
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/ParallelRearrange.java b/dependencyParser/mate-tools/src/is2/parser/ParallelRearrange.java
new file mode 100755
index 0000000..dfd995a
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/ParallelRearrange.java
@@ -0,0 +1,127 @@
+package is2.parser;
+
+import is2.data.DataFES;
+
+import java.util.ArrayList;
+import java.util.concurrent.Callable;
+
+/**
+ * @author Dr. Bernd Bohnet, 30.08.2009
+ *
+ * This class implements a parallel edge rearrangement for non-projective parsing;
+ * The linear method was first suggest by Rayn McDonald et. al. 2005.
+ */
+/**
+ * @author Dr. Bernd Bohnet, 30.08.2009
+ *
+ * This class implements a parallel edge rearrangement for non-projective parsing;
+ * The linear method was first suggest by Rayn McDonald et. al. 2005.
+ *
+ * Each worker owns a private copy of the heads/types arrays and evaluates
+ * candidate re-attachments taken from a shared work list, recording the best
+ * improvement found in the PA work item itself.
+ */
+final public class ParallelRearrange implements Callable<Object> {
+
+	// new parent child combination to explore
+	final static class PA {
+		final float p;       // score of the current tree (baseline)
+		final short ch, pa;  // candidate: re-attach child ch under parent pa
+		public float max;    // best score improvement found for this candidate
+		public short wh;     // child achieving the improvement
+		public short nPar;   // its new parent
+		public short nType;  // its new label
+		public PA(float p2, short ch2, short pa2) { p=p2; ch=ch2;pa=pa2;}
+	}
+
+	// list of parent child combinations
+	static ArrayList<PA> parents = new ArrayList<PA>();
+	static ArrayList<PA> order = new ArrayList<PA>();
+	// best new parent child combination, found so far
+	public float max;
+
+	// some data from the dependency tree
+	//private EdgesC edges;
+	private short[] pos;
+	private DataFES x;
+	private boolean[][] isChild ;
+	public short[] heads,types;  // worker-private copies of the tree
+
+	// child, new parent, new label
+	public short wh,nPar,nType;
+
+	/**
+	 * Initialize the parallel rearrange thread
+	 *
+	 * @param isChild2 is a child
+	 * @param pos the part-of-speech
+	 * @param x the data
+	 * @param s the heads
+	 * @param ts the types
+	 */
+	public ParallelRearrange(boolean[][] isChild2,short[] pos, DataFES x, short[] s, short[] ts) {
+
+		// Private copies so workers can mutate the tree without interference.
+		heads =new short[s.length];
+		System.arraycopy(s, 0, heads, 0, s.length);
+
+		types =new short[ts.length];
+		System.arraycopy(ts, 0, types, 0, ts.length);
+
+		isChild=isChild2;
+		//edges = edgesC;
+		this.pos =pos;
+		this.x=x;
+	}
+
+
+	@Override
+	public Object call() {
+
+		// check the list of new possible parents and children for a better combination
+		while(true) {
+			PA px = getPA();
+			if (px==null) break;
+
+			// NOTE(review): this local shadows the public field 'max';
+			// results are reported via px.max etc., so the field appears
+			// unused here. TODO confirm.
+			float max=0;
+			short pa =px.pa, ch =px.ch;
+
+			// Skip self-attachment, no-op re-attachment and cycles
+			// (pa being a descendant of ch).
+			if(ch == pa || pa == heads[ch] || isChild[ch][pa]) continue;
+
+			short oldP = heads[ch], oldT = types[ch];
+
+			heads[ch]=pa;
+
+			short[] labels = Edges.get(pos[pa], pos[ch]);
+
+			// Try every licensed label and keep the best score improvement.
+			for(int l=0;l<labels.length;l++) {
+
+				types[ch]=labels[l];
+
+				float p_new = Extractor.encode3(pos, heads, types, x);
+
+				if(max < p_new-px.p ) {
+					max = p_new-px.p; wh = ch; nPar = pa; nType = labels[l] ;
+					px.max=max;
+					px.wh=ch;
+					px.nPar = pa;
+					px.nType =labels[l];
+				}
+			}
+			// Restore the original attachment before the next candidate.
+			heads[ch]= oldP; types[ch]=oldT;
+		}
+		return null;
+	}
+
+	/**
+	 * Add a child-parent combination which are latter explored for rearrangement
+	 *
+	 * NOTE(review): not synchronized, unlike getPA — assumed to be called
+	 * single-threaded before the workers run. TODO confirm.
+	 *
+	 * @param p2 score of the current tree
+	 * @param ch2 the child to re-attach
+	 * @param pa the candidate new parent
+	 */
+	static public void add(float p2, short ch2, short pa) {
+		PA px = new PA(p2,ch2,pa);
+		parents.add(px);
+		order.add(px);
+	}
+
+	static private PA getPA() {
+		synchronized (parents) {
+			if (parents.size()==0) return null;
+			return parents.remove(parents.size()-1);
+		}
+	}
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/Parameters.java b/dependencyParser/mate-tools/src/is2/parser/Parameters.java
new file mode 100755
index 0000000..cb13a69
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/Parameters.java
@@ -0,0 +1,38 @@
+/**
+ *
+ */
+package is2.parser;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import is2.data.FV;
+import is2.data.IFV;
+import is2.data.Instances;
+import is2.data.Parse;
+
+/**
+ * @author Bernd Bohnet, 31.08.2009
+ *
+ * Base class for the parser's weight vector: perceptron updates, averaging, and (de)serialization.
+ */
+public abstract class Parameters {
+
+
+ public abstract void average(double avVal); // divide accumulated totals by avVal (averaged perceptron)
+
+ public abstract void update(FV act, FV pred, Instances isd, int instc, Parse d, double upd, double e); // update towards the gold features (act), away from the prediction (pred)
+
+ public abstract void write(DataOutputStream dos) throws IOException; // serialize the weights
+
+ public abstract void read(DataInputStream dis ) throws IOException; // deserialize the weights
+
+ public abstract int size(); // number of weights
+
+ /**
+ * @return a fresh feature-vector scorer backed by these weights
+ */
+ public abstract IFV getFV() ;
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/ParametersFloat.java b/dependencyParser/mate-tools/src/is2/parser/ParametersFloat.java
new file mode 100755
index 0000000..c2cbe93
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/ParametersFloat.java
@@ -0,0 +1,137 @@
+package is2.parser;
+
+import is2.data.F2SF;
+import is2.data.FV;
+import is2.data.Instances;
+import is2.data.Parse;
+import is2.util.DB;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+
+
+final public class ParametersFloat extends Parameters {
+
+ public float[] parameters; // current weights
+ public float[] total; // accumulated weights for perceptron averaging; may be null (see float[] ctor)
+
+ public ParametersFloat(int size) {
+ parameters = new float[size];
+ total = new float[size];
+ for(int i = 0; i < parameters.length; i++) {
+ parameters[i] = 0F;
+ total[i] = 0F;
+ }
+ }
+
+ /**
+ * @param p the weight vector to wrap; NOTE: total stays null, so average()/average2()/update() must not be called on such an instance
+ */
+ public ParametersFloat(float[] p) {
+ parameters =p;
+ }
+
+
+ @Override
+ public void average(double avVal) {
+ for(int j = 0; j < total.length; j++) {
+ parameters[j] = total[j]/((float)avVal); // averaged perceptron: final weight = total / number of updates
+ }
+ total =null; // free the accumulator; this instance can no longer be trained
+ }
+
+ public ParametersFloat average2(double avVal) {
+ // like average(), but leaves this instance untouched and returns the averaged copy
+ float[] px = new float[this.parameters.length];
+ for(int j = 0; j < total.length; j++) {
+ px[j] = total[j]/((float)avVal);
+ }
+ ParametersFloat pf = new ParametersFloat(px);
+ return pf;
+ }
+
+ @Override
+ public void update(FV act, FV pred, Instances isd, int instc, Parse d, double upd, double e) {
+
+ e++; // loss-augmented margin: errors + 1
+
+ float lam_dist = getScore(act) - getScore(pred); // current margin between gold and prediction
+
+ float b = (float)e-lam_dist;
+
+ FV dist = act.getDistVector(pred); // feature difference act - pred
+
+ dist.update(parameters, total, hildreth(dist,b), upd,false); // MIRA-style update with step size from hildreth()
+ }
+
+ protected double hildreth(FV a, double b) {
+ // single-constraint Hildreth step: alpha = b / (a . a), guarded against a zero vector
+ double A = a.dotProduct(a);
+ if (A<=0.0000000000000000001) return 0.0;
+ return b/A;
+ }
+
+
+ public float getScore(FV fv) {
+ if (fv ==null) return 0.0F;
+ return fv.getScore(parameters,false);
+
+ }
+
+ @Override
+ final public void write(DataOutputStream dos) throws IOException{
+ // format: length followed by the raw floats; read() must mirror this order
+ dos.writeInt(parameters.length);
+ for(float d : parameters) dos.writeFloat(d);
+
+ }
+
+ @Override
+ public void read(DataInputStream dis ) throws IOException{
+
+ parameters = new float[dis.readInt()];
+ int notZero=0;
+ for(int i=0;i<parameters.length;i++) {
+ parameters[i]=dis.readFloat();
+ if (parameters[i]!=0.0F) notZero++; // count non-zero weights for the log line below
+ }
+
+
+ DB.println("read parameters "+parameters.length+" not zero "+notZero);
+
+ }
+
+ public int countNZ() {
+ // number of non-zero weights; used for progress/diagnostic output during training
+ int notZero=0;
+ for(int i=0;i<parameters.length;i++) {
+ if (parameters[i]!=0.0F) notZero++;
+ }
+ return notZero;
+
+ // DB.println("read parameters "+parameters.length+" not zero "+notZero);
+
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.sp09k99995.Parameters#getFV()
+ */
+ @Override
+ public F2SF getFV() {
+ return new F2SF(parameters);
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.sp09k99999.Parameters#size()
+ */
+ @Override
+ public int size() {
+ return parameters.length;
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/Parser.java b/dependencyParser/mate-tools/src/is2/parser/Parser.java
new file mode 100755
index 0000000..d6ba6d6
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/Parser.java
@@ -0,0 +1,664 @@
+package is2.parser;
+
+
+import is2.data.Cluster;
+import is2.data.DataF;
+import is2.data.DataFES;
+import is2.data.F2SF;
+import is2.data.FV;
+import is2.data.Instances;
+import is2.data.Long2Int;
+import is2.data.Long2IntInterface;
+import is2.data.Parse;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter09;
+import is2.tools.Retrainable;
+import is2.tools.Tool;
+import is2.util.DB;
+import is2.util.OptionsSuper;
+import is2.util.ParserEvaluator;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Map.Entry;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+import java.util.zip.ZipOutputStream;
+
+
+
+public class Parser implements Tool, Retrainable {
+
+ // output evaluation info
+ private static final boolean MAX_INFO = true;
+
+ public static int THREADS =4; // overwritten in main() with the number of available cores
+
+ public Long2IntInterface l2i;
+ public ParametersFloat params;
+ public Pipe pipe;
+ public OptionsSuper options;
+
+
+ // keep some of the parsing information for later evaluation
+ public Instances is;
+ DataFES d2;
+ public Parse d= null;
+
+ /**
+ * Initialize the parser and load the model named in the options.
+ * @param options run-time options; options.modelName selects the model file
+ */
+ public Parser (OptionsSuper options) {
+
+ this.options=options;
+ pipe = new Pipe(options);
+
+ params = new ParametersFloat(0); // weights are replaced by readModel() below
+
+ // load the model
+ try {
+ readModel(options, pipe, params);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ }
+
+
+ /**
+ * @param modelFileName The file name of the parsing model
+ */
+ public Parser(String modelFileName) {
+ this(new Options(new String[]{"-model",modelFileName}));
+ }
+
+
+ /**
+ * Create an uninitialized parser; fields are wired up by the caller (used by the training path in main()).
+ */
+ public Parser() {
+ // intentionally empty: training code in main() sets up the fields itself
+ }
+
+
+ public static void main (String[] args) throws Exception
+ {
+
+
+
+
+ long start = System.currentTimeMillis();
+ OptionsSuper options = new Options(args);
+
+
+ Runtime runtime = Runtime.getRuntime();
+ THREADS = runtime.availableProcessors(); // default to all cores; the -cores option can lower this
+ if (options.cores<THREADS&&options.cores>0) THREADS =options.cores;
+ DB.println("Found " + runtime.availableProcessors()+" cores use "+THREADS);
+
+
+
+ if (options.train) {
+
+ Parser p =new Parser();
+ p.options=options;
+
+ p.l2i = new Long2Int(options.hsize);
+
+ p.pipe = new Pipe (options);
+ Instances is = new Instances();
+
+ Extractor.initFeatures();
+ p.pipe.extractor = new Extractor[THREADS];
+ DB.println("hsize "+options.hsize);
+
+ DB.println("Use "+(options.featureCreation==OptionsSuper.MULTIPLICATIVE?"multiplication":"shift")+"-based feature creation function");
+ for (int t=0;t<THREADS;t++) p.pipe.extractor[t]=new Extractor(p.l2i, options.stack, options.featureCreation);
+
+ DB.println("Stacking "+options.stack);
+
+ p.pipe.createInstances(options.trainfile,is);
+
+ p.params = new ParametersFloat(p.l2i.size());
+
+ p.train(options, p.pipe,p.params,is,p.pipe.cl);
+
+ p.writeModell(options, p.params, null,p.pipe.cl);
+
+ }
+
+ if (options.test) {
+
+ // Parser p = new Parser();
+ Parser p = new Parser(options);
+
+ // p. pipe = new Pipe(options);
+ // p. params = new ParametersFloat(0); // total should be zero and the parameters are later read
+
+ // load the model
+
+ // p.readModel(options, p.pipe, p.params);
+
+ DB.println("label only? "+options.label);
+
+ p.out(options, p.pipe, p.params, !MAX_INFO, options.label);
+ }
+
+
+
+
+ System.out.println();
+
+ if (options.eval) {
+ System.out.println("\nEVALUATION PERFORMANCE:");
+ ParserEvaluator.evaluate(options.goldfile, options.outfile);
+ }
+
+ long end = System.currentTimeMillis();
+ System.out.println("used time "+((float)((end-start)/100)/10));
+
+ Decoder.executerService.shutdown();
+ Pipe.executerService.shutdown();
+ System.out.println("end.");
+
+
+ }
+
+ /**
+ * Read the model and feature mappings; the read order must mirror writeModell().
+ * @param options run-time options; options.modelName names the zipped model file
+ * @param pipe pipe whose mappings, cluster, and extractors are initialized from the model
+ * @param params weight vector filled from the model
+ * @throws IOException if the model file cannot be read
+ */
+ public void readModel(OptionsSuper options, Pipe pipe, Parameters params) throws IOException {
+
+
+ DB.println("Reading data started");
+
+ // prepare zipped reader
+ ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName)));
+ zis.getNextEntry();
+ DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));
+
+ pipe.mf.read(dis);
+
+ pipe.cl = new Cluster(dis);
+
+ params.read(dis);
+ this.l2i = new Long2Int(params.size());
+ DB.println("parsing -- li size "+l2i.size());
+
+
+ pipe.extractor = new Extractor[THREADS];
+
+ boolean stack = dis.readBoolean();
+
+ options.featureCreation=dis.readInt();
+
+ for (int t=0;t<THREADS;t++) pipe.extractor[t]=new Extractor(l2i, stack,options.featureCreation);
+ DB.println("Stacking "+stack);
+
+ Extractor.initFeatures();
+ Extractor.initStat(options.featureCreation);
+
+
+ for (int t=0;t<THREADS;t++) pipe.extractor[t].init();
+
+ Edges.read(dis);
+
+ options.decodeProjective = dis.readBoolean();
+
+ Extractor.maxForm = dis.readInt();
+
+ boolean foundInfo =false;
+ try {
+ String info =null;
+ int icnt = dis.readInt();
+ for(int i=0;i<icnt;i++) {
+ info = dis.readUTF();
+ System.out.println(info);
+ }
+ } catch (Exception e) {
+ if (!foundInfo) System.out.println("no info about training"); // older models may lack the info section
+ }
+
+
+ dis.close();
+
+ DB.println("Reading data finnished");
+
+ Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH;
+
+ Extractor.initStat(options.featureCreation);
+
+ }
+
+
+
+ /**
+ * Train the parser with the averaged perceptron; runs options.numIters epochs
+ * and, if a test/gold file is configured, evaluates after each epoch.
+ * @param options training options (iterations, maxLen, ...)
+ * @param pipe feature pipe used for extraction and error counting
+ * @param params weight vector updated in place
+ * @param is the training instances
+ * @param cluster word-cluster information used by the extractor
+ * @throws IOException if intermediate output fails
+ * @throws InterruptedException if decoder threads are interrupted
+ * @throws ClassNotFoundException declared for historical reasons
+ */
+ public void train(OptionsSuper options, Pipe pipe, ParametersFloat params, Instances is, Cluster cluster)
+ throws IOException, InterruptedException, ClassNotFoundException {
+
+
+ DB.println("\nTraining Information ");
+ DB.println("-------------------- ");
+
+
+ Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH;
+
+ if (options.decodeProjective) System.out.println("Decoding: "+(options.decodeProjective?"projective":"non-projective"));
+ else System.out.println(""+Decoder.getInfo());
+ int numInstances = is.size();
+
+ int maxLenInstances =0;
+ for(int i=0;i<numInstances;i++) if (maxLenInstances<is.length(i)) maxLenInstances=is.length(i);
+
+ DataFES data = new DataFES(maxLenInstances, pipe.mf.getFeatureCounter().get(PipeGen.REL).shortValue());
+
+ int iter = 0;
+ int del=0;
+ float error =0;
+ float f1=0;
+
+ FV pred = new FV();
+ FV act = new FV();
+
+ double upd = (double)(numInstances*options.numIters)+1; // remaining-updates counter for perceptron averaging
+
+ for(; iter < options.numIters; iter++) {
+
+ System.out.print("Iteration "+iter+": ");
+
+ long start = System.currentTimeMillis();
+
+ long last= System.currentTimeMillis();
+ error=0;
+ f1=0;
+ for(int n = 0; n < numInstances; n++) {
+
+ upd--;
+
+ if (is.labels[n].length>options.maxLen) continue; // skip over-long sentences
+
+ String info = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F)
+ +" te "+((Pipe.timeExtract)/1000000F);
+
+ if((n+1) %500 == 0) del= PipeGen.outValueErr(n+1, error,f1/n,del, last, upd,info);
+
+ short pos[] = is.pposs[n];
+
+ data = pipe.fillVector((F2SF)params.getFV(), is, n, data, cluster);
+
+ Parse d = Decoder.decode(pos, data, options.decodeProjective, Decoder.TRAINING);
+
+ double e= pipe.errors(is, n ,d);
+
+ if (d.f1>0)f1+=d.f1;
+
+ if (e<=0) continue; // prediction was correct; no update needed
+
+ pred.clear();
+ pipe.extractor[0].encodeCat(is,n,pos,is.forms[n],is.plemmas[n],d.heads, d.labels, is.feats[n],pipe.cl, pred);
+
+ error += e;
+
+ params.getFV(); // NOTE(review): return value unused — looks like a leftover; confirm it has no needed side effect
+
+
+ act.clear();
+ pipe.extractor[0].encodeCat(is,n,pos,is.forms[n],is.plemmas[n],is.heads[n], is.labels[n], is.feats[n],pipe.cl, act);
+
+ params.update(act, pred, is, n, d, upd,e);
+ }
+
+ String info = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F)
+ +" te "+((Pipe.timeExtract)/1000000F)+" nz "+params.countNZ();
+ PipeGen.outValueErr(numInstances, error,f1/numInstances,del,last, upd,info);
+ del=0;
+ long end = System.currentTimeMillis();
+ System.out.println(" time:"+(end-start));
+
+
+ ParametersFloat pf = params.average2((iter+1)*is.size()); // evaluate with averaged weights, keep training the raw ones
+ try {
+
+ if (options.testfile!=null && options.goldfile!=null) {
+ out (options, pipe, pf, ! MAX_INFO,false);
+ ParserEvaluator.evaluate(options.goldfile, options.outfile);
+ // writeModell(options, pf, ""+(iter+1),pipe.cl);
+ }
+
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ if (error==0) {
+ DB.println("stopped because learned all lessons");
+ break;
+ }
+
+ Decoder.timeDecotder=0;Decoder.timeRearrange=0; Pipe.timeExtract=0;
+
+
+ }
+ if (options.average)params.average(iter*is.size());
+ }
+
+
+ /**
+ * Parse a whole test file and write the result to options.outfile.
+ *
+ * @param options run-time options (testfile, outfile, format)
+ * @param pipe feature pipe used for extraction
+ * @param params weights used for scoring
+ * @throws Exception if reading, parsing, or writing fails
+ */
+ private void out (OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo, boolean labelOnly)
+ throws Exception {
+
+ long start = System.currentTimeMillis();
+
+ CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask);
+ CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask);
+
+ int cnt = 0;
+ int del=0;
+ long last = System.currentTimeMillis();
+
+ if (maxInfo) System.out.println("\nParsing Information ");
+ if (maxInfo) System.out.println("------------------- ");
+
+ if (maxInfo && !options.decodeProjective) System.out.println(""+Decoder.getInfo());
+
+ System.out.print("Processing Sentence: ");
+
+ while(true) {
+
+ // Instances is = new Instances();
+ // is.init(1, new MFO(),options.formatTask);
+
+ // SentenceData09 instance = pipe.nextInstance(is, depReader);
+
+ SentenceData09 instance = depReader.getNext();
+ if (instance==null) break;
+ cnt++;
+
+ SentenceData09 i09 = this.parse(instance,params, labelOnly,options);
+
+ depWriter.write(i09);
+ del=PipeGen.outValue(cnt, del,last);
+
+ }
+ //pipe.close();
+ depWriter.finishWriting();
+ long end = System.currentTimeMillis();
+ // DB.println("errors "+error);
+ if (maxInfo) System.out.println("Used time " + (end-start));
+ if (maxInfo) System.out.println("forms count "+Instances.m_count+" unkown "+Instances.m_unkown);
+
+ }
+
+ /**
+ * Parse a single sentence.
+ *
+ * @param instance the sentence to parse (with an artificial root at index 0)
+ * @param params the weights used for scoring
+ * @param labelOnly if true only relabel the given (p)heads instead of full parsing
+ * @param options run-time options (min/max sentence length, format)
+ * @return a new SentenceData09 with predicted heads and labels filled in
+ */
+ public SentenceData09 parse (SentenceData09 instance, ParametersFloat params, boolean labelOnly, OptionsSuper options) {
+
+ String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)];
+ for (Entry<String, Integer> e : MFO.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey();
+
+ is = new Instances();
+ is.init(1, new MFO(),options.formatTask);
+ new CONLLReader09().insert(is, instance);
+
+ // use for the training ppos
+
+ SentenceData09 i09 = new SentenceData09(instance);
+ i09.createSemantic(instance);
+
+ if (labelOnly) {
+ F2SF f2s =params.getFV();
+
+ // repair pheads
+
+ is.pheads[0]= is.heads[0];
+
+ for(int l=0;l<is.pheads[0].length;l++) {
+ if (is.pheads[0][l]<0)is.pheads[0][l]=0; // clamp missing heads to the root
+ }
+
+ short[] labels = pipe.extractor[0].searchLabel(is, 0, is.pposs[0], is.forms[0], is.plemmas[0], is.pheads[0], is.plabels[0], is.feats[0], pipe.cl, f2s);
+
+ for(int j = 0; j < instance.forms.length-1; j++) {
+ i09.plabels[j] = types[labels[j+1]];
+ i09.pheads[j] = is.pheads[0][j+1];
+ }
+ return i09;
+ }
+
+ if (options.maxLength > instance.length() && options.minLength <= instance.length()) {
+ try {
+ // System.out.println("prs "+instance.forms[0]);
+ // System.out.println("prs "+instance.toString());
+ d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl);//cnt-1
+ d =Decoder.decode(is.pposs[0],d2,options.decodeProjective, !Decoder.TRAINING); //cnt-1
+
+ }catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ for(int j = 0; j < instance.forms.length-1; j++) {
+ i09.plabels[j] = types[d.labels[j+1]];
+ i09.pheads[j] = d.heads[j+1];
+ }
+ }
+ return i09;
+
+ }
+
+ is2.io.CONLLReader09 reader = new is2.io.CONLLReader09(true);
+ /* (non-Javadoc)
+ * @see is2.tools.Tool#apply(is2.data.SentenceData09)
+ */
+ @Override
+ public SentenceData09 apply(SentenceData09 snt09) {
+
+ SentenceData09 it = new SentenceData09();
+ it.createWithRoot(snt09);
+
+ SentenceData09 out=null;
+ try {
+
+
+ // for(int k=0;k<it.length();k++) {
+ // it.forms[k] = reader.normalize(it.forms[k]);
+ // it.plemmas[k] = reader.normalize(it.plemmas[k]);
+ // }
+
+ out = parse(it,this.params,false,options);
+
+
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+
+ Decoder.executerService.shutdown(); // NOTE(review): shutting down the shared executors here makes later parse calls re-create them (see fillVector/decode) — confirm intended
+ Pipe.executerService.shutdown();
+
+ return out;
+ }
+
+ /**
+ * Get the edge scores of the last parse.
+ * @return the scores
+ */
+ public float[] getInfo() {
+
+
+ float[] scores = new float[is.length(0)];
+ Extractor.encode3(is.pposs[0], d.heads, d.labels, d2,scores);
+
+ return scores;
+ }
+
+
+ /**
+ * Write the parsing model as a zip file; readModel() must read in exactly this order.
+ * (Method name "writeModell" kept as-is for compatibility.)
+ *
+ * @param options run-time options (modelName, stack, featureCreation, ...)
+ * @param params the weights to serialize
+ * @param extension optional suffix appended to the model name, or null
+ * @throws FileNotFoundException if the output file cannot be created
+ * @throws IOException if writing fails
+ */
+ private void writeModell(OptionsSuper options, ParametersFloat params, String extension, Cluster cs) throws FileNotFoundException, IOException {
+
+ String name = extension==null?options.modelName:options.modelName+extension;
+ // System.out.println("Writting model: "+name);
+ ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(name)));
+ zos.putNextEntry(new ZipEntry("data"));
+ DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos));
+
+ MFO.writeData(dos);
+ cs.write(dos);
+
+ params.write(dos);
+
+ dos.writeBoolean(options.stack);
+ dos.writeInt(options.featureCreation);
+
+
+ Edges.write(dos);
+
+ dos.writeBoolean(options.decodeProjective);
+
+ dos.writeInt(Extractor.maxForm);
+
+ dos.writeInt(5); // Info count
+ dos.writeUTF("Used parser "+Parser.class.toString());
+ dos.writeUTF("Creation date "+(new SimpleDateFormat("yyyy.MM.dd HH:mm:ss")).format(new Date()));
+ dos.writeUTF("Training data "+options.trainfile);
+ dos.writeUTF("Iterations "+options.numIters+" Used sentences "+options.count);
+ dos.writeUTF("Cluster "+options.clusterFile);
+
+ dos.flush();
+ dos.close();
+ }
+
+
+ @Override
+ public boolean retrain(SentenceData09 sentence, float upd, int iterations) {
+
+ params.total = params.parameters; // retraining updates the (already averaged) weights directly
+
+ boolean done=false;
+
+ for(int k=0;k<iterations;k++) {
+ try {
+ // create the data structure
+ DataFES data = new DataFES(sentence.length(), pipe.mf.getFeatureCounter().get(PipeGen.REL).shortValue());
+
+
+ Instances is = new Instances();
+ is.m_encoder =pipe.mf;
+
+
+
+ is.init(1, pipe.mf,options.formatTask);
+ new CONLLReader09().insert(is, sentence);
+
+ // String list[] = ((MFO)is.m_encoder).reverse(((MFO)is.m_encoder).getFeatureSet().get(Pipe.POS));
+ // for(String s :list) {
+ // System.out.println(s+" ");
+ // }
+
+ // for(int i=0;i<is.length(0);i++) {
+
+ // System.out.printf("%d\t %d\t %d \n",i,is.forms[0][i],is.pposs[0][i] );
+ // System.out.printf("%s\t form:%s pos:%s\n",i,sentence.forms[i],sentence.ppos[i]);
+
+ // }
+
+ SentenceData09 i09 = new SentenceData09(sentence);
+ i09.createSemantic(sentence);
+
+
+
+ // create the weights
+ data = pipe.fillVector((F2SF)params.getFV(), is, 0, data, pipe.cl);
+
+ short[] pos = is.pposs[0];
+
+ // parse the sentence
+ Parse d = Decoder.decode(pos, data, options.decodeProjective, Decoder.TRAINING);
+
+ // training successful?
+ double e= pipe.errors(is, 0 ,d);
+ // System.out.println("errors "+e);
+ if (e==0) {
+
+
+ done= true;
+ break;
+ }
+
+ // update the weight vector
+ FV pred = new FV();
+ pipe.extractor[0].encodeCat(is,0,pos,is.forms[0],is.plemmas[0],d.heads, d.labels, is.feats[0],pipe.cl, pred);
+
+ params.getFV();
+
+ FV act = new FV();
+ pipe.extractor[0].encodeCat(is,0,pos,is.forms[0],is.plemmas[0],is.heads[0], is.labels[0], is.feats[0],pipe.cl, act);
+
+ params.update(act, pred, is, 0, d, upd,e);
+
+ if (upd >0)upd--;
+
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ }
+ Decoder.executerService.shutdown();
+ Pipe.executerService.shutdown();
+
+
+ return done;
+ }
+
+
+ @Override
+ public boolean retrain(SentenceData09 sentence, float upd, int iterations, boolean print) {
+ // delegates to retrain/3; the print flag is currently ignored
+ return retrain( sentence, upd, iterations);
+ }
+}
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/parser/Pipe.java b/dependencyParser/mate-tools/src/is2/parser/Pipe.java
new file mode 100755
index 0000000..13e9389
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/Pipe.java
@@ -0,0 +1,221 @@
+package is2.parser;
+
+import is2.data.Cluster;
+import is2.data.DataF;
+import is2.data.DataFES;
+import is2.data.F2SF;
+import is2.data.Instances;
+
+import is2.data.Parse;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+
+import is2.util.OptionsSuper;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.concurrent.ExecutorService;
+/** Feature pipe: reads CoNLL-09 corpora, registers feature values, builds edge filters, and extracts edge scores in parallel. */
+final public class Pipe extends PipeGen {
+
+ public Extractor[] extractor; // one extractor per worker thread
+ final public MFO mf = new MFO();
+
+ public Cluster cl;
+
+
+ private OptionsSuper options;
+ public static long timeExtract; // accumulated feature-extraction time in nanoseconds
+
+ public Pipe(OptionsSuper o) {
+ options = o;
+ }
+
+ public void createInstances(String file, Instances is)
+ throws Exception {
+
+ CONLLReader09 depReader = new CONLLReader09(file);
+
+ mf.register(REL,"<root-type>");
+
+ // register at least one predicate since the parsing data might not contain predicates as in
+ // the Japanese corpus, but the development set contains some
+
+ long sl=0;
+
+ System.out.print("Registering feature parts of sentence: ");
+ int ic = 0;
+ int del = 0;
+ while (true) {
+ SentenceData09 instance = depReader.getNext();
+ if (instance == null) break;
+ ic++;
+
+ sl+=instance.labels.length;
+
+ if (ic % 1000 == 0) {
+ del = outValue(ic, del);
+ }
+
+ String[] labs1 = instance.labels;
+ for (int i1 = 0; i1 < labs1.length; i1++) mf.register(REL, labs1[i1]);
+
+ String[] w = instance.forms;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1]));
+
+ w = instance.plemmas;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1]));
+
+
+ w = instance.ppos;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);
+
+ w = instance.gpos;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);
+
+ if (instance.feats !=null) {
+ String fs[][] = instance.feats;
+ for (int i1 = 0; i1 < fs.length; i1++){
+ w =fs[i1];
+ if (w==null) continue;
+ for (int i2 = 0; i2 < w.length; i2++) mf.register(FEAT, w[i2]);
+ }
+ }
+
+ if ((ic-1)>options.count) break; // respect the sentence-count limit
+ }
+ del = outValue(ic, del);
+
+ System.out.println();
+ Extractor.initFeatures();
+
+ Extractor.maxForm = mf.getFeatureCounter().get(WORD);
+
+ if (options.clusterFile==null)cl = new Cluster();
+ else cl= new Cluster(options.clusterFile, mf,6);
+
+
+
+ mf.calculateBits();
+ Extractor.initStat(options.featureCreation);
+
+ System.out.println(""+mf.toString());
+
+ for(Extractor e : extractor) e.init();
+
+ depReader.startReading(file); // second pass over the corpus
+
+ int num1 = 0;
+
+ is.init(ic, new MFO());
+
+ Edges.init(mf.getFeatureCounter().get(POS));
+
+
+ System.out.print("Creating edge filters and read corpus: ");
+ del = 0;
+
+ while (true) {
+ if (num1 % 100 == 0) del = outValue(num1, del);
+
+ SentenceData09 instance1 = depReader.getNext(is);
+
+ if (instance1 == null) break;
+
+ int last = is.size() - 1;
+ short[] pos =is.pposs[last];
+
+ // record which labels were seen for each (head-POS, child-POS) pair
+ for (int k = 0; k < is.length(last); k++) {
+ if (is.heads[last][k] < 0) continue;
+ Edges.put(pos[is.heads[last][k]],pos[k], is.labels[last][k]);
+// Edges.put(pos[k],pos[is.heads[last][k]], is.labels[last][k]);
+ }
+
+ if (!options.allFeatures && num1 > options.count) break;
+
+ num1++;
+
+ }
+ del = outValue(num1, del);
+ System.out.println();
+ Edges.findDefault();
+ }
+
+
+ /**
+ * Read the next sentence for output parsing.
+ *
+ * @param is the instances container the sentence is appended to
+ * @return the next sentence, or null at end of input
+ * @throws Exception if reading fails
+ */
+ protected final SentenceData09 nextInstance(Instances is, CONLLReader09 depReader) throws Exception {
+
+ SentenceData09 instance = depReader.getNext(is);
+ if (instance == null || instance.forms == null) return null;
+
+ return instance;
+ }
+
+ public static ExecutorService executerService =java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS);
+
+
+ public DataFES fillVector(F2SF params, Instances is,int inst, DataFES d, Cluster cluster) throws InterruptedException {
+
+ long ts = System.nanoTime();
+
+ if (executerService.isShutdown()) executerService =java.util.concurrent.Executors.newCachedThreadPool(); // recover if a caller shut the pool down
+
+
+ final int length = is.length(inst);
+ if (d ==null || d.len<length)d = new DataFES(length,mf.getFeatureCounter().get(PipeGen.REL).shortValue()); // reuse the buffer when it is large enough
+
+ ArrayList<ParallelExtract> pe = new ArrayList<ParallelExtract>();
+ for(int i=0;i<Parser.THREADS;i++) pe.add(new ParallelExtract(extractor[i],is, inst, d, (F2SF)params.clone(), cluster));
+
+ for (int w1 = 0; w1 < length; w1++) {
+ for (int w2 =w1+1; w2 < length; w2++) {
+
+ if (w1==w2) continue; // NOTE(review): unreachable — w2 starts at w1+1
+
+ ParallelExtract.add(w1, w2);
+
+
+ }
+ }
+// for(int i=0;i<efp.length;i++) efp[i].start();
+// for(int i=0;i<efp.length;i++) efp[i].join();
+ executerService.invokeAll( pe);
+
+ timeExtract += (System.nanoTime()-ts);
+
+
+
+
+
+
+ return d;
+ }
+
+ public double errors( Instances is, int ic, Parse p) {
+ short[] act = is.heads[ic];
+ double correct = 0;
+
+ // do not count root; each token scores 0.5 for the right head and 0.5 for the right label
+ for(int i = 1; i < act.length; i++) {
+
+ // if (is.ppos[ic] ==null ) System.out.println("mf null"+is.ppos[ic][i]);
+ if (p.heads[i]==act[i] ){
+ correct+=0.5;
+ if (p.labels[i]==is.labels[ic][i] ) correct+=0.5;
+ }
+ }
+
+ double x = ((double)act.length- 1 - correct );
+
+ p.f1 = (double)correct / (double)(act.length-1);
+
+ return x;
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/parser/package.html b/dependencyParser/mate-tools/src/is2/parser/package.html
new file mode 100755
index 0000000..a4f40a2
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parser/package.html
@@ -0,0 +1,11 @@
+Package info
+<ul>
+ <li> separate cluster feature to keep speed, since too many features in a function reduce speed drastically. </li>
+ <li> try second order stacking features </li>
+ <li> parser stacking features </li>
+ <li> lots of cluster features </li>
+ <li> Iteration over edges and not extraction of all edges each time </li>
+ <li> integrated new structurer writer </li>
+</ul>
+ Change in FS: I observed lots of duplicated grand-children features.
+<br>
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Decoder.java b/dependencyParser/mate-tools/src/is2/parserR2/Decoder.java
new file mode 100755
index 0000000..1f0424e
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parserR2/Decoder.java
@@ -0,0 +1,377 @@
+package is2.parserR2;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+
+import decoder.ParallelDecoder;
+import decoder.ParallelRearrangeNBest;
+import decoder.ParallelRearrangeNBest2;
+import extractors.Extractor;
+
+
+import is2.data.Closed;
+import is2.data.DataF;
+import is2.data.Instances;
+import is2.data.Open;
+import is2.data.Parse;
+import is2.data.ParseNBest;
+import is2.util.DB;
+
+
+/**
+ * @author Bernd Bohnet, 01.09.2009
+ *
+ * This methods do the actual work and they build the dependency trees.
+ */
+final public class Decoder {
+
+ public static final boolean TRAINING = true;
+ public static long timeDecotder;
+ public static long timeRearrange;
+
+ public static final boolean LAS = true;
+
+ /**
+ * Threshold for rearrange edges non-projective
+ */
+ public static float NON_PROJECTIVITY_THRESHOLD = 0.3F;
+
+ public static ExecutorService executerService =java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS);
+
+
+ // utility class: private constructor prevents instantiation
+ private Decoder() {};
+
+
+ /**
+ * Build the n-best dependency trees for a sentence with a chart-based decoder.
+ * @param pos part-of-speech tags (index 0 is the artificial root)
+ * @param x the precomputed edge and label scores
+ * @param projective if true, only the best projective tree is returned
+ * @param extractor scoring component used while rearranging edges
+ * @return the n-best parse trees, best first
+ * @throws InterruptedException if a decoder worker thread is interrupted
+ */
+ public static List<ParseNBest> decode(short[] pos, DataF x, boolean projective, Extractor extractor) throws InterruptedException {
+
+ long ts = System.nanoTime();
+
+ if (executerService.isShutdown()) executerService = java.util.concurrent.Executors.newCachedThreadPool(); // recover if a caller shut the pool down
+ final int n = pos.length;
+
+ final Open O[][][][] = new Open[n][n][2][]; // chart items: open spans, indexed [start][end][direction]
+ final Closed C[][][][] = new Closed[n][n][2][]; // chart items: closed spans
+
+ ArrayList<ParallelDecoder> pe = new ArrayList<ParallelDecoder>();
+
+ for(int i=0;i<Parser.THREADS ;i++) pe.add(new ParallelDecoder(pos, x, O, C, n));
+
+ // fill the chart bottom-up by increasing span width k
+ for (short k = 1; k < n; k++) {
+
+ // provide the threads the data
+ for (short s = 0; s < n; s++) {
+ short t = (short) (s + k);
+ if (t >= n) break;
+
+ ParallelDecoder.add(s,t);
+ }
+
+ executerService.invokeAll(pe);
+ }
+
+ double bestSpanScore = (-1.0F / 0.0F); // negative infinity
+ Closed bestSpan = null;
+ for (int m = 1; m < n; m++)
+ if (C[0][n - 1][1][m].p > bestSpanScore) {
+ bestSpanScore = C[0][n - 1][1][m].p;
+ bestSpan = C[0][n - 1][1][m];
+ }
+
+ // build the dependency tree from the chart
+ ParseNBest out= new ParseNBest(pos.length);
+
+ bestSpan.create(out);
+
+ out.heads[0]=-1;
+ out.labels[0]=0;
+ bestProj=out; // NOTE(review): static field — not safe if decode() runs concurrently for several sentences; confirm single-sentence use
+
+ timeDecotder += (System.nanoTime()-ts);
+ // DB.println(""+out);
+
+ ts = System.nanoTime();
+ List<ParseNBest> parses;
+
+ if (!projective) {
+
+ // if (training)
+ // rearrange(pos, out.heads, out.types,x,training);
+ //else {
+ // DB.println("bestSpan score "+(float)bestSpan.p+" comp score "+Extractor.encode3(pos, out.heads, out.types, x));
+ // System.out.println();
+ // Parse best = new Parse(out.heads,out.types,Extractor.encode3(pos, out.heads, out.types, x));
+ parses = rearrangeNBest(pos, out.heads, out.labels,x,extractor);
+ // DB.println("1best "+parses.get(0).f1);
+ // DB.println(""+parses.get(0).toString());
+
+
+ // for(ParseNBest p :parses) if (p.heads==null) p.signature2parse(p.signature());
+
+ /// if (parses.get(0).f1>(best.f1+NON_PROJECTIVITY_THRESHOLD)) out = parses.get(0);
+ // else out =best;
+
+ // }
+ } else {
+ parses = new ArrayList<ParseNBest>();
+ parses.add(out);
+ }
+ timeRearrange += (System.nanoTime()-ts);
+
+ return parses;
+ }
+
+ static Parse bestProj = null;
+
+
+
+
+ /**
+ * Parallel non-projective edge re-arranger that collects an n-best list.
+ * @param pos part-of-speech tags (index 0 is the artificial root)
+ * @param heads parent-child relation (head index per node)
+ * @param labs edge labels
+ * @param x the precomputed edge and label scores
+ * @param extractor scoring component used to evaluate modified trees
+ * @return the n-best parses found, sorted best first
+ * @throws InterruptedException if a worker thread is interrupted
+ */
+ public static List<ParseNBest> rearrangeNBestP(short[] pos, short[] heads, short[] labs, DataF x, Extractor extractor) throws InterruptedException {
+
+ ArrayList<ParallelRearrangeNBest2> pe = new ArrayList<ParallelRearrangeNBest2>();
+
+ int round =0;
+ ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>();
+ ParseNBest px =new ParseNBest();
+ px.signature(heads,labs);
+ //Object extractor;
+ px.f1=extractor.encode3(pos, heads, labs, x); // seed the list with the (projective) input tree
+ parses.add(px);
+
+ float lastNBest = Float.NEGATIVE_INFINITY;
+
+ HashSet<Parse> done = new HashSet<Parse>(); // parses already expanded
+ gnu.trove.THashSet<CharSequence> contained = new gnu.trove.THashSet<CharSequence>(); // signatures already in the list
+
+ while(true) {
+
+ pe.clear();
+
+ // expand up to 12 of the best parses that were not explored yet
+ int ic=0, considered=0;
+ while(true) {
+
+ if (parses.size()<=ic || considered>11) break;
+
+ ParseNBest parse = parses.get(ic);
+
+ ic++;
+ // parse already extended
+ if (done.contains(parse)) continue;
+ considered++;
+
+ parse.signature2parse(parse.signature());
+
+ done.add(parse);
+
+
+ boolean[][] isChild = new boolean[heads.length][heads.length];
+
+ for(int i = 1, l1=1; i < heads.length; i++,l1=i)
+ while((l1= heads[l1]) != -1) isChild[l1][i] = true;
+
+
+ // check the list of new possible parents and children for a better combination
+ for(short ch = 1; ch < heads.length; ch++) {
+ for(short pa = 0; pa < heads.length; pa++) {
+ if(ch == pa || pa == heads[ch] || isChild[ch][pa]) continue; // skip self-loop, current head, cycle
+ ParallelRearrangeNBest2.add(parse.clone(), ch, pa);
+ }
+ }
+
+ }
+
+ for(int t =0;t<Parser.THREADS;t++) pe.add(new ParallelRearrangeNBest2( pos,x,lastNBest,extractor, NON_PROJECTIVITY_THRESHOLD) );
+
+
+ executerService.invokeAll(pe);
+
+ // avoid adding the same parse several times
+ for(ParallelRearrangeNBest2 rp : pe) {
+ for(int k=rp.parses.size()-1;k>=0;k--) {
+ if (lastNBest>rp.parses.get(k).f1) continue;
+ CharSequence sig = rp.parses.get(k).signature();
+ if (!contained.contains(sig)) {
+ parses.add(rp.parses.get(k));
+ contained.add(sig);
+ }
+ }
+ }
+
+ Collections.sort(parses);
+
+ if (round >=2) break; // at most three expansion rounds
+ round ++;
+
+ // do not use too much memory
+ if (parses.size()>Parser.NBest) {
+ // if (parses.get(Parser.NBest).f1>lastNBest) lastNBest = (float)parses.get(Parser.NBest).f1;
+ parses.subList(Parser.NBest, parses.size()).clear(); // fixed off-by-one: size()-1 kept the single worst parse (subList toIndex is exclusive)
+ }
+ }
+ return parses;
+ }
+
+
+ /**
+ * This is the parallel non-projective edge re-arranger
+ *
+ * @param pos part-of-speech tags
+ * @param heads parent child relation
+ * @param labs edge labels
+ * @param x the data
+	 * @param extractor the feature extractor used to score candidate parses
+ * @throws InterruptedException
+ */
+	public static List<ParseNBest> rearrangeNBest(short[] pos, short[] heads, short[] labs, DataF x, Extractor extractor) throws InterruptedException {
+
+		ArrayList<ParallelRearrangeNBest> pe = new ArrayList<ParallelRearrangeNBest>();
+
+		int round =0;
+		ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>();
+		ParseNBest px =new ParseNBest();
+		px.signature(heads,labs);
+		//Object extractor;
+		px.f1=extractor.encode3(pos, heads, labs, x);
+		parses.add(px);
+
+		float lastNBest = Float.NEGATIVE_INFINITY;
+
+		HashSet<Parse> done = new HashSet<Parse>();
+		gnu.trove.THashSet<CharSequence> contained = new gnu.trove.THashSet<CharSequence>();
+		while(true) {
+
+			pe.clear();
+
+			// extend up to a dozen of the best not-yet-extended parses per round
+			int i=0;
+			while(true) {
+
+				if (parses.size()<=i||pe.size()>12) break;
+
+				ParseNBest parse = parses.get(i);
+
+				i++;
+
+				// parse already extended
+				if (done.contains(parse)) continue;
+
+//				DB.println("err "+parse.heads);
+
+				parse.signature2parse(parse.signature());
+
+				done.add(parse);
+				pe.add(new ParallelRearrangeNBest( pos,x,parse,lastNBest,extractor, (float)parse.f1,NON_PROJECTIVITY_THRESHOLD) );
+			}
+
+			executerService.invokeAll(pe);
+
+			// avoid adding the same parse several times
+			for(ParallelRearrangeNBest rp : pe) {
+				for(int k=rp.parses.size()-1;k>=0;k--) {
+					if (lastNBest>rp.parses.get(k).f1) continue;
+					CharSequence sig = rp.parses.get(k).signature();
+					if (!contained.contains(sig)) {
+						parses.add(rp.parses.get(k));
+						contained.add(sig);
+					}
+				}
+			}
+
+			Collections.sort(parses);
+
+			if (round >=2) break;
+			round ++;
+
+			// do not use too much memory; subList's toIndex is exclusive, so size() trims every parse beyond the n-best
+			if (parses.size()>Parser.NBest) {
+				if (parses.get(Parser.NBest).f1>lastNBest) lastNBest = (float)parses.get(Parser.NBest).f1;
+				parses.subList(Parser.NBest, parses.size()).clear();
+			}
+		}
+		return parses;
+	}
+
+ public static String getInfo() {
+
+ return "Decoder non-projectivity threshold: "+NON_PROJECTIVITY_THRESHOLD;
+ }
+
+
+ /**
+ * @param parses
+ * @param is
+ * @param i
+ * @return
+ */
+	public static int getGoldRank(List<ParseNBest> parses, Instances is, int i, boolean las) {
+
+		for(int p=0;p<parses.size();p++) {
+
+			if (parses.get(p).heads==null)parses.get(p).signature2parse(parses.get(p).signature());
+
+			boolean eq =true;
+			for(int w =1;w<is.length(i);w++) {
+				if (is.heads[i][w]!=parses.get(p).heads[w] || (is.labels[i][w]!=parses.get(p).labels[w]&& las )) {
+					eq=false;
+					break;
+				}
+			}
+			if (eq) return p;
+		}
+		return -1;
+	}
+
+	public static int getSmallestError(List<ParseNBest> parses, Instances is, int i, boolean las) {
+
+		int smallest=-1;
+		for(int p=0;p<parses.size();p++) {
+			if (parses.get(p).heads==null) parses.get(p).signature2parse(parses.get(p).signature());
+			int err=0;
+			for(int w =1;w<is.length(i);w++) {
+				if (is.heads[i][w]!=parses.get(p).heads[w] || (is.labels[i][w]!=parses.get(p).labels[w] && las )) {
+					err++;
+				}
+			}
+			if (smallest==-1||smallest>err) smallest=err;
+			if (smallest==0) return 0;
+		}
+		return smallest;
+	}
+
+ public static int getError(ParseNBest parse, Instances is, int i, boolean las) {
+
+
+ int err=0;
+ for(int w =1;w<is.length(i);w++) {
+ if (is.heads[i][w]!=parse.heads[w] || (is.labels[i][w]!=parse.labels[w] && las )) {
+ err++;
+ }
+ }
+ return err;
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Options.java b/dependencyParser/mate-tools/src/is2/parserR2/Options.java
new file mode 100755
index 0000000..b5ec0f9
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parserR2/Options.java
@@ -0,0 +1,77 @@
+package is2.parserR2;
+
+import is2.util.OptionsSuper;
+
+
+public final class Options extends OptionsSuper {
+
+
+ int start=0, end=0;
+ String prefix_model ="m";
+ String prefix_test ="t";
+
+ public Options (String[] args) {
+
+ for(int i = 0; i < args.length; i++) {
+
+ if (args[i].equals("--help")) explain();
+
+ if (args[i].equals("-decode")) {
+ decodeProjective = args[i+1].equals("proj"); i++;
+ } else if (args[i].equals("-decodeTH")) {
+ decodeTH = Double.parseDouble(args[i+1]); i++;
+ } else if (args[i].equals("-nonormalize")) {
+ normalize=false;
+ } else if (args[i].equals("-features")) {
+ features= args[i+1]; i++;
+ } else if (args[i].equals("-hsize")) {
+ hsize= Integer.parseInt(args[i+1]); i++;
+ } else if (args[i].equals("-len")) {
+ maxLen= Integer.parseInt(args[i+1]); i++;
+ } else if (args[i].equals("-cores")) {
+ cores= Integer.parseInt(args[i+1]); i++;
+ } else if (args[i].equals("-best")) {
+ best= Integer.parseInt(args[i+1]); i++;
+ } else if (args[i].equals("-start")) {
+ start= Integer.parseInt(args[i+1]); i++;
+ } else if (args[i].equals("-end")) {
+ end= Integer.parseInt(args[i+1]); i++;
+ } else if (args[i].equals("-prefix-model")) {
+ prefix_model= args[i+1]; i++;
+ } else if (args[i].equals("-prefix-test")) {
+ prefix_test= args[i+1]; i++;
+ } else if (args[i].equals("-mapping")) {
+ this.useMapping= args[i+1]; i++;
+ } else if (args[i].equals("-no2nd")) {
+ no2nd= true;
+ } else if (args[i].equals("-few2nd")) {
+ few2nd= true;
+ } else super.addOption(args, i);
+
+ }
+
+
+
+ }
+
+ private void explain() {
+ System.out.println("Usage: ");
+ System.out.println("java -class mate.jar is2.parser.Parser [Options]");
+ System.out.println();
+ System.out.println("Example: ");
+ System.out.println(" java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6");
+ System.out.println("");
+ System.out.println("Options:");
+ System.out.println("");
+ System.out.println(" -train <file> the corpus a model is trained on; default "+this.trainfile);
+ System.out.println(" -test <file> the input corpus for testing; default "+this.testfile);
+ System.out.println(" -out <file> the output corpus (result) of a test run; default "+this.outfile);
+ System.out.println(" -model <file> the parsing model for traing the model is stored in the files");
+ System.out.println(" and for parsing the model is load from this file; default "+this.modelName);
+ System.out.println(" -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters);
+ System.out.println(" -count <number> the n first sentences of the corpus are take for the training default "+this.count);
+ System.out.println(" -format <number> conll format of the year 8 or 9; default "+this.formatTask);
+
+ System.exit(0);
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Parameters.java b/dependencyParser/mate-tools/src/is2/parserR2/Parameters.java
new file mode 100755
index 0000000..0917ea8
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parserR2/Parameters.java
@@ -0,0 +1,38 @@
+/**
+ *
+ */
+package is2.parserR2;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import is2.data.FV;
+import is2.data.IFV;
+import is2.data.Instances;
+import is2.data.Parse;
+
+/**
+ * @author Bernd Bohnet, 31.08.2009
+ *
+ *
+ */
+public abstract class Parameters {
+
+
+ public abstract void average(double avVal);
+
+ public abstract void update(FV act, FV pred, Instances isd, int instc, Parse d, double upd, double e);
+
+ public abstract void write(DataOutputStream dos) throws IOException;
+
+ public abstract void read(DataInputStream dis ) throws IOException;
+
+ public abstract int size();
+
+ /**
+ * @return
+ */
+ public abstract IFV getFV() ;
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parserR2/ParametersFloat.java b/dependencyParser/mate-tools/src/is2/parserR2/ParametersFloat.java
new file mode 100755
index 0000000..44e6d76
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parserR2/ParametersFloat.java
@@ -0,0 +1,181 @@
+package is2.parserR2;
+
+import is2.data.F2SF;
+import is2.data.FV;
+import is2.data.FVR;
+import is2.data.Instances;
+import is2.data.Parse;
+import is2.util.DB;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+
+
+final public class ParametersFloat extends Parameters {
+
+ public float[] parameters;
+ private float[] total;
+
+ public ParametersFloat(int size) {
+ parameters = new float[size];
+ total = new float[size];
+ for(int i = 0; i < parameters.length; i++) {
+ parameters[i] = 0F;
+ total[i] = 0F;
+ }
+ }
+
+
+ /**
+ * @param parameters2
+ */
+ public ParametersFloat(float[] p) {
+ parameters =p;
+ }
+
+
+ @Override
+ public void average(double avVal) {
+ for(int j = 0; j < total.length; j++) {
+ parameters[j] = total[j]/((float)avVal);
+ }
+ total =null;
+ }
+
+ public ParametersFloat average2(double avVal) {
+ float[] px = new float[this.parameters.length];
+ for(int j = 0; j < total.length; j++) {
+ px[j] = total[j]/((float)avVal);
+ }
+ ParametersFloat pf = new ParametersFloat(px);
+ return pf;
+ }
+
+
+ public void update(FV act, FV pred, Instances isd, int instc, Parse dx, double upd, double e,
+ float d, float f) {
+
+ e++;
+
+ float lam_dist = d - f;
+
+ float b = (float)e-lam_dist;
+
+ FV dist = act.getDistVector(pred);
+
+ dist.update(parameters, total, hildreth(dist,b), upd,false);
+ }
+
+ public void update(FV act, FV pred, Instances isd, int instc, Parse dx, double upd, double e) {
+
+ e++;
+
+ float lam_dist = getScore(act) - getScore(pred);
+
+ float b = (float)e-lam_dist;
+
+ FV dist = act.getDistVector(pred);
+
+ dist.update(parameters, total, hildreth(dist,b), upd,false);
+ }
+
+ public void update(FVR act, FVR pred, Instances isd, int instc, Parse dx, double upd, double e, float lam_dist) {
+
+ e++;
+
+
+ float b = (float)e-lam_dist;
+
+ FVR dist = act.getDistVector(pred);
+
+ dist.update(parameters, total, hildreth(dist,b), upd,false);
+ }
+
+
+ protected double hildreth(FV a, double b) {
+
+ double A = a.dotProduct(a);
+ if (A<=0.0000000000000000001) return 0.0;
+ return b/A;
+ }
+
+
+ protected double hildreth(FVR a, double b) {
+
+ double A = a.dotProduct(a);
+ if (A<=0.0000000000000000001) return 0.0;
+ return b/A;
+ }
+
+
+ public float getScore(FV fv) {
+ if (fv ==null) return 0.0F;
+ return fv.getScore(parameters,false);
+
+ }
+
+ public float getScore(FVR fv) { //xx
+ if (fv ==null) return 0.0F;
+ return fv.getScore(parameters,false);
+
+ }
+
+
+ @Override
+ final public void write(DataOutputStream dos) throws IOException{
+
+ dos.writeInt(parameters.length);
+ for(float d : parameters) dos.writeFloat(d);
+
+ }
+
+ @Override
+ public void read(DataInputStream dis ) throws IOException{
+
+ parameters = new float[dis.readInt()];
+ int notZero=0;
+ for(int i=0;i<parameters.length;i++) {
+ parameters[i]=dis.readFloat();
+ if (parameters[i]!=0.0F) notZero++;
+ }
+
+
+ DB.println("read parameters "+parameters.length+" not zero "+notZero);
+
+ }
+
+ public int countNZ() {
+
+ int notZero=0;
+ for(int i=0;i<parameters.length;i++) {
+ if (parameters[i]!=0.0F) notZero++;
+ }
+ return notZero;
+
+ // DB.println("read parameters "+parameters.length+" not zero "+notZero);
+
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.sp09k99995.Parameters#getFV()
+ */
+ @Override
+ public F2SF getFV() {
+ return new F2SF(parameters);
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.sp09k99999.Parameters#size()
+ */
+ @Override
+ public int size() {
+ return parameters.length;
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Parser.java b/dependencyParser/mate-tools/src/is2/parserR2/Parser.java
new file mode 100755
index 0000000..b0cfe9e
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parserR2/Parser.java
@@ -0,0 +1,690 @@
+package is2.parserR2;
+
+
+import is2.data.Cluster;
+import is2.data.DataF;
+import is2.data.Edges;
+import is2.data.F2SF;
+import is2.data.FV;
+import is2.data.Instances;
+import is2.data.Long2Int;
+import is2.data.Long2IntInterface;
+import is2.data.MFB;
+import is2.data.Parse;
+import is2.data.ParseNBest;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter09;
+
+import is2.tools.Tool;
+import is2.util.DB;
+import is2.util.OptionsSuper;
+import is2.util.ParserEvaluator;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+import java.util.zip.ZipOutputStream;
+
+//import extractors.ExtractorClusterStackedR2;
+import extractors.Extractor;
+import extractors.ExtractorFactory;
+
+
+
+public class Parser implements Tool {
+
+ // output evaluation info
+ private static final boolean MAX_INFO = true;
+
+ public static int THREADS =4;
+
+ Long2IntInterface l2i;
+ ParametersFloat params;
+ Pipe pipe;
+ OptionsSuper options;
+
+ HashMap<Integer,Integer> rank = new HashMap<Integer,Integer>();
+ int amongxbest=0, amongxbest_ula=0, nbest=0,bestProj=0, smallestErrorSum=0, countAllNodes=0;
+ static int NBest =1000;
+
+ ExtractorFactory extractorFactory = new ExtractorFactory(ExtractorFactory.StackedClusteredR2);
+
+
+ /**
+ * Initialize the parser
+ * @param options
+ */
+ public Parser (OptionsSuper options) {
+
+ this.options=options;
+ pipe = new Pipe(options);
+
+ params = new ParametersFloat(0);
+
+ // load the model
+ try {
+ readModel(options, pipe, params);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ }
+
+
+ /**
+ * @param modelFileName The file name of the parsing model
+ */
+ public Parser(String modelFileName) {
+ this(new Options(new String[]{"-model",modelFileName}));
+ }
+
+
+ /**
+ *
+ */
+ public Parser() {
+ // TODO Auto-generated constructor stub
+ }
+
+
+ public static void main (String[] args) throws Exception
+ {
+
+ long start = System.currentTimeMillis();
+ OptionsSuper options = new Options(args);
+
+ NBest = options.best;
+
+ DB.println("n-best"+NBest);
+
+ Runtime runtime = Runtime.getRuntime();
+ THREADS = runtime.availableProcessors();
+ if (options.cores<THREADS&&options.cores>0) THREADS =options.cores;
+
+ DB.println("Found " + runtime.availableProcessors()+" cores use "+THREADS);
+
+ if (options.train) {
+
+ Parser p =new Parser();
+ p.options=options;
+
+ p.l2i = new Long2Int(options.hsize);
+
+ p.pipe = new Pipe (options);
+ Instances is = new Instances();
+
+ p.pipe.extractor = new Extractor[THREADS];
+
+ for (int t=0;t<THREADS;t++) p.pipe.extractor[t]=p.extractorFactory.getExtractor( p.l2i);
+
+ p.params = new ParametersFloat(p.l2i.size());
+
+ if (options.useMapping!=null) {
+ String model = options.modelName;
+
+ options.modelName = options.useMapping;
+ DB.println("Using mapping of model "+options.modelName);
+ ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName)));
+ zis.getNextEntry();
+ DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));
+ p.pipe.mf.read(dis);
+
+ DB.println("read\n"+p.pipe.mf.toString());
+
+ ParametersFloat params = new ParametersFloat(0);
+ params.read(dis);
+
+ Edges.read(dis);
+
+ dis.close();
+ DB.println("end read model");
+ options.modelName = model;
+ }
+
+
+ p.pipe.createInstances(options.trainfile,is);
+
+
+ p.train(options, p.pipe,p.params,is,p.pipe.cl);
+
+ p.writeModell(options, p.params, null,p.pipe.cl);
+
+ }
+
+ if (options.test) {
+
+ Parser p = new Parser();
+ p.options=options;
+
+ p. pipe = new Pipe(options);
+ p. params = new ParametersFloat(0); // total should be zero and the parameters are later read
+
+ // load the model
+
+ p.readModel(options, p.pipe, p.params);
+
+ DB.println("test on "+options.testfile);
+
+ System.out.println(""+p.pipe.mf.toString());
+
+
+ p.outputParses(options, p.pipe, p.params, !MAX_INFO);
+
+ }
+
+ System.out.println();
+
+ if (options.eval) {
+ System.out.println("\nEVALUATION PERFORMANCE:");
+ ParserEvaluator.evaluate(options.goldfile, options.outfile);
+ }
+
+ long end = System.currentTimeMillis();
+ System.out.println("used time "+((float)((end-start)/100)/10));
+
+ Decoder.executerService.shutdown();
+ Pipe.executerService.shutdown();
+ System.out.println("end.");
+
+
+ }
+
+ /**
+ * Read the models and mapping
+ * @param options
+ * @param pipe
+ * @param params
+ * @throws IOException
+ */
+ public void readModel(OptionsSuper options, Pipe pipe, Parameters params) throws IOException {
+
+
+ DB.println("Reading data started");
+
+ // prepare zipped reader
+ ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName)));
+ zis.getNextEntry();
+ DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));
+
+ pipe.mf.read(dis);
+
+ pipe.cl = new Cluster(dis);
+
+ params.read(dis);
+ this.l2i = new Long2Int(params.size());
+ DB.println("parsing -- li size "+l2i.size());
+
+ pipe.extractor = new Extractor[THREADS];
+
+ for (int t=0;t<THREADS;t++) pipe.extractor[t]=this.extractorFactory.getExtractor(l2i);
+
+ Edges.read(dis);
+
+ options.decodeProjective = dis.readBoolean();
+
+ int maxForm = dis.readInt();
+
+ for (int t=0;t<THREADS;t++) {
+ pipe.extractor[t].setMaxForm(maxForm);
+ pipe.extractor[t].initStat();
+ pipe.extractor[t].init();
+ }
+
+ boolean foundInfo =false;
+ try {
+ String info =null;
+ int icnt = dis.readInt();
+ for(int i=0;i<icnt;i++) {
+ info = dis.readUTF();
+ System.out.println(info);
+ }
+ } catch (Exception e) {
+ if (!foundInfo) System.out.println("no info about training");
+ }
+
+
+ dis.close();
+
+ DB.println("Reading data finnished");
+
+ Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH;
+ for (int t=0;t<THREADS;t++) {
+ pipe.extractor[t].initStat();
+ pipe.extractor[t].init();
+ }
+
+ }
+
+
+
+ /**
+ * Do the training
+ * @param instanceLengths
+ * @param options
+ * @param pipe
+ * @param params
+ * @param is
+ * @param cluster
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws ClassNotFoundException
+ */
+ public void train(OptionsSuper options, Pipe pipe, ParametersFloat params, Instances is, Cluster cluster)
+ throws IOException, InterruptedException, ClassNotFoundException {
+
+
+ DB.println("\nTraining Information ");
+ DB.println("-------------------- ");
+
+
+ Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH;
+
+ if (options.decodeProjective) System.out.println("Decoding: "+(options.decodeProjective?"projective":"non-projective"));
+ else System.out.println(""+Decoder.getInfo());
+ int numInstances = is.size();
+
+ int maxLenInstances =0;
+ for(int i=0;i<numInstances;i++) if (maxLenInstances<is.length(i)) maxLenInstances=is.length(i);
+
+ DataF data = new DataF(maxLenInstances, pipe.mf.getFeatureCounter().get(PipeGen.REL).shortValue());
+
+ int iter = 0;
+ int del=0;
+ float error =0;
+ float f1=0;
+
+ FV pred = new FV();
+ FV act = new FV();
+
+ double upd = (double)(numInstances*options.numIters)+1;
+
+ for(; iter < options.numIters; iter++) {
+
+ System.out.print("Iteration "+iter+": ");
+
+ long start = System.currentTimeMillis();
+
+ long last= System.currentTimeMillis();
+ error=0;
+ f1=0;
+ for(int n = 0; n < numInstances; n++) {
+
+ upd--;
+
+ if (is.labels[n].length>options.maxLen) continue;
+
+ String info = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F)
+ +" te "+((Pipe.timeExtract)/1000000F);
+
+ if((n+1) %500 == 0) del= PipeGen.outValueErr(n+1,Math.round(error*1000)/1000,f1/n,del, last, upd,info);
+
+ short pos[] = is.pposs[n];
+
+ data = pipe.fillVector((F2SF)params.getFV(), is, n, data, cluster, THREADS, l2i);
+
+ List<ParseNBest> parses = Decoder.decode(pos, data, options.decodeProjective,pipe.extractor[0]);
+ Parse d = parses.get(0);
+ double e= pipe.errors(is, n ,d);
+
+ if (d.f1>0)f1+=(d.labels.length-1 -e) /(d.labels.length-1);
+
+ if (e<=0) continue;
+
+ // get predicted feature vector
+ pred.clear();
+ pipe.extractor[0].encodeCat(is,n,pos,is.forms[n],is.plemmas[n],d.heads, d.labels, is.feats[n],pipe.cl, pred);
+
+ error += e;
+
+ act.clear();
+ pipe.extractor[0].encodeCat(is,n,pos,is.forms[n],is.plemmas[n],is.heads[n], is.labels[n], is.feats[n],pipe.cl, act);
+
+ params.update(act, pred, is, n, d, upd,e);
+ }
+
+ String info = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F)
+ +" te "+((Pipe.timeExtract)/1000000F)+" nz "+params.countNZ();
+ PipeGen.outValueErr(numInstances,Math.round(error*1000)/1000,f1/numInstances,del,last, upd,info);
+ del=0;
+ long end = System.currentTimeMillis();
+ System.out.println(" time:"+(end-start));
+
+
+ ParametersFloat pf = params.average2((iter+1)*is.size());
+ try {
+
+ if (options.testfile!=null) {
+ outputParses (options, pipe, pf, ! MAX_INFO);
+ ParserEvaluator.evaluate(options.goldfile, options.outfile);
+ // writeModell(options, pf, ""+(iter+1),pipe.cl);
+ }
+
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+
+ Decoder.timeDecotder=0;Decoder.timeRearrange=0; Pipe.timeExtract=0;
+
+
+ }
+ params.average(iter*is.size());
+ }
+
+
+ /**
+ * Do the parsing
+ * @param options
+ * @param pipe
+ * @param params
+ * @throws IOException
+ */
+ private void outputParses (OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo) throws Exception {
+
+ long start = System.currentTimeMillis();
+
+ CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask);
+ CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask);
+
+// ExtractorClusterStacked.initFeatures();
+
+ int cnt = 0;
+ int del=0;
+ long last = System.currentTimeMillis();
+
+ if (maxInfo) System.out.println("\nParsing Information ");
+ if (maxInfo) System.out.println("------------------- ");
+
+ if (maxInfo && !options.decodeProjective) System.out.println(""+Decoder.getInfo());
+
+ // if (!maxInfo) System.out.println();
+
+ String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)];
+ for (Entry<String, Integer> e : pipe.mf.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey();
+
+
+ System.out.print("Processing Sentence: ");
+
+ while(true) {
+
+ Instances is = new Instances();
+ is.init(1, new MFB(),options.formatTask);
+
+ SentenceData09 instance = pipe.nextInstance(is, depReader);
+ if (instance==null) break;
+ cnt++;
+
+ SentenceData09 i09 = this.parse(instance,params);
+
+ // }
+ depWriter.write(i09);
+ del=PipeGen.outValue(cnt, del,last);
+ // DB.println("xbest "+amongxbest+" cnt "+cnt+" "+((float)((float)amongxbest/cnt))+" nbest "+((float)nbest/cnt)+
+ // " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+" best-proj "+((float)bestProj/cnt));
+
+ }
+
+ //pipe.close();
+
+ depWriter.finishWriting();
+ long end = System.currentTimeMillis();
+ DB.println("rank\n"+rank+"\n");
+ DB.println("x-best-las "+amongxbest+" x-best-ula "+amongxbest_ula+" cnt "+cnt+" x-best-las "
+ +((float)((float)amongxbest/cnt))+
+ " x-best-ula "+((float)((float)amongxbest_ula/cnt))+
+ " nbest "+((float)nbest/cnt)+
+ " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+
+ " best-proj "+((float)bestProj/cnt)+
+ " Sum LAS "+((float)this.smallestErrorSum/countAllNodes));
+
+ // DB.println("errors "+error);
+
+ rank.clear();
+ amongxbest=0;amongxbest_ula=0;
+ cnt=0;
+ nbest=0;
+ bestProj=0;
+ if (maxInfo) System.out.println("Used time " + (end-start));
+ if (maxInfo) System.out.println("forms count "+Instances.m_count+" unkown "+Instances.m_unkown);
+
+ }
+
+
+ /**
+ * Do the parsing
+ * @param options
+ * @param pipe
+ * @param params
+ * @throws IOException
+ */
+ private void getNBest(OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo) throws Exception {
+
+
+ CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask);
+
+ // ExtractorClusterStacked.initFeatures();
+
+ int cnt = 0;
+
+ String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)];
+ for (Entry<String, Integer> e : pipe.mf.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey();
+
+// System.out.print("Processing Sentence: ");
+
+ while(true) {
+
+ Instances is = new Instances();
+ is.init(1, new MFB(),options.formatTask);
+
+ SentenceData09 instance = pipe.nextInstance(is, depReader);
+ if (instance==null) break;
+ cnt++;
+
+ this.parseNBest(instance);
+ }
+
+ //pipe.close();
+// depWriter.finishWriting();
+// long end = System.currentTimeMillis();
+// DB.println("rank\n"+rank+"\n");
+// DB.println("x-best-las "+amongxbest+" x-best-ula "+amongxbest_ula+" cnt "+cnt+" x-best-las "
+// +((float)((float)amongxbest/cnt))+
+// " x-best-ula "+((float)((float)amongxbest_ula/cnt))+
+// " nbest "+((float)nbest/cnt)+
+// " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+
+// " best-proj "+((float)bestProj/cnt));
+ // DB.println("errors "+error);
+
+
+ }
+
+
+ public SentenceData09 parse (SentenceData09 instance, ParametersFloat params) {
+
+ String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)];
+ for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey();
+
+ Instances is = new Instances();
+ is.init(1, new MFB(),options.formatTask);
+ new CONLLReader09().insert(is, instance);
+
+ String[] forms = instance.forms;
+
+ // use for the training ppos
+ DataF d2;
+ try {
+ d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl,THREADS,l2i);//cnt-1
+ } catch (Exception e ) {
+ e.printStackTrace();
+ return null;
+ }
+ short[] pos = is.pposs[0];
+
+ List<ParseNBest> parses=null;
+ Parse d= null;
+ try {
+ parses =Decoder.decode(pos,d2,options.decodeProjective,pipe.extractor[0]); //cnt-1
+ d = parses.get(0);
+ }catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ if (parses.size()>NBest) parses = parses.subList(0,NBest);
+
+ int g_las = Decoder.getGoldRank(parses, is,0,Decoder.LAS);
+ int g_ula = Decoder.getGoldRank(parses, is,0,!Decoder.LAS);
+
+ int smallest = Decoder.getSmallestError(parses, is,0,!Decoder.LAS);
+ smallestErrorSum+=is.length(0)-smallest;
+ countAllNodes+=is.length(0);
+
+ if (g_las>=0) amongxbest++;
+ if (g_ula>=0) amongxbest_ula++;
+
+ nbest+=parses.size();
+
+ Integer r = rank.get(g_las);
+ if (r==null) rank.put(g_las, 1);
+ else rank.put(g_las, r+1);
+
+ float err = (float)this.pipe.errors(is,0, d);
+
+ float errBestProj = (float)this.pipe.errors(is,0, Decoder.bestProj);
+
+ if (errBestProj==0) bestProj++;
+
+ SentenceData09 i09 = new SentenceData09(instance);
+
+ i09.createSemantic(instance);
+
+ for(int j = 0; j < forms.length-1; j++) {
+ i09.plabels[j] = types[d.labels[j+1]];
+ i09.pheads[j] = d.heads[j+1];
+ }
+ return i09;
+
+ }
+
+ public List<ParseNBest> parseNBest (SentenceData09 instance) {
+
+ Instances is = new Instances();
+ is.init(1, new MFB(),options.formatTask);
+ new CONLLReader09().insert(is, instance);
+
+ // use for the training ppos
+ DataF d2;
+ try {
+ d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl,THREADS, l2i);//cnt-1
+ } catch (Exception e ) {
+ e.printStackTrace();
+ return null;
+ }
+ short[] pos = is.pposs[0];
+
+ List<ParseNBest> parses=null;
+ try {
+ parses =Decoder.decode(pos,d2,options.decodeProjective,pipe.extractor[0]); //cnt-1
+ }catch (Exception e) {
+ e.printStackTrace();
+ }
+
+
+ if (parses.size()>NBest) parses = parses.subList(0,NBest);
+
+ return parses;
+
+ }
+
+
+
+ /* (non-Javadoc)
+ * @see is2.tools.Tool#apply(is2.data.SentenceData09)
+ */
+
+ @Override
+ public SentenceData09 apply(SentenceData09 snt09) {
+
+ SentenceData09 it = new SentenceData09();
+ it.createWithRoot(snt09);
+
+ SentenceData09 out=null;
+ try {
+
+
+ // for(int k=0;k<it.length();k++) {
+ // it.forms[k] = reader.normalize(it.forms[k]);
+ // it.plemmas[k] = reader.normalize(it.plemmas[k]);
+ // }
+
+ out = parse(it,this.params);
+
+
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+
+ Decoder.executerService.shutdown();
+ Pipe.executerService.shutdown();
+
+ return out;
+ }
+
+ /**
+ * Write the parsing model
+ *
+ * @param options
+ * @param params
+ * @param extension
+ * @throws FileNotFoundException
+ * @throws IOException
+ */
+ private void writeModell(OptionsSuper options, ParametersFloat params, String extension, Cluster cs) throws FileNotFoundException, IOException {
+
+ String name = extension==null?options.modelName:options.modelName+extension;
+// System.out.println("Writting model: "+name);
+ ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(name)));
+ zos.putNextEntry(new ZipEntry("data"));
+ DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos));
+
+ MFB.writeData(dos);
+ cs.write(dos);
+
+ params.write(dos);
+
+ Edges.write(dos);
+
+ dos.writeBoolean(options.decodeProjective);
+
+ dos.writeInt(pipe.extractor[0].getMaxForm());
+
+ dos.writeInt(5); // Info count
+ dos.writeUTF("Used parser "+Parser.class.toString());
+ dos.writeUTF("Creation date "+(new SimpleDateFormat("yyyy.MM.dd HH:mm:ss")).format(new Date()));
+ dos.writeUTF("Training data "+options.trainfile);
+ dos.writeUTF("Iterations "+options.numIters+" Used sentences "+options.count);
+ dos.writeUTF("Cluster "+options.clusterFile);
+
+ dos.flush();
+ dos.close();
+ }
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Pipe.java b/dependencyParser/mate-tools/src/is2/parserR2/Pipe.java
new file mode 100755
index 0000000..ad545ec
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parserR2/Pipe.java
@@ -0,0 +1,257 @@
+package is2.parserR2;
+
+import is2.data.Cluster;
+import is2.data.DataF;
+import is2.data.Edges;
+import is2.data.F2SF;
+import is2.data.Instances;
+import is2.data.Long2IntInterface;
+import is2.data.MFB;
+
+import is2.data.Parse;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+
+import is2.util.DB;
+import is2.util.OptionsSuper;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.concurrent.ExecutorService;
+
+import extractors.Extractor;
+import extractors.ExtractorClusterStacked;
+import extractors.ExtractorClusterStackedR2;
+import extractors.ParallelExtract;
+
+/**
+ * Feature-extraction pipeline of the parser: registers features of a corpus,
+ * builds edge filters, and fills the edge-score matrix used by the decoder.
+ */
+final public class Pipe extends PipeGen {
+
+ // one feature extractor per worker thread
+ public Extractor[] extractor;
+ // feature-to-integer mapping (words, lemmas, pos tags, relations, feats)
+ final public MFB mf = new MFB();
+
+ Cluster cl;
+
+
+ private OptionsSuper options;
+ // accumulated feature-extraction time in nanoseconds
+ public static long timeExtract;
+
+ public Pipe(OptionsSuper o) {
+ options = o;
+ }
+
+ /**
+ * Reads the training corpus twice: first to register all features and
+ * labels in the mapping, then to build the edge filters and store the
+ * instances for training.
+ */
+ public void createInstances(String file, Instances is)
+ // throws Exception
+
+ {
+
+
+ CONLLReader09 depReader = new CONLLReader09(file);
+
+ mf.register(REL,"<root-type>");
+
+ // register at least one predicate since the parsing data might not contain predicates as in
+ // the Japanese corpus but the development sets contain some
+
+ long sl=0;
+
+ System.out.print("Registering feature parts of sentence: ");
+ int ic = 0;
+ int del = 0;
+ while (true) {
+ SentenceData09 instance = depReader.getNext();
+ if (instance == null) break;
+ ic++;
+
+ sl+=instance.labels.length;
+
+ if (ic % 1000 == 0) {
+ del = outValue(ic, del);
+ }
+
+ String[] labs1 = instance.labels;
+ for (int i1 = 0; i1 < labs1.length; i1++) mf.register(REL, labs1[i1]);
+
+ String[] w = instance.forms;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1]));
+
+ w = instance.plemmas;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1]));
+
+
+ w = instance.ppos;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);
+
+ w = instance.gpos;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);
+
+ if (instance.feats !=null) {
+ String fs[][] = instance.feats;
+ for (int i1 = 0; i1 < fs.length; i1++){
+ w =fs[i1];
+ if (w==null) continue;
+ for (int i2 = 0; i2 < w.length; i2++) mf.register(FEAT, w[i2]);
+ }
+ }
+
+ if ((ic-1)>options.count) break;
+ }
+ del = outValue(ic, del);
+
+
+ for(Extractor e : extractor) {
+ e.setMaxForm(mf.getFeatureCounter().get(WORD));
+ }
+
+ if (options.clusterFile==null)cl = new Cluster();
+ else cl= new Cluster(options.clusterFile, mf,6);
+
+ mf.calculateBits();
+
+ System.out.println(""+mf.toString());
+
+ for(Extractor e : extractor) {
+ e.initStat();
+ e.init();
+ }
+
+ // second pass: read the corpus again to build edge filters and instances
+ depReader.startReading(file);
+
+ int num1 = 0;
+
+
+ Edges.init(mf.getFeatureCounter().get(POS));
+
+
+ System.out.print("Creating edge filters and read corpus: ");
+ del = 0;
+
+ is.init(ic, new MFB());
+
+ while (true) {
+ if (num1 % 100 == 0) del = outValue(num1, del);
+
+ SentenceData09 instance1 = depReader.getNext(is);
+
+ if (instance1 == null) break;
+
+ int last = is.size() - 1;
+ short[] pos =is.pposs[last];
+
+ // record each observed (head-pos, dependent-pos, direction, label) combination
+ for (int k = 0; k < is.length(last); k++) {
+ if (is.heads[last][k] < 0) continue;
+ Edges.put(pos[is.heads[last][k]],pos[k], k < is.heads[last][k],is.labels[last][k]);
+ }
+
+ if (!options.allFeatures && num1 > options.count) break;
+
+ num1++;
+
+ }
+ del = outValue(num1, del);
+ System.out.println();
+ Edges.findDefault();
+ }
+
+
+ /**
+ * Reads the instances of a file without registering features or building
+ * edge filters (used when the feature mapping already exists).
+ */
+ public void getInstances(String file, Instances is){
+ CONLLReader09 depReader = new CONLLReader09(file);
+
+ int ic =options.count+2;
+
+ is.init(ic, new MFB());
+
+ int num1 =0,del=0;
+ while (true) {
+ if (num1 % 100 == 0) del = outValue(num1, del);
+
+ SentenceData09 instance1 = depReader.getNext(is);
+
+ if (instance1 == null) break;
+
+ if (!options.allFeatures && num1 > options.count) break;
+
+ num1++;
+
+ }
+ del = outValue(num1, del);
+ System.out.println();
+
+ }
+
+
+ /**
+ * Creates an instance for outputParses
+ *
+ * @param is the instances container the sentence is appended to
+ * @return the next sentence, or null at end of input or if it has no forms
+ * @throws IOException
+ */
+ protected final SentenceData09 nextInstance(Instances is, CONLLReader09 depReader) throws Exception {
+
+ SentenceData09 instance = depReader.getNext(is);
+ if (instance == null || instance.forms == null) return null;
+
+ return instance;
+ }
+
+ // shared worker pool for parallel feature extraction
+ public static ExecutorService executerService =java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS);
+
+
+ /**
+ * Computes the edge scores of one sentence in parallel and stores them in
+ * the (possibly reused) score matrix d, which is (re)allocated when too small.
+ */
+ public DataF fillVector(F2SF params, Instances is,int inst, DataF d, Cluster cluster, int threads, Long2IntInterface li) throws InterruptedException {
+
+ long ts = System.nanoTime();
+
+ // recreate the pool if it was shut down (e.g. after a previous one-shot parse)
+ if (executerService.isShutdown()) executerService =java.util.concurrent.Executors.newCachedThreadPool();
+
+
+ final int length = is.length(inst);
+ if (d ==null || d.len<length)d = new DataF(length,mf.getFeatureCounter().get(PipeGen.REL).shortValue());
+
+ ArrayList<ParallelExtract> pe = new ArrayList<ParallelExtract>();
+
+
+ for(int i=0;i<threads;i++) {
+
+// DB.println(""+((ExtractorClusterStackedR2)extractor[i]).s_dist);
+ pe.add(new ParallelExtract( extractor[i],is, inst, d, (F2SF)params.clone(), cluster, li));
+ }
+
+ // enqueue all ordered word pairs; the workers pull from this shared queue
+ for (int w1 = 0; w1 < length; w1++) {
+ for (int w2 = 0; w2 < length; w2++) {
+ if (w1==w2) continue;
+ ParallelExtract.add(w1, w2);
+ }
+ }
+ executerService.invokeAll( pe);
+
+ timeExtract += (System.nanoTime()-ts);
+
+
+ return d;
+ }
+
+ /**
+ * the loss function: number of tokens (root excluded) whose head or label
+ * differs from the gold annotation; head and label errors count 0.5 each.
+ */
+ public double errors( Instances is, int ic, Parse p) {
+
+ if (p.heads==null) p.signature2parse(p.signature());
+ short[] act = is.heads[ic];
+ double correct = 0;
+
+ // do not count root
+ for(int i = 1; i < act.length; i++) {
+ if (p.heads[i]==act[i] ){
+ correct+=0.5;
+ if (p.labels[i]==is.labels[ic][i] ) correct+=0.5;
+ }
+ }
+
+ double x = ((double)act.length- 1 - correct );
+
+ //p.f1 = (double)correct / (double)(act.length-1);
+
+ return x;
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/parserR2/PipeReranker.java b/dependencyParser/mate-tools/src/is2/parserR2/PipeReranker.java
new file mode 100644
index 0000000..87286ac
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parserR2/PipeReranker.java
@@ -0,0 +1,138 @@
+package is2.parserR2;
+
+import is2.data.Cluster;
+import is2.data.DataF;
+import is2.data.Edges;
+import is2.data.F2SF;
+import is2.data.Instances;
+import is2.data.MFB;
+import is2.data.ParseNBest;
+
+import is2.data.Parse;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+
+import is2.util.OptionsSuper;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.concurrent.ExecutorService;
+
+import extractors.ExtractorClusterStacked;
+import extractors.ExtractorReranker;
+import extractors.ParallelExtract;
+
+/**
+ * Reduced pipeline used by the reranker: it registers the corpus features
+ * and initializes the reranker extractor, but stores no training instances.
+ */
+final public class PipeReranker extends PipeGen {
+
+ public ExtractorReranker extractor;
+ // feature-to-integer mapping (words, lemmas, pos tags, relations, feats)
+ final public MFB mf = new MFB();
+
+ Cluster cl;
+
+
+ private OptionsSuper options;
+ public static long timeExtract;
+
+ public PipeReranker(OptionsSuper o) {
+ options = o;
+ }
+
+ /**
+ * Registers the features (labels, forms, lemmas, pos tags, morphological
+ * feats) of the corpus and initializes extractor, cluster and edge tables.
+ */
+ public void createInstances(String file, Instances is)
+ // throws Exception
+
+ {
+
+
+ CONLLReader09 depReader = new CONLLReader09(file);
+
+ mf.register(REL,"<root-type>");
+
+ // register at least one predicate since the parsing data might not contain predicates as in
+ // the Japanese corpus but the development sets contain some
+
+ long sl=0;
+
+ System.out.print("Registering feature parts of sentence: ");
+ int ic = 0;
+ int del = 0;
+ while (true) {
+ SentenceData09 instance = depReader.getNext();
+ if (instance == null) break;
+ ic++;
+
+ sl+=instance.labels.length;
+
+ if (ic % 1000 == 0) {
+ del = outValue(ic, del);
+ }
+
+ String[] labs1 = instance.labels;
+ for (int i1 = 0; i1 < labs1.length; i1++) mf.register(REL, labs1[i1]);
+
+ String[] w = instance.forms;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1]));
+
+ w = instance.plemmas;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1]));
+
+
+ w = instance.ppos;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);
+
+ w = instance.gpos;
+ for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]);
+
+ if (instance.feats !=null) {
+ String fs[][] = instance.feats;
+ for (int i1 = 0; i1 < fs.length; i1++){
+ w =fs[i1];
+ if (w==null) continue;
+ for (int i2 = 0; i2 < w.length; i2++) mf.register(FEAT, w[i2]);
+ }
+ }
+
+ if ((ic-1)>options.count) break;
+ }
+ del = outValue(ic, del);
+
+ System.out.println();
+ ExtractorReranker.initFeatures();
+
+ ExtractorReranker.maxForm = mf.getFeatureCounter().get(WORD);
+
+ if (options.clusterFile==null)cl = new Cluster();
+ else cl= new Cluster(options.clusterFile, mf,6);
+
+ mf.calculateBits();
+ extractor.initStat();
+
+ System.out.println(""+mf.toString());
+
+ extractor.init();
+ depReader.startReading(file);
+
+ int num1 = 0;
+
+ is.init(ic, new MFB());
+
+ Edges.init(mf.getFeatureCounter().get(POS));
+
+ del = 0;
+
+
+ del = outValue(num1, del);
+ System.out.println();
+ }
+
+
+
+ // shared worker pool; sized by the parser's thread count
+ public static ExecutorService executerService =java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS);
+
+
+
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Reranker.java b/dependencyParser/mate-tools/src/is2/parserR2/Reranker.java
new file mode 100644
index 0000000..a531c3c
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parserR2/Reranker.java
@@ -0,0 +1,1059 @@
+package is2.parserR2;
+
+
+import is2.data.Cluster;
+import is2.data.DataF;
+import is2.data.Edges;
+import is2.data.F2SF;
+import is2.data.FVR;
+import is2.data.Instances;
+import is2.data.Long2Int;
+import is2.data.Long2IntInterface;
+import is2.data.MFB;
+import is2.data.Parse;
+import is2.data.ParseNBest;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter09;
+import is2.tools.Tool;
+import is2.util.DB;
+import is2.util.OptionsSuper;
+import is2.util.ParserEvaluator;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+import java.util.zip.ZipOutputStream;
+
+import extractors.Extractor;
+import extractors.ExtractorClusterStacked;
+import extractors.ExtractorReranker;
+
+
+
+public class Reranker implements Tool {
+
+
+ public static int THREADS =4;
+
+ Long2IntInterface l2i;
+
+ // the parser models
+ ParametersFloat paramsParsers[];
+
+ // the re-ranker model
+ ParametersFloat parametersReranker;
+ PipeReranker pipeReranker;
+ Pipe pipe;
+ Options options;
+
+ HashMap<Integer,Integer> rank = new HashMap<Integer,Integer>();
+ int amongxbest=0, amongxbest_ula=0, nbest=0,bestProj=0, smallestErrorSum=0, countAllNodes=0;
+ static int NBest =1000;
+
+
+
+
+ /**
+ * Initialize the reranker with already parsed options.
+ * @param options the command-line options (model name, n-best size, ...)
+ */
+ public Reranker (Options options) {
+
+ this.options=options;
+
+ }
+
+
+ /**
+ * Convenience constructor that loads the given model.
+ * @param modelFileName The file name of the parsing model
+ */
+ public Reranker(String modelFileName) {
+ this(new Options(new String[]{"-model",modelFileName}));
+ }
+
+
+
+ /**
+ * Creates an uninitialized reranker; fields must be set before use
+ * (as done in {@code main}).
+ */
+ public Reranker() {
+ // TODO Auto-generated constructor stub
+ }
+
+
+ /**
+ * Command-line entry point: trains and/or tests the reranker depending on
+ * the -train/-test/-eval options, then shuts down the worker pools.
+ */
+ public static void main (String[] args) throws Exception
+ {
+
+ long start = System.currentTimeMillis();
+ Options options = new Options(args);
+
+ NBest = options.best;
+
+ DB.println("n-best "+NBest);
+
+ // default to one thread per core, capped by the -cores option
+ Runtime runtime = Runtime.getRuntime();
+ THREADS = runtime.availableProcessors();
+
+ if (options.cores<THREADS&&options.cores>0) THREADS =options.cores;
+
+ DB.println("Found " + runtime.availableProcessors()+" cores use "+THREADS);
+
+
+ if (options.train) {
+
+ Reranker p =new Reranker();
+ p.options=options;
+
+
+ p.l2i = new Long2Int(options.hsize);
+ p.pipeReranker = new PipeReranker(options);
+ p.pipeReranker.extractor = new ExtractorReranker(p.l2i);
+
+
+ // initialize the parser
+ p.pipe = new Pipe(options);
+
+ // read parsing models
+ p.paramsParsers = new ParametersFloat[options.end+1];
+ for(int m=0;m<=options.end;m++) {
+ String name = options.prefix_model+m;
+ p.paramsParsers[m] = new ParametersFloat(0);
+ p.readModel(name, p.pipe, p.paramsParsers[m]);
+ }
+
+ // set up the reranker
+ p.parametersReranker = new ParametersFloat(p.l2i.size());
+
+ Instances[] iss = new Instances[options.end+1];
+
+ for(int m=0;m<=options.end;m++) {
+ String name = options.prefix_test+m;
+ iss[m] = new Instances();
+ DB.println("create instances of part "+name);
+ p.pipe.getInstances(name, iss[m]);
+ }
+
+
+ ExtractorReranker.initFeatures();
+ p.pipeReranker.extractor.init();
+
+ p.pipeReranker.extractor.initStat();
+
+ p.train(options,iss);
+
+ p.writeModell(options, p.parametersReranker, null,p.pipe.cl);
+ }
+
+ if (options.test) {
+
+ Reranker p = new Reranker();
+ p.options=options;
+
+ // set up the reranker
+ p.l2i = new Long2Int(options.hsize);
+ p.pipeReranker = new PipeReranker(options);
+ p.pipeReranker.extractor = new ExtractorReranker(p.l2i);
+ p.parametersReranker = new ParametersFloat(p.l2i.size());
+
+
+ // initialize the parser
+ p.pipe = new Pipe(options);
+
+ // read parsing models
+ p.paramsParsers = new ParametersFloat[options.end+1];
+
+ // skip loading the parser models when a cached n-best file exists
+ String nbestName ="n-best+"+options.testfile.substring(options.testfile.length()-12,options.testfile.length()-1);
+ File fnbest = new File(nbestName);
+ int read = fnbest.exists()?2:1;
+
+ if (read != 2)
+ for(int m=0;m<=options.end;m++) {
+ String name = options.prefix_model+m;
+ p.paramsParsers[m] = new ParametersFloat(0);
+ p.readModel(name, p.pipe, p.paramsParsers[m]);
+ }
+
+ p.readModel(options.modelName, p.pipeReranker, p.parametersReranker);
+
+
+ ExtractorReranker.initFeatures();
+ p.pipeReranker.extractor.initStat();
+ p.pipeReranker.extractor.init();
+
+ p.rerankedParses(options, p.pipe, p.parametersReranker, false, nbestName);
+
+ }
+
+ System.out.println();
+
+ if (options.eval) {
+ System.out.println("\nEVALUATION PERFORMANCE:");
+ ParserEvaluator.evaluate(options.goldfile, options.outfile);
+ }
+
+ long end = System.currentTimeMillis();
+ System.out.println("used time "+((float)((end-start)/100)/10));
+
+ Decoder.executerService.shutdown();
+ Pipe.executerService.shutdown();
+ System.out.println("end.");
+
+
+ }
+
+ /**
+ * Read a parser model (zip file written by the parser's training) and
+ * initialize the pipe's mapping, cluster, edge filters and extractors.
+ * @param modelName file name of the parsing model
+ * @param pipe the parser pipeline to initialize from the model
+ * @param prm the parameter vector to fill
+ * @throws IOException if the model cannot be read
+ */
+ public void readModel(String modelName, Pipe pipe, Parameters prm) throws IOException {
+
+
+ DB.println("Reading data started: "+modelName);
+
+ // prepare zipped reader
+ ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(modelName)));
+ zis.getNextEntry();
+ DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));
+
+ pipe.mf.read(dis);
+
+ pipe.cl = new Cluster(dis);
+
+ prm.read(dis);
+
+ Long2Int l2i = new Long2Int(prm.size());
+ DB.println("li size "+l2i.size());
+
+ // one extractor per thread, all sharing the same hash function
+ pipe.extractor = new ExtractorClusterStacked[THREADS];
+
+ for (int t=0;t<THREADS;t++) pipe.extractor[t]=new ExtractorClusterStacked(l2i);
+
+ ExtractorClusterStacked.initFeatures();
+
+
+ for (int t=0;t<THREADS;t++) {
+ pipe.extractor[t].initStat();
+ pipe.extractor[t].init();
+ }
+
+ Edges.read(dis);
+
+ options.decodeProjective = dis.readBoolean();
+
+ ExtractorClusterStacked.maxForm = dis.readInt();
+
+ // trailing info strings are optional; older models may not have them
+ boolean foundInfo =false;
+ try {
+ String info =null;
+ int icnt = dis.readInt();
+ for(int i=0;i<icnt;i++) {
+ info = dis.readUTF();
+ System.out.println(info);
+ }
+ } catch (Exception e) {
+ if (!foundInfo) System.out.println("no info about training");
+ }
+
+
+ dis.close();
+
+ DB.println("Reading data finnished");
+
+ Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH;
+
+ // ExtractorClusterStacked.initStat();
+
+ }
+
+ /**
+ * Read a reranker model (zip file written by {@code writeModell}) and
+ * initialize the reranker pipe's mapping, cluster and extractor.
+ * @param modelName file name of the reranker model
+ * @param pipe the reranker pipeline to initialize
+ * @param params the reranker parameter vector to fill
+ * @throws IOException if the model cannot be read
+ */
+ public void readModel(String modelName, PipeReranker pipe, Parameters params) throws IOException {
+
+ DB.println("Reading data started: "+modelName);
+
+ // prepare zipped reader
+ ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(modelName)));
+ zis.getNextEntry();
+ DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));
+
+ pipe.mf.read(dis);
+
+ // DB.println("reranker model "+pipe.mf.toString());
+
+ pipe.cl = new Cluster(dis);
+
+ params.read(dis);
+ this.l2i = new Long2Int(params.size());
+ DB.println("li size "+l2i.size());
+
+ pipe.extractor = new ExtractorReranker(l2i);
+
+ ExtractorReranker.initFeatures();
+ ExtractorReranker.initStat();
+
+ pipe.extractor.init();
+
+ Edges.read(dis);
+
+ options.decodeProjective = dis.readBoolean();
+
+ ExtractorClusterStacked.maxForm = dis.readInt();
+
+ // trailing info strings are optional; older models may not have them
+ boolean foundInfo =false;
+ try {
+ String info =null;
+ int icnt = dis.readInt();
+ for(int i=0;i<icnt;i++) {
+ info = dis.readUTF();
+ System.out.println(info);
+ }
+ } catch (Exception e) {
+ if (!foundInfo) System.out.println("no info about training");
+ }
+
+
+ dis.close();
+
+ DB.println("Reading data finnished");
+
+ Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH;
+
+ //ExtractorClusterStacked.initStat();
+
+ }
+
+
+ /**
+ * Do the training: for each iteration, obtain an n-best parse list per
+ * sentence (computed by the base parsers, or cached on disk in the file
+ * "n-best"), score each candidate with the reranker features, and update
+ * the reranker parameters when the best-scored parse is worse than the
+ * best parse in the list (perceptron-style with margin {@code lam_dist}).
+ *
+ * @param options training settings (iterations, counts, maxLen, ...)
+ * @param iss one Instances container per training part / base parser
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws ClassNotFoundException
+ */
+ public void train(OptionsSuper options, Instances[] iss)
+ throws IOException, InterruptedException, ClassNotFoundException {
+
+
+
+ int read = 0; // 0 compute -- 1 compute and write -- 2 read parses
+
+
+ DB.println("Training Information ");
+ DB.println("-------------------- ");
+
+ ExtractorReranker.initStat();
+ pipeReranker.extractor.init();
+
+ for(Extractor e : this.pipe.extractor) {
+ e.init();
+ }
+
+ int numInstances =0;
+ int maxLenInstances =0;
+ // int maxLenSentence=1;
+ for(Instances is : iss) {
+ numInstances += is.size();
+ for(int i=0;i<is.size();i++) if (maxLenInstances<is.length(i)) maxLenInstances=is.length(i);
+ }
+
+
+ DataF data = new DataF(maxLenInstances, pipe.mf.getFeatureCounter().get(PipeGen.REL).shortValue());
+
+ int iter = 0;
+ int del=0;
+ float error =0;
+ float f1=0;
+
+
+
+
+ double upd = (double)(options.count*options.numIters)+options.numIters*10;
+
+ //float[][] = new float[this.NBest][3];
+ FVR act = new FVR();
+
+ FVR pred = new FVR();
+
+ FVR f = new FVR();
+ // reusable buffer for the extracted feature codes of one parse
+ long[] vs = new long[ExtractorReranker._FC*maxLenInstances];
+
+
+ for(; iter < options.numIters; iter++) {
+
+
+
+
+ System.out.print("Iteration "+iter+": ");
+ error=0;
+ f1=0;
+
+ float las =0, cnt=0,averageScore=0;
+
+
+ float firstBestTotalError=0,totalError=0;
+
+ long start = System.currentTimeMillis();
+
+ long last= System.currentTimeMillis();
+
+ long rerankTime = 0;
+
+
+ // decide whether to compute the n-best lists or reuse the cached file
+ String nbest ="n-best";
+ File fnbest = new File(nbest);
+ read = fnbest.exists()?2:1;
+
+ DataInputStream dis =null;
+ DataOutputStream dos = null;
+
+ if (read==1) {
+
+ DB.println("computing and writting nbest list to file: "+nbest);
+
+ ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(nbest)));
+ zos.putNextEntry(new ZipEntry("data"));
+ dos = new DataOutputStream(new BufferedOutputStream(zos));
+ }
+
+
+ // start reading again
+ if (read ==2) {
+
+ // DB.println("reading nbest list from file: "+nbest);
+
+ ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(nbest)));
+ zis.getNextEntry();
+ dis = new DataInputStream(new BufferedInputStream(zis));
+ }
+
+ // counts how often each n-best position was selected by the reranker
+ HashMap<Integer,Integer> remapped = new HashMap<Integer,Integer>();
+
+ int i=0,round=0,instance=0,length=0,count=0, changes=0;
+ for(Instances is : iss) {
+
+ F2SF fparser =this.paramsParsers[instance].getFV();
+ round++;
+
+
+ // go over the sentences in the instance
+ for(int n = 0; n < is.size(); n++) {
+ count+=1;
+ length +=is.length(n);
+ upd--;
+
+ if (is.labels[n].length>options.maxLen) continue;
+
+ List<ParseNBest> parses=null;
+
+ short pos[] = is.pposs[n];
+
+ // read or write nbest list
+ if (read==0|| read==1 && iter==0) {
+ data = pipe.fillVector(fparser, is, n, data, pipe.cl,THREADS,l2i);
+ parses = Decoder.decode(pos, data, options.decodeProjective,pipe.extractor[0]);
+
+ if (parses.size()>NBest) parses = parses.subList(0, NBest);
+
+
+ if (read==1) {
+ // write the forest
+ dos.writeInt(parses.size());
+ for(int k=0;k<parses.size();k++) {
+ dos.writeUTF(parses.get(k).signature());
+ dos.writeFloat((float)parses.get(k).f1);
+ }
+ }
+ } else if (read==2) {
+ parses = new ArrayList<ParseNBest>();
+ int parseCount = dis.readInt();
+ for(int k=0;k<parseCount;k++) {
+ ParseNBest p = new ParseNBest(dis.readUTF(),dis.readFloat());
+ if (parses.size()<NBest) parses.add(p);
+ }
+ }
+
+
+ int best =0; float bestScore=-100;
+ int goldBest =0; double goldError=Float.MAX_VALUE;
+
+ long startReranking = System.currentTimeMillis();
+
+ // score the n-best parses
+ for(int k=0;k<parses.size();k++) {
+
+ ParseNBest p= parses.get(k);
+
+ pipeReranker.extractor.extractFeatures(is,n,p,parses.indexOf(p),vs,pipe.cl);
+
+ int rank=1*ExtractorReranker.s_type;
+
+ f.clear();
+
+ // vs is terminated by Integer.MIN_VALUE; non-positive codes are skipped
+ for(int j=0;j<vs.length;j++) {
+ if (vs[j]==Integer.MIN_VALUE) break;
+ if (vs[j]>0) f.add(pipeReranker.extractor.li.l2i(vs[j]+rank));
+ }
+
+ // add the base parser's score as an additional feature
+ f.add(pipeReranker.extractor.li.l2i(1+rank),(float)p.f1);
+ float score = (float)(parametersReranker.getScore(f));
+ if (score>bestScore) { //rankScore[k][2]>
+ bestScore =score;
+ best=k;
+
+ }
+ }
+
+ // get the best parse in the n-best list
+ for(int k=0;k<parses.size();k++) {
+
+ if (parses.get(k).heads.length!=is.length(n)) {
+ DB.println("error "+n+" "+parses.get(k).heads.length+" "+is.length(n));
+ continue;
+ }
+ double errg = pipe.errors(is, n, parses.get(k));
+ if (goldError > errg) {
+ goldError = errg;
+ goldBest=k;
+ }
+ }
+
+ ParseNBest firstBest = parses.get(0);
+ ParseNBest predParse = parses.get(best);
+ ParseNBest goldBestParse = parses.get(goldBest);
+
+ double e= pipe.errors(is, n ,predParse);
+
+ Integer ctb = remapped.get(best);
+ if (ctb==null) remapped.put(best, 1);
+ else remapped.put(best, ctb+1);
+
+ String info = " 1best-error "+((length-firstBestTotalError)/length)+
+ " reranked "+((length-totalError)/length)+
+ " chd "+changes+" "+" ps las "+(las/cnt)+" avs "+((float)averageScore/(float)count)+" ";
+
+
+
+ if((n+1) %500 == 0) del= PipeGen.outValueErr(count,Math.round(error*1000)/1000,f1/count,del, last, upd,info);
+
+ firstBestTotalError+=Decoder.getError(firstBest, is, n, Decoder.LAS);
+
+ totalError+=Decoder.getError(predParse, is, n, Decoder.LAS);
+
+
+ rerankTime +=System.currentTimeMillis()-startReranking;
+
+ if (best!=0){
+ changes++;
+ }
+
+ las +=is.length(n)-Decoder.getError(goldBestParse, is, n, Decoder.LAS);
+ cnt +=is.length(n);
+
+ averageScore+=predParse.f1;
+
+
+ if (options.count<count) break;
+
+
+ // no update needed when the predicted parse is at least as good
+ if (Decoder.getError(goldBestParse, is, n, Decoder.LAS)>=
+ Decoder.getError(predParse, is, n, Decoder.LAS) ) continue;
+
+
+ // get predicted feature vector
+ pipeReranker.extractor.extractFeatures(is,n,predParse,parses.indexOf(predParse),vs,pipe.cl);
+
+ pred.clear();
+ int rank=1*ExtractorReranker.s_type;
+
+ for(int j=0;j<vs.length;j++) {
+ if (vs[j]==Integer.MIN_VALUE) break;
+ if (vs[j]>0) pred.add(pipeReranker.extractor.li.l2i(vs[j]+rank));
+ }
+ pred.add(pipeReranker.extractor.li.l2i(1+rank),(float)predParse.f1);
+ error += 1;
+
+ // get the gold-best feature vector
+ pipeReranker.extractor.extractFeatures(is,n,goldBestParse,parses.indexOf(goldBestParse),vs,pipe.cl);
+
+
+ act.clear();
+ rank=1*ExtractorReranker.s_type;
+ for(int j=0;j<vs.length;j++) {
+ if (vs[j]==Integer.MIN_VALUE) break;
+ if (vs[j]>0) act.add(pipeReranker.extractor.li.l2i(vs[j]+rank));
+ }
+
+ act.add(pipeReranker.extractor.li.l2i(1+rank),(float)goldBestParse.f1);
+ float lam_dist =(float)( parametersReranker.getScore(act) -
+ (parametersReranker.getScore(pred)));
+
+
+
+ parametersReranker.update(act, pred, is, n, null, upd, e,lam_dist);
+
+ }
+ instance++;
+
+ }
+
+ String info = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F)
+ +" te "+((Pipe.timeExtract)/1000000F)+" nz "+parametersReranker.countNZ()+
+ " 1best-error "+((length-firstBestTotalError)/length)+
+ " reranked-best "+((length-totalError)/length)+
+ " rds "+round+" "+
+ " rerank-t "+(rerankTime/count)+
+ " chd "+changes+" "+" ps las "+(las/cnt)+" avs "+((float)averageScore/(float)count)+" ";
+
+
+ // DB.println("remapped "+remapped);
+
+ PipeGen.outValueErr(count,Math.round(error*1000)/1000,f1/count,del,last, upd,info);
+ del=0;
+ long end = System.currentTimeMillis();
+ System.out.println(" time:"+(end-start));
+ i++;
+ // ParametersFloat pf = params.average2((iter+1)*is.size());
+
+
+
+ Decoder.timeDecotder=0;Decoder.timeRearrange=0; Pipe.timeExtract=0;
+
+ if (dos!=null)dos.close();
+ if (dis!=null)dis.close();
+
+ }
+ DB.println("sb "+parametersReranker.parameters[this.pipeReranker.extractor.li.l2i(4090378920L+1*ExtractorReranker.s_type)]);//4090378266
+ parametersReranker.average(iter*numInstances);
+
+ }
+
+
+ /**
+ * Do the parsing: for each base parser, obtain the n-best list per test
+ * sentence (computed, or read from the cache file nbestName), pick the
+ * candidate with the highest reranker score, and write it to the output.
+ * @param options test settings (test file, output file, format)
+ * @param pipe the base parser pipeline
+ * @param params the reranker parameters
+ * @param maxInfo if true, print timing and coverage statistics
+ * @param nbestName file name of the cached n-best lists
+ * @throws IOException
+ */
+ private void rerankedParses (OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo, String nbestName) throws Exception {
+
+ long start = System.currentTimeMillis();
+
+ ExtractorClusterStacked.initFeatures();
+
+ DataInputStream dis =null;
+ DataOutputStream dos = null;
+
+ float olas=0, olcnt =0;
+
+ // reuse the cached n-best file when present, otherwise compute and write it
+ File fnbest = new File(nbestName);
+ int read = fnbest.exists()?2:1;
+ if (read==1) {
+
+ DB.println("computing and writting nbest list to file: "+nbestName);
+
+ ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(nbestName)));
+ zos.putNextEntry(new ZipEntry("data"));
+ dos = new DataOutputStream(new BufferedOutputStream(zos));
+ }
+
+
+
+ if (read ==2) {
+
+ // DB.println("reading nbest list from file: "+nbestName);
+
+ ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(nbestName)));
+ zis.getNextEntry();
+ dis = new DataInputStream(new BufferedInputStream(zis));
+ }
+
+ for(int m =0;m< this.paramsParsers.length;m++) {
+
+
+ CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask);
+ CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask);
+
+ float las=0,lcnt =0, averageScore =0;
+ int cnt = 0;
+ int del=0;
+
+
+ long last = System.currentTimeMillis();
+
+
+ // reverse mapping from relation id to relation label
+ String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)];
+ for (Entry<String, Integer> e : pipe.mf.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey();
+
+ System.out.print("Processing Sentence: ");
+
+
+ FVR f = new FVR();
+
+ while(true) {
+
+ Instances is = new Instances();
+ is.init(1, new MFB(),options.formatTask);
+
+ SentenceData09 instance = pipe.nextInstance(is, depReader);
+ if (instance==null) break;
+ cnt++;
+
+ String[] forms = instance.forms;
+
+ List<ParseNBest> parses =null;
+
+ // read or write nbest list
+ if (read==0|| read==1) {
+ parses = this.parseNBest(instance, this.paramsParsers[m]);
+ // data = pipe.fillVector(fparser, is, n, data, pipe.cl,THREADS);
+ // parses = Decoder.decode(pos, data, options.decodeProjective);
+
+ if (parses.size()>NBest) parses = parses.subList(0, NBest);
+
+
+ if (read==1) {
+ // write the forest
+ dos.writeInt(parses.size());
+ for(int k=0;k<parses.size();k++) {
+ dos.writeUTF(parses.get(k).signature());
+ dos.writeFloat((float)parses.get(k).f1);
+ }
+ }
+ } else if (read==2) {
+ parses = new ArrayList<ParseNBest>();
+ int parseCount = dis.readInt();
+ for(int k=0;k<parseCount;k++) {
+ ParseNBest p = new ParseNBest(dis.readUTF(),dis.readFloat());
+ if (parses.size()<NBest) parses.add(p);
+ }
+ }
+
+ nbest +=parses.size();
+
+
+ //List<ParseNBest> parses = this.parseNBest(instance, this.paramsParsers[m]);
+
+ long vs[] = new long[ExtractorReranker._FC*is.length(0)];
+
+ float bestScore=0;
+ int best=0;
+
+
+ // score each candidate with the reranker and keep the best one
+ for(int k=0;k<parses.size();k++) {
+
+ ParseNBest p= parses.get(k);
+
+ pipeReranker.extractor.extractFeatures(is,0,p,k,vs,pipeReranker.cl);
+
+ int rank=1*ExtractorReranker.s_type;
+ f.clear();
+ for(int j=0;j<vs.length;j++) {
+ if (vs[j]==Integer.MIN_VALUE) break;
+ if (vs[j]>0) f.add(pipeReranker.extractor.li.l2i(vs[j]+rank));
+ }
+ f.add(pipeReranker.extractor.li.l2i(1+rank),(float)p.f1);
+
+ float score = (float)(parametersReranker.getScore(f));
+ if (score>bestScore) { //rankScore[k][2]>
+ bestScore =score;
+ best=k;
+
+ }
+ }
+ // change to best
+ ParseNBest d = parses.get(best);
+
+ las +=(is.length(0)-1)-Decoder.getError(d, is, 0, Decoder.LAS);
+ lcnt +=is.length(0)-1;
+
+ averageScore+=d.f1;
+
+ SentenceData09 i09 = new SentenceData09(instance);
+
+ i09.createSemantic(instance);
+
+ // copy heads and labels, shifting past the artificial root at index 0
+ for(int j = 0; j < forms.length-1; j++) {
+ i09.plabels[j] = types[d.labels[j+1]];
+ i09.pheads[j] = d.heads[j+1];
+ }
+
+
+ depWriter.write(i09);
+ String info =""+((float)(averageScore/(float)cnt))+" ";
+
+ if (cnt%10 ==0)
+ del=PipeGen.outValueErr(cnt, lcnt-las, las/lcnt, del, last, 0, info);//outValue(cnt, del,last, info);
+
+ }
+
+ //pipe.close();
+
+ depWriter.finishWriting();
+ long end = System.currentTimeMillis();
+ DB.println("rank\n"+rank+"\n");
+ DB.println("x-best-las "+amongxbest+" x-best-ula "+amongxbest_ula+" cnt "+cnt+" x-best-las "
+ +((float)((float)amongxbest/cnt))+
+ " x-best-ula "+((float)((float)amongxbest_ula/cnt))+
+ " nbest "+((float)nbest/cnt)+
+ " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+
+ " best-proj "+((float)bestProj/cnt)+
+ " Sum LAS "+((float)this.smallestErrorSum/countAllNodes)+" "+
+ ""+(las/lcnt));
+
+ // DB.println("errors "+error);
+ olas+=las;
+ olcnt+=lcnt;
+ // reset the per-parser statistics before the next model
+ rank.clear();
+ amongxbest=0;amongxbest_ula=0;
+ cnt=0;
+ nbest=0;
+ bestProj=0;
+ if (maxInfo) System.out.println("Used time " + (end-start));
+ if (maxInfo) System.out.println("forms count "+Instances.m_count+" unkown "+Instances.m_unkown);
+ }
+
+ if (dos !=null) {
+ dos.flush();
+ dos.close();
+ }
+ if (dis!=null)dis.close();
+
+ DB.println("\n overall las "+(olas/olcnt));
+ }
+
+
+ /**
+ * Do the parsing: runs n-best parsing over the test file with the first
+ * base parser only, updating the coverage statistics as a side effect of
+ * {@code parseNBest}; no output is written.
+ * @param options test settings (test file, format)
+ * @param pipe the base parser pipeline
+ * @param params unused here; parsing uses paramsParsers[0]
+ * @param maxInfo unused in the current implementation
+ * @throws IOException
+ */
+ private void getNBest(OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo) throws Exception {
+
+
+ CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask);
+
+ ExtractorClusterStacked.initFeatures();
+
+ int cnt = 0;
+
+ // reverse mapping from relation id to relation label
+ String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)];
+ for (Entry<String, Integer> e : pipe.mf.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey();
+
+ // System.out.print("Processing Sentence: ");
+
+ while(true) {
+
+ Instances is = new Instances();
+ is.init(1, new MFB(),options.formatTask);
+
+ SentenceData09 instance = pipe.nextInstance(is, depReader);
+ if (instance==null) break;
+ cnt++;
+
+ this.parseNBest(instance, this.paramsParsers[0]);
+ }
+
+ //pipe.close();
+ // depWriter.finishWriting();
+ // long end = System.currentTimeMillis();
+ // DB.println("rank\n"+rank+"\n");
+ // DB.println("x-best-las "+amongxbest+" x-best-ula "+amongxbest_ula+" cnt "+cnt+" x-best-las "
+ // +((float)((float)amongxbest/cnt))+
+ // " x-best-ula "+((float)((float)amongxbest_ula/cnt))+
+ // " nbest "+((float)nbest/cnt)+
+ // " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+
+ // " best-proj "+((float)bestProj/cnt));
+ // DB.println("errors "+error);
+
+
+ }
+
+
+ /**
+ * Parses one sentence with the base parser, updates the n-best coverage
+ * statistics (rank of the gold parse, smallest error in the list, ...),
+ * and returns the first-best parse as an annotated sentence.
+ *
+ * @param instance the input sentence (with artificial root already present)
+ * @param params the base parser's parameters
+ * @return the annotated sentence, or null if score computation failed
+ */
+ public SentenceData09 parse (SentenceData09 instance, ParametersFloat params) {
+
+ // reverse mapping from relation id to relation label
+ String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)];
+ for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey();
+
+ Instances is = new Instances();
+ is.init(1, new MFB(),options.formatTask);
+ new CONLLReader09().insert(is, instance);
+
+ String[] forms = instance.forms;
+
+ // use for the training ppos
+ DataF d2;
+ try {
+ d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl, THREADS,l2i);//cnt-1
+ } catch (Exception e ) {
+ e.printStackTrace();
+ return null;
+ }
+ short[] pos = is.pposs[0];
+
+ List<ParseNBest> parses=null;
+ Parse d= null;
+ try {
+ parses =Decoder.decode(pos,d2,options.decodeProjective,pipe.extractor[0]); //cnt-1
+ d = parses.get(0);
+ }catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ if (parses.size()>NBest) parses = parses.subList(0,NBest);
+
+ // statistics: where in the n-best list the gold parse appears
+ int g_las = Decoder.getGoldRank(parses, is,0,Decoder.LAS);
+ int g_ula = Decoder.getGoldRank(parses, is,0,!Decoder.LAS);
+
+ int smallest = Decoder.getSmallestError(parses, is,0,!Decoder.LAS);
+ smallestErrorSum+=is.length(0)-smallest;
+ countAllNodes+=is.length(0);
+
+ if (g_las>=0) amongxbest++;
+ if (g_ula>=0) amongxbest_ula++;
+
+ nbest+=parses.size();
+
+ Integer r = rank.get(g_las);
+ if (r==null) rank.put(g_las, 1);
+ else rank.put(g_las, r+1);
+
+ float err = (float)this.pipe.errors(is,0, d);
+
+ float errBestProj = (float)this.pipe.errors(is,0, Decoder.bestProj);
+
+ if (errBestProj==0) bestProj++;
+
+ SentenceData09 i09 = new SentenceData09(instance);
+
+ i09.createSemantic(instance);
+
+ // copy heads and labels, shifting past the artificial root at index 0
+ for(int j = 0; j < forms.length-1; j++) {
+ i09.plabels[j] = types[d.labels[j+1]];
+ i09.pheads[j] = d.heads[j+1];
+ }
+ return i09;
+
+ }
+
+ /**
+ * Compute the n best parses for one sentence.
+ *
+ * @param instance the sentence to parse
+ * @param params the weight vector used for scoring
+ * @return at most NBest parses, or null if feature extraction or decoding failed
+ */
+ public List<ParseNBest> parseNBest (SentenceData09 instance, ParametersFloat params) {
+
+ Instances is = new Instances();
+ is.init(1, new MFB(),options.formatTask);
+ new CONLLReader09().insert(is, instance);
+
+ // use for the training ppos
+ DataF d2;
+ try {
+ d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl,THREADS,l2i);//cnt-1
+ } catch (Exception e ) {
+ e.printStackTrace();
+ return null;
+ }
+ short[] pos = is.pposs[0];
+
+ List<ParseNBest> parses=null;
+ try {
+ parses =Decoder.decode(pos,d2,options.decodeProjective,pipe.extractor[0]); //cnt-1
+ }catch (Exception e) {
+ e.printStackTrace();
+ // bug fix: the original fell through and threw a NullPointerException
+ // on parses.size(); fail like the feature-extraction path instead
+ return null;
+ }
+
+ if (parses.size()>NBest) parses = parses.subList(0,NBest);
+
+ return parses;
+
+ }
+
+
+
+ /* (non-Javadoc)
+ * @see is2.tools.Tool#apply(is2.data.SentenceData09)
+ *
+ * Parses the sentence with the reranker parameters; exceptions are logged
+ * and swallowed, so snt09 is returned as-is on failure.
+ * NOTE(review): the annotated copy returned by parse() is discarded here,
+ * and both executor services are shut down, which makes this instance
+ * single-use -- confirm both are intended.
+ */
+ @Override
+ public SentenceData09 apply(SentenceData09 snt09) {
+
+ try {
+ parse(snt09,this.parametersReranker);
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+
+ // releases the shared decoder/pipe thread pools
+ Decoder.executerService.shutdown();
+ Pipe.executerService.shutdown();
+
+ return snt09;
+ }
+
+ /**
+ * Write the parsing model as a single-entry ("data") zip file containing,
+ * in order: the feature mapping (MFB), the cluster, the parameters, the
+ * edge filter, the projectivity flag, the max form index, and five
+ * informational strings.
+ *
+ * @param options source of model name, training file, iteration count, etc.
+ * @param params the parameter vector to serialize
+ * @param extension appended to options.modelName, or null for none
+ * @param cs the word-cluster data to serialize
+ * @throws FileNotFoundException if the model file cannot be created
+ * @throws IOException on any write error
+ */
+ private void writeModell(OptionsSuper options, ParametersFloat params, String extension, Cluster cs) throws FileNotFoundException, IOException {
+
+ String name = extension==null?options.modelName:options.modelName+extension;
+ // (message text kept as-is; readers of the model never parse it)
+ DB.println("Writting model: "+name);
+ ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(name)));
+ zos.putNextEntry(new ZipEntry("data"));
+ DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos));
+
+ MFB.writeData(dos);
+ cs.write(dos);
+
+ params.write(dos);
+
+ Edges.write(dos);
+
+ dos.writeBoolean(options.decodeProjective);
+
+ dos.writeInt(ExtractorClusterStacked.maxForm);
+
+ dos.writeInt(5); // Info count
+ dos.writeUTF("Used parser "+Reranker.class.toString());
+ dos.writeUTF("Creation date "+(new SimpleDateFormat("yyyy.MM.dd HH:mm:ss")).format(new Date()));
+ dos.writeUTF("Training data "+options.trainfile);
+ dos.writeUTF("Iterations "+options.numIters+" Used sentences "+options.count);
+ dos.writeUTF("Cluster "+options.clusterFile);
+
+ dos.flush();
+ dos.close();
+ }
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/parserR2/package.html b/dependencyParser/mate-tools/src/is2/parserR2/package.html
new file mode 100755
index 0000000..6b06482
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/parserR2/package.html
@@ -0,0 +1,3 @@
+Package info
+ - n-best parser
+<br>
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/tag/ExtractorT2.java b/dependencyParser/mate-tools/src/is2/tag/ExtractorT2.java
new file mode 100644
index 0000000..a37dbbe
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tag/ExtractorT2.java
@@ -0,0 +1,523 @@
+package is2.tag;
+
+
+import is2.data.Cluster;
+import is2.data.F2SF;
+import is2.data.Instances;
+import is2.data.InstancesTagger;
+import is2.data.Long2IntInterface;
+import is2.data.ParametersFloat;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.tools.IPipe;
+import is2.util.OptionsSuper;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map.Entry;
+
+
+
+final public class ExtractorT2 extends PipeGen implements IPipe {
+
+ // maximum number of feature codes produced per token (size of the vs buffer)
+ final static int _MAX=71;
+
+ // sentinel feature values for the start word / start POS position
+ private static final String STWRD = "STWRD", STPOS = "STPOS";
+
+ // bit widths of the encoded feature parts, set in initValues()
+ private static short s_pos,s_word,s_char;
+ protected static short s_type;
+ private static int _strp,_ewrd;
+ static int _CEND;
+
+ // reverse map: POS code -> POS string, filled in createInstances()
+ public String[] types;
+
+ final public MFO mf;
+
+ // reusable bit-packing templates (see MFO.Data4); widths set in initValues()
+ final MFO.Data4 d1 = new MFO.Data4(),d2 = new MFO.Data4(),d3 = new MFO.Data4(),
+ dw = new MFO.Data4(), dwp = new MFO.Data4();
+
+ Cluster cl;
+
+ private OptionsSuper options;
+
+ public ExtractorT2 (OptionsSuper options, MFO mf) throws IOException {
+ this.mf =mf;
+ this.options = options;
+ }
+ // POS code -> array of POS codes observed immediately after it in training
+ public HashMap<Integer, int[]> _pps = new HashMap<Integer, int[]>();
+
+ private Lexicon lx;
+
+ // number of distinct words registered from the corpus; word codes >= this
+ // value come from the cluster/lexicon files and are treated as unknown
+ public int corpusWrds = 0;
+
+
+
+
+ /* (non-Javadoc)
+ * @see is2.tag5.IPipe#createInstances(java.lang.String, java.io.File, is2.data.InstancesTagger)
+ *
+ * Convenience overload: create instances from the whole file, skipping
+ * no sentence range (-1, -1 disables skipping in the three-arg version).
+ */
+ public Instances createInstances(String file) {
+ return createInstances(file, -1, -1);
+ }
+
+
+ /**
+ * Read the corpus twice: the first pass registers all words, characters
+ * and POS tags, builds the POS-bigram table (_pps), and loads the cluster
+ * and lexicon; the second pass creates the training instances.
+ *
+ * @param file the CoNLL-09 training file
+ * @param skipStart first sentence (inclusive) of a range to leave out, or -1 to disable
+ * @param skipEnd end (exclusive) of the skipped range
+ * @return the created instances
+ */
+ public Instances createInstances(String file, int skipStart, int skipEnd) {
+
+ InstancesTagger is = new InstancesTagger();
+
+ CONLLReader09 depReader = new CONLLReader09(CONLLReader09.NO_NORMALIZE);
+
+ depReader.startReading(file);
+ mf.register(POS,"<root-POS>");
+ mf.register(WORD,"<root>");
+
+ System.out.println("Registering feature parts ");
+
+ // pps: POS code -> set of POS codes observed immediately after it
+ HashMap<Integer, HashSet<Integer>> pps = new HashMap<Integer, HashSet<Integer>>();
+
+ int ic=0;
+ while(true) {
+
+ SentenceData09 instance1 = depReader.getNext();
+
+ if (instance1== null) break;
+ ic++;
+
+ String[] w = instance1.forms;
+ for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]);
+ for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]);
+ for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1].toLowerCase());
+
+
+ w = instance1.plemmas;
+ for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]);
+ for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]);
+
+ w = instance1.gpos;
+ for(int i1 = 0; i1 < w.length; i1++) {
+ mf.register(POS, w[i1]);
+ }
+ for(int i1 = 0; i1 < w.length; i1++) {
+ HashSet<Integer> ps = pps.get(mf.getValue(POS,w[i1]));
+ if (ps==null) {
+ ps= new HashSet<Integer>();
+ pps.put(mf.getValue(POS,w[i1]), ps);
+ }
+ if (i1+1<w.length) ps.add(mf.getValue(POS,w[i1+1]));
+ }
+
+ }
+
+ // freeze the bigram sets into primitive arrays
+ for(Entry<Integer,HashSet<Integer>> e : pps.entrySet()) {
+ int[] ps = new int[e.getValue().size()];
+ int j=0;
+ for(int k : e.getValue().toArray(new Integer[0])) {
+ ps[j++] =k;
+ }
+ _pps.put(e.getKey(), ps);
+ }
+
+ System.out.println("words in corpus "+(corpusWrds=mf.getFeatureCounter().get(ExtractorT2.WORD)));
+ if (options.clusterFile==null)cl = new Cluster();
+ else cl= new Cluster(options.clusterFile, mf,6);
+
+ if (options.lexicon==null)lx = new Lexicon(new byte[0][0]);
+ else lx= new Lexicon(options.lexicon,mf);
+
+ initFeatures();
+
+ mf.calculateBits();
+ initValues();
+
+ System.out.println(""+mf.toString());
+
+ // second pass: build the instances
+ depReader.startReading(file);
+
+ int num1 = 0;
+
+ int instanceCount=0;
+
+ System.out.print("Creating Instances: ");
+
+ is.init(ic, mf) ;
+ int del=0;
+
+ while(true) {
+ if (num1 % 100 ==0) del = outValue(num1, del);
+
+ // skip the requested sentence range (used for cross-validation)
+ if (num1>=skipStart && num1<skipEnd && skipStart>=0) {
+ SentenceData09 instance1 = depReader.getNext();
+ if (instance1== null) break;
+ num1++;
+ continue;
+ }
+
+
+ SentenceData09 instance1 = depReader.getNext(is);
+ if (instance1== null) break;
+
+ is.fillChars(instance1, instanceCount,_CEND);
+ for(int k=0;k<instance1.length();k++) {
+ // bug fix: String.contains takes a literal CharSequence, not a regex;
+ // the original tested for the two characters "\|" and therefore never
+ // matched a composite tag "X|Y" that the split below expects
+ if (instance1.ppos[k].contains("|"))
+
+ // NOTE(review): indexes pposs by num1 while fillChars uses
+ // instanceCount; the two diverge once sentences are skipped -- confirm
+ is.pposs[num1][k] = (short)mf.getValue(FM, instance1.ppos[k].split("\\|")[1]);
+ }
+
+
+ if (num1>options.count) break;
+
+ num1++;
+ instanceCount++;
+ }
+ outValue(num1, del);
+ System.out.println();
+
+ // reverse map POS code -> string, used by the classifiers below
+ types= mf.reverse(mf.getFeatureSet().get(POS));
+ return is;
+ }
+
+ /** Register every character of the given word as a value of feature class 'type'. */
+ private void registerChars(String type, String word) {
+ for (char ch : word.toCharArray()) mf.register(type, String.valueOf(ch));
+ }
+
+
+ /* (non-Javadoc)
+ * @see is2.tag5.IPipe#initValues()
+ *
+ * Cache the bit widths of each feature class and configure the shared
+ * bit-packing templates (d1..dwp); must run after mf.calculateBits().
+ */
+ public void initValues() {
+ s_pos = mf.getFeatureBits(POS);
+ s_word = mf.getFeatureBits(WORD);
+ s_type = mf.getFeatureBits(TYPE);
+ s_char = mf.getFeatureBits(CHAR);
+
+ // a0..aN are the bit widths of the packed slots v0..vN (see MFO.Data4)
+ d1.a0 = s_type; d1.a1 = s_pos; d1.a2= s_word;d1.a3= s_word;
+ d2.a0 = s_type; d2.a1 = s_pos; d2.a2= s_pos; d2.a3= s_pos; d2.a4= s_pos; d2.a5= s_pos; d2.a6= s_pos;
+ d3.a0 = s_type; d3.a1 = s_pos; d3.a2= s_char; d3.a3= s_char; d3.a4= s_char; d3.a5= s_char; d3.a6= s_char; d3.a7= s_char;
+ dw.a0 = s_type; dw.a1 = s_pos;dw.a2= s_word; dw.a3= s_word; dw.a4= s_word; dw.a5= s_word; dw.a6= s_word; dw.a7= s_word;
+ dwp.a0 = s_type; dwp.a1 = s_pos;dwp.a2= s_word ; dwp.a3= s_pos; dwp.a4= s_word;
+
+ }
+
+ /* (non-Javadoc)
+ * @see is2.tag5.IPipe#initFeatures()
+ *
+ * Register the fixed feature templates (F0..F66) and the boundary
+ * sentinels for POS, WORD and CHAR classes.
+ */
+ public void initFeatures() {
+ // 62
+ for(int t=0;t<67;t++) mf.register(TYPE, "F"+t);
+
+ mf.register(POS, MID);
+ _strp = mf.register(POS, STR);
+ mf.register(POS, END);
+
+ mf.register(WORD, STR);
+ _ewrd =mf.register(WORD, END);
+
+ _CEND = mf.register(CHAR, END);
+
+ mf.register(WORD,STWRD);
+ mf.register(POS,STPOS);
+
+
+ }
+
+ /**
+ * Extract all tagging features for token i of sentence ic and write the
+ * bit-packed feature codes into vs, terminated by Integer.MIN_VALUE.
+ * Features cover: word form, prefix/suffix characters, capitalization and
+ * digit shape, cluster and lexicon lookups, and the surrounding context
+ * (previous/next words, lemmas and predicted POS tags).
+ *
+ * @param is instance container (chars[ic][i][0..5]=prefix chars, [6..10]=suffix chars, [11]=length)
+ * @param ic index of the sentence
+ * @param fs the surface form of token i (used for shape features)
+ * @param i the token position
+ * @param pposs predicted POS codes of the sentence
+ * @param forms word codes; codes >= corpusWrds are unknown words
+ * @param lemmas lemma codes
+ * @param vs output buffer (must have room for _MAX entries)
+ */
+ final public void addFeatures(InstancesTagger is, int ic, String fs,int i, short pposs[], int[] forms, int[] lemmas, long[] vs) {
+
+ int c0= is.chars[ic][i][0], c1=is.chars[ic][i][1], c2=is.chars[ic][i][2], c3=is.chars[ic][i][3], c4=is.chars[ic][i][4],c5=is.chars[ic][i][5];
+ int e0 =is.chars[ic][i][6], e1 =is.chars[ic][i][7],e2 =is.chars[ic][i][8],e3 =is.chars[ic][i][9],e4 =is.chars[ic][i][10];
+
+ // f = running template id, n = next free slot in vs
+ int f=1,n=0;
+ // shape flags: upper 0=no,1=initial,2=internal,3=initial+internal; number 1=no digit,2=leading digit,3=internal digit
+ short upper =0, number = 1;
+ for(int k1=0;k1<fs.length();k1++){
+ char c = fs.charAt(k1);
+ if (Character.isUpperCase(c)) {
+ if (k1==0) upper=1;
+ else {
+ // first char + another
+ if (upper==1) upper=3;
+ // another uppercase in the word
+ else if (upper==0) upper=2;
+ }
+ }
+
+ // first
+ if (Character.isDigit(c) && k1==0) number =2 ;
+ else if (Character.isDigit(c) && number==1) number = 3;
+ // if(number==2 &&Character.isDigit(c)) number=4;
+ // if(number==4 && !Character.isDigit(c)) number=5;
+ }
+
+ // if (i==0 && upper>0) upper+=4;
+ // form2 masks out-of-corpus words to -1 so their features are suppressed
+ int form = forms[i], form2 = forms[i]<corpusWrds?forms[i]:-1;
+
+ int len = forms.length;
+ long l;
+ d1.v0 = f++; d1.v2=form2; l=mf.calc3(d1); vs[n++]=mf.calc3(d1);
+
+ d1.v0 = f++; d1.v2=is.formlc[ic][i]; vs[n++]=mf.calc3(d1);
+
+
+ // prefix character n-grams
+ d3.v2=c0; d3.v3=c1; d3.v4=c2; d3.v5=c3; d3.v6=c4;
+ d3.v0=f++; vs[n++]=mf.calc3(d3);
+ d3.v0=f++; vs[n++]=mf.calc4(d3);
+ d3.v0=f++; vs[n++]=mf.calc5(d3);
+ d3.v0=f++; vs[n++]=mf.calc6(d3);
+ d3.v0=f++; vs[n++]=mf.calc7(d3);
+
+ if (form!=-1) {
+ d3.v2=c2; d3.v3=c3; d3.v4=c4; d3.v5=c5; d3.v6=cl.getLP(form);
+ d3.v0=f; vs[n++]=mf.calc6(d3); d3.v0=f+1; vs[n++]=mf.calc7(d3);
+ }
+ f+=2;
+
+ if (form>0) {
+ d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3);
+ d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3);
+ d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3);
+ }
+ f+=5;
+
+ // suffix character n-grams, each also combined with the capitalization flag
+ d3.v2=e0; d3.v3=e1; d3.v4=e2; d3.v5=e3; d3.v6=e4;
+ d3.v0 =f++; vs[n++]=mf.calc3(d3);
+ d3.v0 =f++; vs[n++]=l=mf.calc4(d3); vs[n++]=d3.calcs(3, upper, l);
+ d3.v0 =f++; vs[n++]=l=mf.calc5(d3); vs[n++]=d3.calcs(3, upper, l);
+ d3.v0 =f++; vs[n++]=l=mf.calc6(d3); vs[n++]=d3.calcs(3, upper, l);
+ d3.v0 =f++; vs[n++]=l=mf.calc7(d3); vs[n++]=d3.calcs(3, upper, l);
+
+ if (form>0) {
+ d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3);
+ d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3);
+ d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3);
+
+ d3.v2=e0; d3.v3=e1; d3.v4=e2;
+
+ d3.v0=f+3; d3.v2=lx.getTag(form); vs[n++]=mf.calc3(d3);
+ d3.v0=f+4; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3);
+ d3.v0=f+5; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3);
+ }
+ f+=6;
+
+ // sign three-grams
+ d3.v0=f++;d3.v2=c1; d3.v3=c2; d3.v4=c3; vs[n++]=mf.calc5(d3);
+ d3.v0=f++;d3.v2=c2; d3.v3=c3; d3.v4=c4; vs[n++]=mf.calc5(d3);
+ d3.v0=f++;d3.v2=c3; d3.v3=c4; d3.v4=c5; vs[n++]=mf.calc5(d3);
+
+ // sign quad-grams
+ d3.v0=f++;d3.v2=c1; d3.v3=c2; d3.v4=c3; d3.v5=c4; vs[n++]=mf.calc6(d3);
+ d3.v0=f++;d3.v2=c2; d3.v3=c3; d3.v4=c4; d3.v5=c5; vs[n++]=mf.calc6(d3); // changed to 6
+
+ // right-context features
+ if (i+1<len && forms[i+1]<this.corpusWrds) {dw.v0=f; dw.v2=forms[i+1];dw.v3= form2;vs[n++]=mf.calc4(dw);}
+ f++;
+
+ if (len>i+1) {
+
+ if (forms[i+1]<corpusWrds){dw.v0=f; dw.v2= forms[i+1]; vs[n++]=mf.calc3(dw);}
+
+ d3.v0=f+1; d3.v2 =is.chars[ic][i+1][0];vs[n++]=mf.calc3(d3);
+ d3.v0=f+2; d3.v2 =is.chars[ic][i+1][6];vs[n++]=mf.calc3(d3);
+
+ d3.v2=e0; d3.v3=e1;
+
+ d3.v0 =f+3; d3.v4 =is.chars[ic][i+1][0];vs[n++]=mf.calc5(d3);
+ d3.v0 =f+4; d3.v4 =is.chars[ic][i+1][6];vs[n++]=mf.calc5(d3);
+
+ if (is.chars[ic][i+1][11]>1 ) { // instance.forms[i+1].length()
+
+ d3.v0=f+5; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; vs[n++]=mf.calc4(d3);
+ d3.v0=f+6; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; vs[n++]=mf.calc4(d3);
+
+ d3.v2=e0; d3.v3=e1;
+
+ d3.v0=f+7; d3.v4 = is.chars[ic][i+1][0]; d3.v5 =is.chars[ic][i+1][1]; vs[n++]=mf.calc6(d3);
+ d3.v0=f+8; d3.v4 = is.chars[ic][i+1][6]; d3.v5=is.chars[ic][i+1][7]; vs[n++]=mf.calc6(d3);
+
+ if (forms[i+1]>0) {
+ d3.v0=f+9; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3);
+ d3.v0=f+10; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3);
+ }
+ }
+
+ if (forms[i+1]>0) {
+
+
+ dw.v0=f+11; dw.v2= cl.getLP(forms[i+1]); dw.v3= form2;vs[n++]=mf.calc4(dw);
+
+ // if (forms[i]>0){
+ // dw.v0=f+12; dw.v2= cl.getLP(forms[i+1]); dw.v3=lx.getTag(form);vs[n++]=mf.calc4(dw);
+ // dw.v0=f+13; dw.v2= cl.getLP(forms[i]); dw.v3=lx.getTag(forms[i+1]);vs[n++]=mf.calc4(dw);
+ // }
+ }
+
+
+ if (len>i+2) {
+ if (forms[i+2]<corpusWrds && forms[i+1]<corpusWrds) {
+ dw.v0=f+12; dw.v2= forms[i+2]; dw.v3 = forms[i+1];vs[n++]=mf.calc4(dw);vs[n++]=mf.calc3(dw);
+ }
+ d2.v0=f+13; d2.v2=pposs[i+1]; d2.v3= pposs[i+2]; vs[n++]=mf.calc4(d2);
+ }
+
+ if (len>i+3) {
+ if (forms[i+3]<this.corpusWrds && forms[i+2]<this.corpusWrds) {
+ dw.v0=f+14; dw.v2= forms[i+3]; dw.v3 = forms[i+2]; vs[n++]=mf.calc4(dw); vs[n++]=mf.calc3(dw);
+ }
+ }
+ }
+ f+=15;
+
+ // length
+ d2.v0=f++; d2.v2=is.chars[ic][i][11];vs[n++]=mf.calc3(d2);
+
+
+ // contains a number
+ d2.v0=f++; d2.v2=number; vs[n++]=mf.calc3(d2);
+ if (lemmas[i]< corpusWrds) {d1.v0=f; d1.v2=lemmas[i]; vs[n++]=mf.calc3(d1); }
+ f++;
+
+ // left/right lemma and POS context window
+ if (i!=0 &&len>i+1) {
+
+ if (lemmas[i-1]< corpusWrds&& lemmas[i+1]<corpusWrds) {dw.v0=f; dw.v2=lemmas[i-1];dw.v3=lemmas[i+1];vs[n++]=mf.calc4(dw);}
+
+ d2.v0=f+1; d2.v2=pposs[i-1]; d2.v3=pposs[i+1];vs[n++]=mf.calc4(d2);
+ }
+ f+=2;
+
+ // _strp is the sentence-start sentinel for position 0
+ d2.v0= f++; d2.v2=i>=1? pposs[i-1]:_strp; vs[n++]=mf.calc3(d2);
+
+ if (i>0) {
+
+ dw.v0 = f; dw.v2 =i>=1? forms[i-1]<corpusWrds?forms[i-1]:-1:_strp; vs[n++]=mf.calc3(dw);
+ f++;
+
+ if (lemmas[i-1]<corpusWrds) {dw.v0 = f; dw.v2 = i>=1? lemmas[i-1]:_strp; vs[n++]=mf.calc3(dw);}
+ f++;
+
+ //if (len>i+1) {d2.v0=f; d2.v2= pposs[i-1];d2.v3= pposs[i+1]; vs[n++]=mf.calc4(d2);}
+ //f++;
+
+ if (i>1) {
+
+ d2.v0=f++; d2.v2=i<2?_strp: pposs[i-2]; vs[n++]=mf.calc3(d2);
+ d2.v0=f++; d2.v2= pposs[i-1]; d2.v3= pposs[i-2]; vs[n++]=mf.calc4(d2);
+
+ if (forms[i-2]<corpusWrds) {dw.v0=f;dw.v2= forms[i-2]; vs[n++]=mf.calc3(dw);} f++;
+ if (forms[i-1]<corpusWrds) {dwp.v0=f;dwp.v2 = forms[i-1]; dwp.v3 = pposs[i-2];vs[n++]=mf.calc4(dwp); } f++;
+ if (forms[i-2]<corpusWrds) {dwp.v0=f;dwp.v2 = forms[i-2]; dwp.v3 = pposs[i-1];vs[n++]=mf.calc4(dwp);} f++;
+
+ if (i>2) {
+ d2.v0=f++; d2.v2=pposs[i-3]; vs[n++]=mf.calc3(d2);
+ d2.v0=f++; d2.v2=pposs[i-2]; d2.v3= pposs[i-3]; vs[n++]=mf.calc4(d2);
+ if(forms[i-3]<this.corpusWrds && forms[i-2]<this.corpusWrds) {
+ dw.v0=f; dw.v2 = forms[i-3]; dw.v3 = forms[i-2]; vs[n++]=mf.calc4(dw);
+ }
+ f++;
+ }
+ }
+ }
+ // terminator consumed by the scoring loops
+ vs[n] = Integer.MIN_VALUE;
+ }
+
+
+ /**
+ * Score every POS tag for token w1 of sentence n and return the index of
+ * the best-scoring tag; its score is written to score[w1].
+ *
+ * @param fs the surface form of the token
+ * @param params weight vector
+ * @param w1 token position
+ * @param is instance container
+ * @param n sentence index
+ * @param pos predicted POS context
+ * @param li maps packed long feature codes to weight indices
+ * @param score output: score[w1] receives the best score
+ * @return the best tag index, or -1 if no tag scored above -1000
+ */
+ public int fillFeatureVectorsOne(String fs, ParametersFloat params, int w1, InstancesTagger is, int n, short[] pos,Long2IntInterface li, float[] score) {
+
+ float best = -1000;
+ int bestType = -1;
+
+ F2SF f = new F2SF(params.parameters);
+
+ long vs[] = new long[_MAX];
+ int lemmas[];
+ // with -noLemmas the lemma features all become 0 (index of NONE)
+ if (options.noLemmas) lemmas = new int[is.length(n)];
+ else lemmas = is.plemmas[n];
+ addFeatures(is,n,fs,w1,pos,is.forms[n], lemmas, vs);
+
+ //for(int t = 0; t < types.length; t++) {
+
+ for(int t=0;t<types.length;t++) {
+
+ // the tag id occupies the low s_type bits of every feature code
+ int p = t<<s_type;
+
+ f.clear();
+ for(int k=0;vs[k]!=Integer.MIN_VALUE;k++) if(vs[k]>0) f.add(li.l2i(vs[k]+p));
+ if (f.score > best) {
+ bestType=t;
+ score[w1]= best =f.score;
+ }
+ }
+ return bestType;
+
+ }
+
+ /**
+ * Score every POS tag for token w1 of sentence n and return all candidates
+ * sorted by POS's natural ordering (a list with one entry per tag).
+ *
+ * @param fs the surface form of the token
+ * @param params weight vector
+ * @param w1 token position
+ * @param is instance container
+ * @param n sentence index
+ * @param pos predicted POS context
+ * @param li maps packed long feature codes to weight indices
+ * @return the sorted candidate list
+ */
+ public ArrayList<POS> classify(String fs, ParametersFloat params, int w1, InstancesTagger is, int n, short[] pos, Long2IntInterface li) {
+
+
+ F2SF f = new F2SF(params.parameters);
+
+ long vs[] = new long[_MAX];
+ int lemmas[];
+ // with -noLemmas the lemma features all become 0 (index of NONE)
+ if (options.noLemmas) lemmas = new int[is.length(n)];
+ else lemmas = is.plemmas[n];
+ addFeatures(is,n,fs,w1,pos,is.forms[n], lemmas, vs);
+
+ ArrayList<POS> best = new ArrayList<POS>(types.length);
+
+ for(int t=0;t<types.length;t++) {
+
+ // the tag id occupies the low s_type bits of every feature code
+ int p = t<<s_type;
+
+ f.clear();
+ f.add(vs,li, p);
+ POS px = new POS(t, f.score);
+ best.add(px);
+ }
+ Collections.sort(best);
+ return best;
+
+ }
+
+ /* (non-Javadoc)
+ * @see is2.tag5.IPipe#write(java.io.DataOutputStream)
+ *
+ * Serialize the cluster, lexicon, corpus word count and the POS-bigram
+ * table; any IOException is logged and swallowed. The format must stay in
+ * sync with read(DataInputStream) below (per entry: length, values, key).
+ */
+ @Override
+ public void write(DataOutputStream dos){
+ try {
+ this.cl.write(dos);
+ this.lx.write(dos);
+ dos.writeInt(this.corpusWrds);
+ dos.writeInt(_pps.size());
+
+ for(Entry<Integer,int[]> e : _pps.entrySet()) {
+ dos.writeInt(e.getValue().length);
+ for(int k : e.getValue()) dos.writeInt(k);
+ dos.writeInt(e.getKey());
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+
+ /**
+ * Deserialize cluster, lexicon, corpus word count and POS-bigram table;
+ * mirror image of write(DataOutputStream). IOExceptions are logged and
+ * swallowed, leaving the object partially initialized.
+ */
+ public void read(DataInputStream dis){
+ try {
+ this.cl =new Cluster(dis);
+ this.lx =new Lexicon(dis);
+ this.corpusWrds = dis.readInt();
+
+ int pc = dis.readInt();
+ for(int j=0;j<pc;j++) {
+ // per entry: length, values, then the key (same order as write())
+ int ps[] = new int [dis.readInt()];
+ for(int k=0;k<ps.length;k++) ps[k]=dis.readInt();
+ _pps.put(dis.readInt(), ps);
+ }
+ // System.out.println("_pps "+ps.length);
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/tag/Lexicon.java b/dependencyParser/mate-tools/src/is2/tag/Lexicon.java
new file mode 100644
index 0000000..8a85813
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tag/Lexicon.java
@@ -0,0 +1,140 @@
+/**
+ *
+ */
+package is2.tag;
+
+import is2.data.IEncoderPlus;
+import is2.data.PipeGen;
+import is2.util.DB;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+
+/**
+ * @author Dr. Bernd Bohnet, 07.01.2011
+ *
+ *
+ */
+public class Lexicon {
+
+ public static final String FR = "FR",TAG = "TAG";
+
+ // word index -> { tag code }; every constructor in this file allocates
+ // rows of length 1 (the frequency column is disabled throughout)
+ final byte[][] word2tag;
+ public Lexicon(byte[][] w2t) {
+
+ word2tag = w2t;
+ }
+
+ /**
+ * Build the lexicon from a tab-separated file (word TAB tag [TAB freq]),
+ * registering all words and tags in the encoder on a first pass and
+ * filling the word->tag table on a second pass.
+ */
+ public Lexicon(String clusterFile, IEncoderPlus mf) {
+
+ final String REGEX = "\t";
+
+ // register words
+ try {
+ BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768);
+
+ int cnt=0;
+ String line;
+ while ((line =inputReader.readLine())!=null) {
+
+ try {
+ String[] split = line.split(REGEX);
+ // int f = Integer.parseInt(split[2]);
+// if (f>2) {
+ cnt++;
+ mf.register(PipeGen.WORD, split[0]);
+ mf.register(TAG, split[1]); //tag
+
+ if (split.length>1) mf.register(FR, split[1]); // frequency
+// }
+ } catch(Exception e) {
+ System.out.println("Error in lexicon line "+cnt+" error: "+e.getMessage());
+ }
+ }
+ System.out.println("read number of words from lexicon "+cnt);
+ inputReader.close();
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ word2tag = new byte[mf.getFeatureCounter().get(PipeGen.WORD)][1];
+ // insert words
+ try {
+ String line;
+ BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768);
+
+ while ((line =inputReader.readLine())!=null) {
+
+ String[] split = line.split(REGEX);
+ int w =mf.getValue(PipeGen.WORD, split[0]);
+ if (w<0) continue;
+ word2tag[w][0] = (byte)mf.getValue(TAG, split[1]);
+ // if (split.length>1) word2tag[w][1]= (byte)mf.getValue(FR, split[2]); // frequency
+ }
+ inputReader.close();
+ int fill=0;
+ for(int l = 0; l<word2tag.length; l++ ){
+ if (word2tag[l][0]!=0) fill++;
+ }
+ System.out.println("filled "+fill+" of "+word2tag.length);
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Read the lexicon
+ * @param dis
+ * @throws IOException
+ */
+ public Lexicon(DataInputStream dis) throws IOException {
+
+ word2tag = new byte[dis.readInt()][1];
+ for(int i =0;i<word2tag.length;i++) {
+ word2tag[i][0]=dis.readByte();
+// word2tag[i][1]=dis.readByte();
+ }
+ DB.println("Read lexicon with "+word2tag.length+" words ");
+ }
+
+ /**
+ * Write the lexicon
+ * @param dos
+ * @throws IOException
+ */
+ public void write(DataOutputStream dos) throws IOException {
+
+ dos.writeInt(word2tag.length);
+ for(byte[] i : word2tag) {
+ dos.writeByte(i[0]);
+// dos.writeByte(i[1]);
+ }
+
+ }
+
+ /**
+ * @param form word index
+ * @return the tag code stored for the word, or -1 if out of range
+ */
+ public int getTag(int form) {
+ // bug fix: the original tested 'word2tag.length<form', letting
+ // form==word2tag.length through to an ArrayIndexOutOfBoundsException
+ if (form<0 || form>=word2tag.length) return -1;
+ return word2tag[form][0];
+ }
+
+ /**
+ * @param form word index
+ * @return the confidence/frequency byte, or -1 if out of range or not stored
+ */
+ public int getConf(int form) {
+ // same off-by-one fix as getTag; additionally guard column 1, which no
+ // constructor in this file allocates (rows have length 1)
+ if (form<0 || form>=word2tag.length || word2tag[form].length<2) return -1;
+ return word2tag[form][1];
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/tag/MFO.java b/dependencyParser/mate-tools/src/is2/tag/MFO.java
new file mode 100644
index 0000000..df790e3
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tag/MFO.java
@@ -0,0 +1,537 @@
+package is2.tag;
+
+
+import is2.data.IEncoderPlus;
+import is2.util.DB;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+/**
+ * Map Features, do not map long to integer
+ *
+ * @author Bernd Bohnet, 20.09.2009
+ */
+
+final public class MFO implements IEncoderPlus {
+
+ /** Feature class name -> (value string -> integer code). */
+ private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>();
+
+ /** Feature class name -> number of registered values (next free code). */
+ private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>();
+
+ /** Feature class name -> number of bits needed to encode a value; filled by calculateBits(). */
+ final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>();
+
+ /** Integer counter for long2int */
+ //private int count=0;
+
+ /** Stop growing (flag only; register() does not currently check it). */
+ public boolean stop=false;
+
+ /** Reserved value registered at code 0 of every feature class. */
+ final public static String NONE="<None>";
+
+
+
+
+
+ /**
+ * Bit-packing template: calcN() packs the integer values v0..v(N-1) into
+ * one long, where slot k is shifted left by the cumulative bit widths
+ * a0..a(k-1). All calcN() return -1 when any participating value is
+ * negative (missing feature), and leave the total width in 'shift' so
+ * that calcs() can append further bits.
+ */
+ final public static class Data4 {
+ public int shift;
+ public short a0,a1,a2,a3,a4,a5,a6,a7,a8,a9;
+ public int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9;
+
+ /** Append value v (b bits wide) to an already packed code l; -1 stays -1. */
+ final public long calcs(int b, long v, long l) {
+ if (l<0) return l;
+ l |= v<<shift;
+ shift +=b;
+ return l;
+ }
+
+
+ final public long calc2() {
+
+ if (v0<0||v1<0) return -1;
+
+ long l = v0;
+ shift =a0;
+ l |= (long)v1<<shift;
+ shift +=a1;
+
+ return l;
+ }
+
+
+
+ final public long calc3() {
+
+ if (v0<0||v1<0||v2<0) return -1;
+ // if (v1<0||v2<0) return -1;
+
+ long l = v0;
+ shift =a0;
+ l |= (long)v1<<shift;
+ shift +=a1;
+ l |= (long)v2<<shift;
+ shift=(short) (shift + a2);
+
+ //shift=;
+ return l;
+ }
+
+
+ final public long calc4() {
+ if (v0<0||v1<0||v2<0||v3<0) return -1;
+
+ long l = v0;
+ shift =a0;
+ l |= (long)v1<<shift;
+ shift +=a1;
+ l |= (long)v2<<shift;
+ shift +=a2;
+ l |= (long)v3<<shift;
+ shift= shift +a3;
+
+ return l;
+ }
+
+
+
+ final public long calc5() {
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0) return -1;
+
+ long l = v0;
+ shift =a0;
+ l |= (long)v1<<shift;
+ shift +=a1;
+ l |= (long)v2<<shift;
+ shift +=a2;
+ l |= (long)v3<<shift;
+ shift +=a3;
+ l |= (long)v4<<shift;
+ shift =shift+a4;
+
+ return l;
+ }
+
+
+ final public long calc6() {
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) return -1;
+
+ long l = v0;
+ shift =a0;
+ l |= (long)v1<<shift;
+ shift +=a1;
+ l |= (long)v2<<shift;
+ shift +=a2;
+ l |= (long)v3<<shift;
+ shift +=a3;
+ l |= (long)v4<<shift;
+ shift +=a4;
+ l |= (long)v5<<shift;
+ shift =shift+a5;
+
+ return l;
+ }
+
+ final public long calc7() {
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) return -1;
+
+ long l = v0;
+ shift =a0;
+ l |= (long)v1<<shift;
+ shift +=a1;
+ l |= (long)v2<<shift;
+ shift +=a2;
+ l |= (long)v3<<shift;
+ shift +=a3;
+ l |= (long)v4<<shift;
+ shift +=a4;
+ l |= (long)v5<<shift;
+ shift +=a5;
+ l |= (long)v6<<shift;
+ shift =shift+a6;
+
+ return l;
+ }
+
+
+ final public long calc8() {
+
+ if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) return -1;
+
+ long l = v0;
+ shift =a0;
+ l |= (long)v1<<shift;
+ shift +=a1;
+ l |= (long)v2<<shift;
+ shift +=a2;
+ l |= (long)v3<<shift;
+ shift +=a3;
+ l |= (long)v4<<shift;
+ shift +=a4;
+ l |= (long)v5<<shift;
+ shift +=a5;
+ l |= (long)v6<<shift;
+ shift +=a6;
+ l |= (long)v7<<shift;
+ shift =shift+a7;
+
+ return l;
+ }
+
+ }
+
+ public MFO () {}
+
+
+ // public int size() {return count;}
+
+
+ /** Set the stop flag (NOTE(review): register() ignores it -- confirm callers rely on it). */
+ final public void stop() {
+ stop=true;
+ }
+
+ /** Clear the stop flag. */
+ final public void start() {
+ stop=false;
+ }
+
+
+ /**
+ * Register an attribute class, if it not exists and add a possible value
+ * @param type
+ * @param type2
+ */
+ final public int register(String a, String v) {
+
+ HashMap<String,Integer> fs = getFeatureSet().get(a);
+ if (fs==null) {
+ fs = new HashMap<String,Integer>();
+ getFeatureSet().put(a, fs);
+ fs.put(NONE, 0);
+ getFeatureCounter().put(a, 1);
+ }
+ Integer c = getFeatureCounter().get(a);
+
+ Integer i = fs.get(v);
+ if (i==null) {
+ fs.put(v, c);
+ c++;
+ getFeatureCounter().put(a,c);
+ return c-1;
+ } else return i;
+ }
+
+ /**
+ * Calculates the number of bits needed to encode each feature class:
+ * ceil(log2(count+1)), stored in m_featureBits. Must be called before
+ * any calcN() packing is used.
+ */
+ public void calculateBits() {
+
+ int total=0;
+ for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){
+ int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2)));
+ m_featureBits.put(e.getKey(), bits);
+ total+=bits;
+ // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1));
+ }
+
+ // System.out.println("total number of needed bits "+total);
+ }
+
+
+
+ /** One line per feature class: "&lt;name&gt; &lt;count&gt;:&lt;bits&gt;". */
+ @Override
+ public String toString() {
+
+ StringBuilder out = new StringBuilder();
+ for (Entry<String,Integer> e : getFeatureCounter().entrySet()) {
+ out.append(e.getKey()).append(' ').append(e.getValue());
+ out.append(':');
+ out.append(getFeatureBits(e.getKey()));
+ out.append('\n');
+ }
+ return out.toString();
+ }
+
+
+ /** Static variant of Data4.calcs: append value v (b bits wide) to code l; -1 stays -1. */
+ static final public long calcs(Data4 d,int b, long v, long l) {
+ if (l<0) return l;
+ l |= v<<d.shift;
+ d.shift +=b;
+ return l;
+ }
+
+
+ /** @return the bit width of feature class a (throws NPE if calculateBits() has not seen a). */
+ final public short getFeatureBits(String a) {
+ return (short)m_featureBits.get(a).intValue();
+ }
+
+
+
+ /**
+ * Get the integer place holder of the string value v of the type a
+ *
+ * @param t the type
+ * @param v the value
+ * @return the integer place holder of v, or -1 if the type or value is unknown
+ */
+ final public int getValue(String t, String v) {
+
+ if (m_featureSets.get(t)==null) return -1;
+ Integer vi = m_featureSets.get(t).get(v);
+ if (vi==null) return -1; //stop &&
+ return vi.intValue();
+ }
+
+ /**
+ * Same lookup as getValue (NOTE(review): the original javadoc called this
+ * a "static version" but it is an instance method and identical in
+ * behavior -- candidate for removal).
+ * @see getValue
+ */
+ final public int getValueS(String a, String v) {
+
+ if (m_featureSets.get(a)==null) return -1;
+ Integer vi = m_featureSets.get(a).get(v);
+ if (vi==null) return -1; //stop &&
+ return vi.intValue();
+ }
+
+ /** Like getValue but assumes class a exists (throws NPE otherwise). */
+ public int hasValue(String a, String v) {
+
+ Integer vi = m_featureSets.get(a).get(v);
+ if (vi==null) return -1;
+ return vi.intValue();
+ }
+
+
+
+
+ /**
+ * calcN(Data4): pack d.v0..d.v(N-1) into one long using the bit widths
+ * d.a0..d.a(N-1); returns -1 when any participating value is negative
+ * (missing feature) and records the total width in d.shift. Instance
+ * counterparts of Data4's own calcN() methods.
+ */
+ final public long calc2(Data4 d) {
+
+ if (d.v0<0||d.v1<0) return -1;
+ // if (d.v1<0||d.v2<0) return -1;
+
+ long l = d.v0;
+ short shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ // l |= (long)d.v2<<shift;
+ d.shift=shift;
+
+ //d.shift=;
+ return l;
+ }
+
+
+
+ final public long calc3(Data4 d) {
+
+ if (d.v0<0||d.v1<0||d.v2<0) return -1;
+
+ long l = d.v0;
+ short shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ d.shift=shift + d.a2;
+ return l;
+ }
+
+
+ final public long calc4(Data4 d) {
+ if (d.v0<0||d.v1<0||d.v2<0||d.v3<0) return -1;
+
+ long l = d.v0;
+ int shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ shift +=d.a2;
+ l |= (long)d.v3<<shift;
+ d.shift= shift +d.a3;
+
+ return l;
+ }
+
+
+ final public long calc5(Data4 d) {
+
+ if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0) return -1;
+
+ long l = d.v0;
+ int shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ shift +=d.a2;
+ l |= (long)d.v3<<shift;
+ shift +=d.a3;
+ l |= (long)d.v4<<shift;
+ d.shift =shift+d.a4;
+
+ return l;
+ }
+
+
+ final public long calc6(Data4 d) {
+
+ if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0) return -1;
+
+ long l = d.v0;
+ int shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ shift +=d.a2;
+ l |= (long)d.v3<<shift;
+ shift +=d.a3;
+ l |= (long)d.v4<<shift;
+ shift +=d.a4;
+ l |= (long)d.v5<<shift;
+ d.shift =shift+d.a5;
+
+ return l;
+ }
+
+ final public long calc7(Data4 d) {
+
+ if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0) return -1;
+
+ long l = d.v0;
+ int shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ shift +=d.a2;
+ l |= (long)d.v3<<shift;
+ shift +=d.a3;
+ l |= (long)d.v4<<shift;
+ shift +=d.a4;
+ l |= (long)d.v5<<shift;
+ shift +=d.a5;
+ l |= (long)d.v6<<shift;
+ d.shift =shift+d.a6;
+
+ return l;
+ }
+
+
+ final public long calc8(Data4 d) {
+
+ if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0||d.v7<0) return -1;
+
+ long l = d.v0;
+ int shift =d.a0;
+ l |= (long)d.v1<<shift;
+ shift +=d.a1;
+ l |= (long)d.v2<<shift;
+ shift +=d.a2;
+ l |= (long)d.v3<<shift;
+ shift +=d.a3;
+ l |= (long)d.v4<<shift;
+ shift +=d.a4;
+ l |= (long)d.v5<<shift;
+ shift +=d.a5;
+ l |= (long)d.v6<<shift;
+ shift +=d.a6;
+ l |= (long)d.v7<<shift;
+ d.shift =shift+d.a7;
+
+ return l;
+ }
+
+
+
+
+
+
+
+ /**
+ * Statistics counters left over from a removed long->int feature mapping.
+ * NOTE(review): nothing in this class updates them any more -- confirm
+ * external readers before deleting.
+ */
+ static public int misses = 0;
+ static public int good = 0;
+
+
+
+
+ /**
+ * Write all feature classes and their value maps: class count, then per
+ * class its name, value count and (value, code) pairs. Mirror image of
+ * read(DataInputStream).
+ * @param dos
+ * @throws IOException
+ */
+ public void writeData(DataOutputStream dos) throws IOException {
+ dos.writeInt(getFeatureSet().size());
+ for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) {
+ dos.writeUTF(e.getKey());
+ dos.writeInt(e.getValue().size());
+
+ for(Entry<String,Integer> e2 : e.getValue().entrySet()) {
+
+ // a null key would make writeUTF throw; log before it does
+ if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey());
+ dos.writeUTF(e2.getKey());
+ dos.writeInt(e2.getValue());
+
+ }
+
+ }
+ }
+ /**
+ * Read the feature classes written by writeData, set each class counter to
+ * its value count, and recompute the bit widths.
+ * @throws IOException on any read error
+ */
+ public void read(DataInputStream din) throws IOException {
+
+ int size = din.readInt();
+ for(int i=0; i<size;i++) {
+ String k = din.readUTF();
+ int size2 = din.readInt();
+
+ HashMap<String,Integer> h = new HashMap<String,Integer>();
+ getFeatureSet().put(k,h);
+ for(int j = 0;j<size2;j++) {
+ h.put(din.readUTF(), din.readInt());
+ }
+ getFeatureCounter().put(k, size2);
+ }
+
+ calculateBits();
+ }
+
+
+ /**
+ * Clear all registered feature classes, their per-class counters, and the
+ * cached bit widths. Fixes the original, which cleared the feature set
+ * twice and never cleared m_featureCounters (copy-paste bug).
+ */
+ public void clearData() {
+ getFeatureSet().clear();
+ getFeatureCounter().clear();
+ m_featureBits.clear();
+ }
+
+ /** @return the mutable map: feature class name -> number of registered values. */
+ public HashMap<String,Integer> getFeatureCounter() {
+ return m_featureCounters;
+ }
+
+ /** @return the mutable map: feature class name -> (value -> code). */
+ public HashMap<String,HashMap<String,Integer>> getFeatureSet() {
+ return m_featureSets;
+ }
+
+ /**
+ * Invert a value->code map into a code-indexed array; assumes codes are
+ * exactly 0..size-1 (as produced by register()).
+ */
+ static public String[] reverse(HashMap<String,Integer> v){
+ String[] out = new String[v.size()];
+ for (String key : v.keySet()) out[v.get(key)] = key;
+ return out;
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/tag/Options.java b/dependencyParser/mate-tools/src/is2/tag/Options.java
new file mode 100644
index 0000000..540f8ed
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tag/Options.java
@@ -0,0 +1,125 @@
+package is2.tag;
+
+import is2.util.OptionsSuper;
+
+import java.io.File;
+
+
+/**
+ * Command-line options for the is2.tag tagger.
+ * All arguments are parsed in the constructor; anything not recognized
+ * here is delegated to {@link is2.util.OptionsSuper#addOption}.
+ */
+public final class Options extends OptionsSuper {
+
+
+ public Options (String[] args) {
+
+ for(int i = 0; i < args.length; i++) {
+ String[] pair = args[i].split(":");
+
+ if (pair[0].equals("--help")) explain();
+ else if (pair[0].equals("-train")) {
+ train = true;
+ // NOTE(review): unlike the other value-taking options this branch
+ // does not advance i, so the file name is visited again on the next
+ // iteration and falls through to super.addOption -- confirm intended.
+ trainfile = args[i+1];
+ } else if (pair[0].equals("-eval")) {
+ eval = true;
+ goldfile =args[i+1]; i++;
+ } else if (pair[0].equals("-test")) {
+ test = true;
+ testfile = args[i+1]; i++;
+ } else if (pair[0].equals("-i")) {
+ numIters = Integer.parseInt(args[i+1]); i++;
+ }
+ else if (pair[0].equals("-out")) {
+ outfile = args[i+1]; i++;
+ }
+ else if (pair[0].equals("-decode")) {
+ // any value other than "proj" selects non-projective decoding
+ decodeProjective = args[i+1].equals("proj"); i++;
+ }
+ else if (pair[0].equals("-confidence")) {
+
+ conf = true;
+ }
+
+ else if (pair[0].equals("-count")) {
+ count = Integer.parseInt(args[i+1]); i++;
+ } else if (pair[0].equals("-model")) {
+ modelName = args[i+1]; i++;
+ } else if (pair[0].equals("-tmp")) {
+ tmp = args[i+1]; i++;
+ } else if (pair[0].equals("-format")) {
+ //format = args[i+1];
+ formatTask = Integer.parseInt(args[i+1]); i++;
+ } else if (pair[0].equals("-allfeatures")) {
+ allFeatures=true;
+ } else if (pair[0].equals("-nonormalize")) {
+ normalize=false;
+ }else if (pair[0].equals("-nframes")) {
+ //format = args[i+1];
+ nbframes= args[i+1]; i++;
+
+
+ } else if (pair[0].equals("-pframes")) {
+ //format = args[i+1];
+ pbframes= args[i+1]; i++;
+ } else if (pair[0].equals("-nopred")) {
+ nopred =true;
+ } else if (pair[0].equals("-divide")) {
+ keep =true;
+ } else if (pair[0].equals("-lexicon")) {
+ lexicon= args[i+1]; i++;
+
+ } else super.addOption(args, i);
+
+ }
+
+
+
+
+
+ // set up the temporary feature-forest file used during training:
+ // with -divide an existing -tmp file is reused, otherwise a fresh
+ // temp file (in -tmp if given) is created and removed on exit
+ try {
+
+ if (trainfile!=null) {
+
+ if (keep && tmp!=null) {
+ trainforest = new File(tmp);
+ if (!trainforest.exists()) keep=false;
+
+ } else
+ if (tmp!=null) {
+ trainforest = File.createTempFile("train", ".tmp", new File(tmp));
+ trainforest.deleteOnExit();
+ }
+ else {
+ trainforest = File.createTempFile("train", ".tmp"); //,new File("F:\\")
+ trainforest.deleteOnExit();
+ }
+
+
+ }
+
+
+ } catch (java.io.IOException e) {
+ System.out.println("Unable to create tmp files for feature forests!");
+ System.out.println(e);
+ System.exit(0);
+ }
+ }
+
+ /** Print usage information and exit. */
+ private void explain() {
+ // NOTE(review): the usage text below says "is2.parser.Parser" although
+ // this class belongs to the is2.tag tagger -- looks copy-pasted; confirm.
+ System.out.println("Usage: ");
+ System.out.println("java -class mate.jar is2.parser.Parser [Options]");
+ System.out.println();
+ System.out.println("Example: ");
+ System.out.println(" java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6");
+ System.out.println("");
+ System.out.println("Options:");
+ System.out.println("");
+ System.out.println(" -train <file> the corpus a model is trained on; default "+this.trainfile);
+ System.out.println(" -test <file> the input corpus for testing; default "+this.testfile);
+ System.out.println(" -out <file> the output corpus (result) of a test run; default "+this.outfile);
+ System.out.println(" -model <file> the parsing model for traing the model is stored in the files");
+ System.out.println(" and for parsing the model is load from this file; default "+this.modelName);
+ System.out.println(" -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters);
+ System.out.println(" -count <number> the n first sentences of the corpus are take for the training default "+this.count);
+ System.out.println(" -format <number> conll format of the year 8 or 9; default "+this.formatTask);
+
+ System.exit(0);
+ }
+}
diff --git a/dependencyParser/mate-tools/src/is2/tag/POS.java b/dependencyParser/mate-tools/src/is2/tag/POS.java
new file mode 100644
index 0000000..c8e039f
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tag/POS.java
@@ -0,0 +1,29 @@
+package is2.tag;
+
+/**
+ * A part-of-speech hypothesis: a tag id together with its score and the
+ * position of the tagged word. Ordered by descending score, so sorting a
+ * list of POS yields the best-scored tag first.
+ */
+public class POS implements Comparable<POS> {
+
+ /** part-of-speech tag id */
+ public int p;
+
+ /** score of the tag */
+ public float s;
+
+ /** the position of the word in the sentence */
+ public int w;
+
+ public POS(int p, float s) {
+ this.p = p;
+ this.s = s;
+ }
+
+ @Override
+ public int compareTo(POS o) {
+ // higher score sorts first; equal scores compare as equal
+ if (s > o.s) return -1;
+ if (s == o.s) return 0;
+ return 1;
+ }
+
+ @Override
+ public String toString() {
+ return p + ":" + s;
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/tag/Tagger.java b/dependencyParser/mate-tools/src/is2/tag/Tagger.java
new file mode 100644
index 0000000..b0c2dec
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tag/Tagger.java
@@ -0,0 +1,500 @@
+package is2.tag;
+
+
+
+import is2.data.F2SF;
+import is2.data.FV;
+import is2.data.Instances;
+import is2.data.InstancesTagger;
+import is2.data.Long2Int;
+import is2.data.Long2IntInterface;
+import is2.data.ParametersFloat;
+import is2.data.PipeGen;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter09;
+import is2.tools.IPipe;
+import is2.tools.Tool;
+import is2.tools.Train;
+import is2.util.DB;
+import is2.util.Evaluator;
+import is2.util.OptionsSuper;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Map.Entry;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+import java.util.zip.ZipOutputStream;
+
+
+/**
+ * Part-of-speech tagger: trains, loads and applies a tagging model over
+ * CoNLL-09 data. Implements {@link Tool} (apply a loaded model to a
+ * sentence) and {@link Train} (train/serialize a model).
+ */
+public class Tagger implements Tool, Train {
+
+ // feature extraction pipeline
+ public ExtractorT2 pipe;
+ // trained weight vector
+ public ParametersFloat params;
+ // maps long feature codes to indices into the weight vector
+ public Long2IntInterface li;
+ // feature-name <-> integer mapping
+ public MFO mf;
+ // options the model was loaded with (set in readModel)
+ private OptionsSuper _options;
+
+ /**
+ * Initialize
+ * Loads the model named by the options; failures are only printed.
+ * @param options options providing the model file name
+ */
+ public Tagger (Options options) {
+
+
+ // load the model
+ try {
+ readModel(options);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /** Create an empty tagger; a model must be loaded or trained separately. */
+ public Tagger() { }
+
+ /**
+ * @param modelFileName the file name of the model
+ */
+ public Tagger(String modelFileName) {
+ this(new Options(new String[]{"-model",modelFileName}));
+ }
+
+ /**
+ * Command-line entry point: trains a model and/or tags a test file
+ * depending on the parsed options, and optionally evaluates the output
+ * against a gold-standard file. Prints total wall-clock time at the end.
+ */
+ public static void main (String[] args) throws FileNotFoundException, Exception
+ {
+
+ long start = System.currentTimeMillis();
+ Options options = new Options(args);
+
+
+ Tagger tagger = new Tagger();
+
+ // training phase: build feature space, train and serialize the model
+ if (options.train) {
+
+ // depReader.normalizeOn=false;
+
+ tagger.li = new Long2Int(options.hsize);
+ tagger.pipe = new ExtractorT2 (options, tagger.mf= new MFO());
+
+ //tagger.pipe.li =tagger.li;
+
+ InstancesTagger is = (InstancesTagger)tagger.pipe.createInstances(options.trainfile);
+
+ tagger.params = new ParametersFloat(tagger.li.size());
+
+ tagger.train(options, tagger.pipe,tagger.params,is);
+ tagger.writeModel(options, tagger.pipe, tagger.params);
+
+ }
+
+ // test phase: (re)load the model and tag the test file
+ if (options.test) {
+
+ tagger.readModel(options);
+
+ tagger.out(options,tagger.pipe, tagger.params);
+ }
+
+ System.out.println();
+
+ if (options.eval) {
+ System.out.println("\nEVALUATION PERFORMANCE:");
+ Evaluator.evaluateTagger(options.goldfile, options.outfile,options.format);
+ }
+ long end = System.currentTimeMillis();
+ System.out.println("used time "+((float)((end-start)/100)/10));
+ }
+
+ /**
+ * Load a trained model from options.modelName (a zip archive with a
+ * single data entry): first the feature mappings, then the weight
+ * vector, then the pipe state; finally rebuilds the id -> tag-name
+ * lookup table (pipe.types). Failures are only printed.
+ * @param options options providing the model file name
+ */
+ public void readModel(OptionsSuper options) {
+
+ try{
+ pipe = new ExtractorT2(options, mf =new MFO());
+ _options=options;
+ // load the model
+ ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName)));
+ zis.getNextEntry();
+ DataInputStream dis = new DataInputStream(new BufferedInputStream(zis));
+
+ pipe.mf.read(dis);
+ pipe.initValues();
+ pipe.initFeatures();
+
+ params = new ParametersFloat(0);
+ params.read(dis);
+ li = new Long2Int(params.parameters.length);
+ pipe.read(dis);
+
+ dis.close();
+
+ // invert the POS mapping: index -> tag name
+ pipe.types = new String[pipe.mf.getFeatureCounter().get(ExtractorT2.POS)];
+ for(Entry<String,Integer> e : pipe.mf.getFeatureSet().get(ExtractorT2.POS).entrySet())
+ pipe.types[e.getValue()] = e.getKey();
+
+ DB.println("Loading data finished. ");
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Do the training.
+ * Runs options.numIters passes over the instances; for every token the
+ * highest-scoring tag is predicted and, when it differs from the gold
+ * tag, the weights are updated towards the gold feature vector (an
+ * averaged-perceptron-style scheme: params.update with a decreasing
+ * factor, params.average at the end).
+ * @param options training options (iteration count etc.)
+ * @param pipe feature pipe parameter -- NOTE(review): unused; this.pipe
+ * is used throughout instead. Confirm intended.
+ * @param params weight vector, trained in place
+ * @param is2 training instances (must be an InstancesTagger)
+ * @throws IOException
+ * @throws InterruptedException
+ * @throws ClassNotFoundException
+ */
+ public void train(OptionsSuper options, IPipe pipe, ParametersFloat params, Instances is2) {
+
+ InstancesTagger is = (InstancesTagger)is2;
+ String wds[] = mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorT2.WORD));
+
+ // NOTE(review): pd is filled but never read below -- dead code?
+ int pd[] = new int[this.pipe.types.length];
+ for(int k=0;k<pd.length;k++) pd[k]=k;
+
+ int del=0;
+ F2SF f = new F2SF(params.parameters);
+ long vs[] = new long[ExtractorT2._MAX];
+
+ int types =this.pipe.types.length;
+
+ // upd decreases once per instance; used as the averaging weight
+ double upd = options.numIters*is.size() +1;
+
+ for(int i = 0; i <options.numIters ; i++) {
+
+ long start = System.currentTimeMillis();
+
+ int numInstances = is.size();
+
+ long last= System.currentTimeMillis();
+ FV pred = new FV(),gold = new FV();
+
+ int correct =0,count=0;
+ System.out.print("Iteration "+i+": ");
+
+ for(int n = 0; n < numInstances; n++) {
+
+ if((n+1) % 500 == 0) del= PipeGen.outValueErr(n+1, (count-correct),(float)correct/(float)count,del,last,upd);
+
+ int length = is.length(n);
+
+ upd--;
+
+ // token 0 is the artificial root and is skipped
+ for(int w = 1; w < length; w++) {
+
+ double best = -1000;
+ short bestType = -1;
+
+ int[] lemmas; //= is.lemmas[n];
+ if (options.noLemmas)lemmas = new int[is.length(n)];
+ else lemmas = is.plemmas[n];
+
+ this.pipe.addFeatures(is,n,wds[is.forms[n][w]],w,is.gpos[n],is.forms[n], lemmas, vs);
+
+ // score every candidate tag; vs is terminated by Integer.MIN_VALUE
+ for(short t=0;t<types;t++) {
+
+ // the hypotheses of a part of speech tag
+ long p = t<<ExtractorT2.s_type;
+ f.clear();
+
+ // add the features to the vector
+ for(int k1=0;vs[k1]!=Integer.MIN_VALUE;k1++) {
+ if (vs[k1]>0) f.add(this.li.l2i(vs[k1]|p));
+ }
+
+ if (f.score > best) {
+ bestType=t;
+ best =f.score;
+ }
+ }
+
+ count++;
+ if (bestType == is.gpos[n][w] ) {
+ correct++;
+ continue;
+ }
+
+ // wrong prediction: update weights away from pred, towards gold
+ pred.clear();
+ for (int k1=0;vs[k1]!=Integer.MIN_VALUE;k1++) if (vs[k1]>0) pred.add(this.li.l2i(vs[k1]| bestType<<ExtractorT2.s_type));
+
+ gold.clear();
+ for (int k1=0;vs[k1]!=Integer.MIN_VALUE;k1++) if (vs[k1]>0) gold.add(this.li.l2i(vs[k1] | is.gpos[n][w]<<ExtractorT2.s_type));
+
+ params.update(pred,gold, (float)upd, 1.0F);
+ }
+ }
+
+ long end = System.currentTimeMillis();
+ String info = "time "+(end-start);
+ PipeGen.outValueErr(numInstances, (count-correct),(float)correct/(float)count,del,last,upd,info);
+ System.out.println();
+ del=0;
+ }
+
+ params.average(options.numIters*is.size());
+
+ }
+
+
+ /**
+ * Tag a whole corpus.
+ * Reads options.testfile sentence by sentence, tags each one and writes
+ * the result to options.outfile, reporting progress and timing on stdout.
+ * Exceptions are caught and printed.
+ * @param options provides the test/output file names
+ * @param pipe NOTE(review): unused; this.pipe/mf are used instead
+ * @param params NOTE(review): unused here; tagging uses this.params
+ * @throws IOException
+ */
+ public void out (OptionsSuper options, IPipe pipe, ParametersFloat params) {
+
+ try {
+
+
+ long start = System.currentTimeMillis();
+// change this backe!!!
+// CONLLReader09 depReader = new CONLLReader09(options.testfile, CONLLReader09.NO_NORMALIZE);
+ CONLLReader09 depReader = new CONLLReader09(options.testfile);
+
+ CONLLWriter09 depWriter = new CONLLWriter09(options.outfile);
+
+ System.out.print("Processing Sentence: ");
+ pipe.initValues();
+
+ int cnt = 0;
+ int del=0;
+ while(true) {
+
+ InstancesTagger is = new InstancesTagger();
+ is.init(1, mf);
+ SentenceData09 instance = depReader.getNext(is);
+ // end of input
+ if (instance == null || instance.forms == null) break;
+
+
+ is.fillChars(instance, 0, ExtractorT2._CEND);
+
+ cnt++;
+
+
+ tag(is, instance);
+
+ SentenceData09 i09 = new SentenceData09(instance);
+ i09.createSemantic(instance);
+ depWriter.write(i09);
+
+ if(cnt % 100 == 0) del=PipeGen.outValue(cnt, del);
+
+ }
+ del=PipeGen.outValue(cnt, del);
+ depWriter.finishWriting();
+
+ // leftover debugging/statistics scaffolding; min/max are never printed
+ float min=1000, max=-1000;
+
+ // int r[] = new int[14];
+ /*
+ for(Entry<Float, Integer> e : map.entrySet()) {
+ if(e.getKey()<min)min=e.getKey();
+ if(e.getKey()>max)max=e.getKey();
+
+ if(e.getKey()<0.2) r[0]++;
+ else if(e.getKey()<0.5) r[1]+=e.getValue();
+ else if(e.getKey()<0.7) r[2]+=e.getValue();
+ else if(e.getKey()<0.8) r[3]+=e.getValue();
+ else if(e.getKey()<0.9) r[4]+=e.getValue();
+ else if(e.getKey()<1.0) r[5]+=e.getValue();
+ else if(e.getKey()<1.2) r[6]+=e.getValue();
+ else if(e.getKey()<1.3) r[7]+=e.getValue();
+ else if(e.getKey()<1.4) r[8]+=e.getValue();
+ else if(e.getKey()<1.5) r[9]+=e.getValue();
+ else if(e.getKey()<1.9) r[10]+=e.getValue();
+ else if(e.getKey()<2.2) r[11]+=e.getValue();
+ else if(e.getKey()<2.5) r[12]+=e.getValue();
+ else if(e.getKey()>=2.5) r[13]+=e.getValue();
+ }
+ */
+ // for(int k=0;k<r.length;k++) System.out.println(k+" "+r[k][0]+" "+((float)r[k][1]/(float)r[k][0])+" good "+r[k][1]);
+ // System.out.println("min "+min+" "+max);
+
+ long end = System.currentTimeMillis();
+ System.out.println(PipeGen.getSecondsPerInstnace(cnt,(end-start)));
+ System.out.println(PipeGen.getUsedTime(end-start));
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+
+ /**
+ * Tag a single sentence in place: fills instance.ppos with predicted
+ * part-of-speech tags and returns the same object.
+ * @param instance sentence to tag (must include the root token)
+ * @return the same instance, with ppos filled in
+ */
+ public SentenceData09 tag(SentenceData09 instance){
+ InstancesTagger is = new InstancesTagger();
+ is.init(1, pipe.mf);
+ new CONLLReader09().insert(is, instance);
+ is.fillChars(instance, 0, ExtractorT2._CEND);
+ tag(is, instance);
+
+ return instance;
+ }
+
+
+ /**
+ * Core two-pass tagging: a first left-to-right pass fills pos/ppos with
+ * initial predictions, then an identical second pass re-predicts each
+ * token -- presumably so context features that look at neighbouring tags
+ * can use first-pass results (confirm against ExtractorT2).
+ * @param is container holding the sentence's feature data
+ * @param instance sentence whose ppos array is filled in place
+ */
+ private void tag(InstancesTagger is, SentenceData09 instance) {
+
+ int length = instance.ppos.length;
+
+ short[] pos = new short[instance.gpos.length];
+
+ float sc[] =new float[instance.ppos.length];
+
+ // token 0 is the artificial root
+ instance.ppos[0]= is2.io.CONLLReader09.ROOT_POS;
+ pos[0]=(short)pipe.mf.getValue(ExtractorT2.POS, is2.io.CONLLReader09.ROOT_POS);
+
+ // first pass
+ for(int j = 1; j < length; j++) {
+
+ short bestType = (short)pipe.fillFeatureVectorsOne( instance.forms[j],params, j, is,0,pos,this.li,sc);
+ pos[j] = bestType;
+ instance.ppos[j]= pipe.types[bestType];
+ }
+
+ // second pass over the first-pass tags
+ for(int j = 1; j < length; j++) {
+
+ short bestType = (short)pipe.fillFeatureVectorsOne(instance.forms[j],params, j, is,0,pos,this.li,sc);
+ instance.ppos[j]= pipe.types[bestType];
+ pos[j]=bestType;
+ }
+ }
+
+ /**
+ * Tag a single word and return a n-best list of Part-of-Speech tags.
+ *
+ * @param is set of sentences
+ * @param instanceIndex index to the sentence in question
+ * @param word index of the word to be tagged within the sentence
+ * @param wordForm surface form of the word to be tagged
+ * @return n-best list of Part-of-Speech tags (tag ids with scores)
+ */
+ public ArrayList<POS> tag(InstancesTagger is,int instanceIndex, int word, String wordForm) {
+
+ return pipe.classify( wordForm , params, word, is, instanceIndex, is.pposs[instanceIndex], li);
+
+ }
+
+ /**
+ * Tag a single word and return the n-best part-of-speech tags as strings.
+ * BUG FIX: postags was initialized to null, so every add() threw a
+ * NullPointerException (swallowed by the catch) and the method always
+ * returned null; it is now initialized to an empty list.
+ * @param is set of sentences
+ * @param instanceIndex index to the sentence in question
+ * @param word index of the word to be tagged within the sentence
+ * @param wordForm surface form of the word to be tagged
+ * @return n-best list of part-of-speech tag names, best first
+ */
+ public ArrayList<String> tagStrings(InstancesTagger is,int instanceIndex, int word, String wordForm) {
+
+ ArrayList<POS> plist = pipe.classify( wordForm , params, word, is, instanceIndex, is.pposs[instanceIndex], li);
+ String pos[] = mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorT2.POS));
+
+ ArrayList<String> postags = new ArrayList<String>();
+ for(POS p : plist) {
+ try {
+ postags.add(pos[p.p]);
+ }catch(Exception e) {
+ e.printStackTrace();
+ }
+ }
+ return postags;
+
+
+ }
+
+
+
+ /**
+ * Tag a tokenized sentence given as parallel arrays of word forms and
+ * lemmas (without a root token; one is prepended internally).
+ * @param words the word forms of the sentence
+ * @param lemmas the (predicted) lemmas of the sentence, aligned with words
+ * @return predicted part-of-speech tags, aligned with words
+ */
+ public String[] tag (String[] words, String[] lemmas) {
+
+ String[] pposs = new String[words.length];
+
+ try {
+ pipe.initValues();
+
+ // +1 for the artificial root token at position 0
+ int length = words.length+1;
+
+
+ InstancesTagger is = new InstancesTagger();
+ is.init(1, pipe.mf);
+ is.createInstance09(length);
+
+ SentenceData09 instance = new SentenceData09();
+ instance.forms = new String[length];
+ instance.forms[0]=is2.io.CONLLReader09.ROOT;
+
+ instance.plemmas = new String[length];
+ instance.plemmas[0]=is2.io.CONLLReader09.ROOT_LEMMA;
+
+ for(int j = 0; j < words.length; j++) {
+ instance.forms[j+1]=words[j];
+ instance.plemmas[j+1]=lemmas[j];
+ }
+
+ for(int j = 0; j < length; j++) {
+ is.setForm(0, j, instance.forms[j]);
+ is.setLemma(0, j, instance.plemmas[j]);
+ }
+
+ instance.ppos = new String[length];
+
+ is.fillChars(instance, 0, ExtractorT2._CEND);
+
+ this.tag(is, instance);
+
+ // drop the root token again when copying the predictions out
+ for(int j = 0; j < words.length; j++) {
+ pposs[j] = instance.ppos[j+1];
+ }
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+
+ return pposs;
+
+
+ }
+
+ /* (non-Javadoc)
+ * @see is2.tools.Tool#apply(is2.data.SentenceData09)
+ */
+ @Override
+ public SentenceData09 apply(SentenceData09 snt) {
+
+ // copy the input with an artificial root, tag the copy, then build the
+ // result from it (see SentenceData09 for the exact copy semantics)
+ SentenceData09 it = new SentenceData09();
+ it.createWithRoot(snt);
+ it = tag(it);
+ SentenceData09 i09 = new SentenceData09(it);
+ i09.createSemantic(it);
+ return i09;
+ }
+
+
+
+ /* (non-Javadoc)
+ * @see is2.tools.Train#writeModel(is2.util.OptionsSuper, is2.mtag2.Pipe, is2.data.ParametersFloat)
+ *
+ * Serializes the model to options.modelName as a zip archive with a
+ * single "data" entry: feature mappings, then weights, then pipe state
+ * (the mirror image of readModel). Exceptions are caught and printed.
+ */
+ @Override
+ public void writeModel(OptionsSuper options, IPipe pipe, is2.data.ParametersFloat params) {
+ try{
+ ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(options.modelName)));
+ zos.putNextEntry(new ZipEntry("data"));
+ DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos));
+
+ // NOTE(review): writes this.pipe.mf, not the mf of the pipe parameter;
+ // the parameter is only used for pipe.write(dos) below -- confirm intended.
+ this.pipe.mf.writeData(dos);
+
+ DB.println("number of parameters "+params.parameters.length);
+ dos.flush();
+
+ params.write(dos);
+ pipe.write(dos);
+ dos.flush();
+ dos.close();
+
+ } catch(Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/tag/package.html b/dependencyParser/mate-tools/src/is2/tag/package.html
new file mode 100644
index 0000000..469fdf6
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tag/package.html
@@ -0,0 +1,4 @@
+Package info
+<br><br>
+This package integrates a part-of-speech tagger into the dependency parser
+<br>
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/tools/IPipe.java b/dependencyParser/mate-tools/src/is2/tools/IPipe.java
new file mode 100644
index 0000000..d976074
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tools/IPipe.java
@@ -0,0 +1,30 @@
+/**
+ *
+ */
+package is2.tools;
+
+import is2.data.Instances;
+import is2.data.InstancesTagger;
+
+import java.io.DataOutputStream;
+import java.io.File;
+
+/**
+ * @author Dr. Bernd Bohnet, 25.12.2010
+ *
+ *
+ */
+public interface IPipe {
+
+ /**
+ * Read the corpus from the given file and build the feature space over
+ * it (called with the training file; see Tagger.main).
+ * @param file path to the corpus file
+ * @return the collected instances
+ */
+ public abstract Instances createInstances(String file);
+
+ /** Initialize internal value tables before feature extraction. */
+ public abstract void initValues();
+
+ /**
+ * Initialize the features types.
+ */
+ public abstract void initFeatures();
+
+ /**
+ * Serialize the pipe's state.
+ * @param dos target stream
+ */
+ public abstract void write(DataOutputStream dos);
+
+}
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/tools/Retrainable.java b/dependencyParser/mate-tools/src/is2/tools/Retrainable.java
new file mode 100644
index 0000000..67a2e56
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tools/Retrainable.java
@@ -0,0 +1,25 @@
+package is2.tools;
+
+import is2.data.SentenceData09;
+
+/**
+ * Provides Methods for the retraining
+ * @author bohnetbd
+ *
+ */
+public interface Retrainable {
+
+ /**
+ * Retrains with an update factor (upd).
+ * The retraining stops when the model was successfully adapted or it gave up after the maximal number of iterations.
+ *
+ * @param sentence the data container of the new example.
+ * @param upd the update factor, e.g. 0.01
+ * @param iterations maximal number of iterations that are tried to adapt the system.
+ * @return success = true -- else false
+ */
+ public boolean retrain(SentenceData09 sentence, float upd, int iterations) ;
+
+ /**
+ * Same as {@link #retrain(SentenceData09, float, int)} with additional
+ * control over diagnostic output.
+ * @param print if true, print progress/diagnostic output
+ * @return success = true -- else false
+ */
+ boolean retrain(SentenceData09 sentence, float upd, int iterations, boolean print);
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/tools/Tool.java b/dependencyParser/mate-tools/src/is2/tools/Tool.java
new file mode 100644
index 0000000..06246a2
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tools/Tool.java
@@ -0,0 +1,25 @@
+/**
+ *
+ */
+package is2.tools;
+
+import is2.data.SentenceData09;
+
+/**
+ * @author Bernd Bohnet, 27.10.2010
+ *
+ * Interface to all tools
+ */
+public interface Tool {
+
+
+ /**
+ * Uses the tool and applies it on the input sentence.
+ * The input is altered and has to include a root (token).
+ *
+ * @param snt09 the input sentence (including the root token)
+ * @return The result of the performance without the root.
+ */
+ SentenceData09 apply(SentenceData09 snt09);
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/tools/ToolIO.java b/dependencyParser/mate-tools/src/is2/tools/ToolIO.java
new file mode 100644
index 0000000..279a4ff
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tools/ToolIO.java
@@ -0,0 +1,17 @@
+/**
+ *
+ */
+package is2.tools;
+
+import is2.data.SentenceData09;
+
+/**
+ * @author Bernd Bohnet, 27.10.2010
+ *
+ * Interface to all tools
+ */
+public interface ToolIO {
+
+ /** Load the tool's model; where it is read from is implementation specific. */
+ void readModel();
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/tools/Train.java b/dependencyParser/mate-tools/src/is2/tools/Train.java
new file mode 100644
index 0000000..234f937
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/tools/Train.java
@@ -0,0 +1,25 @@
+/**
+ *
+ */
+package is2.tools;
+
+import is2.data.Instances;
+import is2.data.ParametersFloat;
+import is2.util.OptionsSuper;
+
+/**
+ * @author Dr. Bernd Bohnet, 24.12.2010
+ *
+ *
+ */
+public interface Train {
+
+ /** Serialize the trained model (pipe state and parameters) to disk. */
+ public abstract void writeModel(OptionsSuper options, IPipe pipe, ParametersFloat params);
+
+ /** Load a previously written model. */
+ public abstract void readModel(OptionsSuper options);
+
+ /** Train the parameters in place on the given instances. */
+ public abstract void train(OptionsSuper options, IPipe pipe, ParametersFloat params, Instances is);
+
+ /** Apply the model to a test corpus and write the results. */
+ public abstract void out(OptionsSuper options, IPipe pipe, ParametersFloat params);
+
+}
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/util/Convert.java b/dependencyParser/mate-tools/src/is2/util/Convert.java
new file mode 100644
index 0000000..1ed2389
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/Convert.java
@@ -0,0 +1,455 @@
+/**
+ *
+ */
+package is2.util;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+
+
+import is2.data.SentenceData09;
+import is2.io.CONLLReader06;
+import is2.io.CONLLReader08;
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter06;
+import is2.io.CONLLWriter09;
+
+/**
+ * @author Dr. Bernd Bohnet, 01.03.2010
+ *
+ *
+ */
+public class Convert {
+
+
+
+ /**
+ * Command-line entry point. Dispatches to one of the format converters
+ * based on the flags given after the input and output file names.
+ * BUG FIX: with fewer than two arguments the code used to print the
+ * usage message and then fall through to the conversion calls, crashing
+ * with ArrayIndexOutOfBoundsException; it now returns after the usage.
+ */
+ public static void main(String args[]) throws Exception {
+
+
+ if (args.length<2) {
+
+ System.out.println("Usage");
+ System.out.println(" java is2.util.Convert <in> <out> [-w06|-w0809|-yue] [-wordsonly]");
+ return;
+
+ }
+
+ int todo =9;
+ boolean wordsOnly=false;
+ for(String a : args) {
+ if (a!=null && a.equals("-w06")) todo=6;
+ else if (a!=null && a.equals("-w0809")) todo=89;
+ else if (a!=null && a.equals("-yue")) todo=99;
+ else if (a!=null && a.equals("-utf8")) todo=8;
+
+ if (a!=null && a.equals("-wordsonly")) wordsOnly=true;
+
+
+ }
+
+ // default (no flag): CoNLL-06 -> CoNLL-09
+ if (todo==9)convert(args[0],args[1]);
+ else if (todo==6) convert0906(args[0],args[1]);
+ else if (todo==8) convert8(args[0],args[1], args[2]);
+ else if (todo==89) convert0809(args[0],args[1]);
+ else if (todo==99) {
+ convertChnYue(args[0],args[1],wordsOnly);
+ }
+
+
+ }
+ /**
+ * Transcode a file from the given source encoding to UTF-8, dropping any
+ * characters that are not defined in Unicode, and report the counts.
+ * @param infile input file path
+ * @param outfile output file path
+ * @param format name of the input file's character encoding
+ */
+ private static void convert8(String infile, String outfile, String format) {
+ try {
+
+ System.out.println("availableCharsets: "+Charset.availableCharsets());
+
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(infile), format));
+ BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF8"));
+
+ int read = 0, written = 0;
+ int ch;
+ while ((ch = reader.read()) > -1) {
+ read++;
+
+ // copy only characters that are assigned in Unicode
+ if (Character.isDefined(ch)) {
+ writer.write(ch);
+ written++;
+ }
+ }
+ reader.close();
+ writer.close();
+ System.out.println("read "+read+" chars and wrote "+written+" utf8 chars");
+ }
+ catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ }
+
+ /**
+ * Convert a CoNLL-06 file into CoNLL-09 format.
+ * Reads sentence by sentence, strips the artificial root token, sanity
+ * checks every column (printing the sentence number on problems) and
+ * writes the converted sentence.
+ * @param source CoNLL-06 input file
+ * @param target CoNLL-09 output file
+ * @throws Exception on I/O or format errors
+ */
+ public static void convert(String source, String target) throws Exception {
+
+ CONLLReader06 reader = new CONLLReader06(source);
+ CONLLWriter09 writer = new CONLLWriter09(target);
+
+ int str =0;
+ while (true) {
+ SentenceData09 i = reader.getNext();
+ str++;
+ if (i == null) break;
+
+
+ String[] formsNoRoot = new String[i.length()-1];
+ String[] posNoRoot = new String[formsNoRoot.length];
+ String[] lemmas = new String[formsNoRoot.length];
+
+ String[] org_lemmas = new String[formsNoRoot.length];
+
+ String[] of = new String[formsNoRoot.length];
+ String[] pf = new String[formsNoRoot.length];
+
+ String[] pposs = new String[formsNoRoot.length];
+ String[] labels = new String[formsNoRoot.length];
+ String[] fillp = new String[formsNoRoot.length];
+
+ int[] heads = new int[formsNoRoot.length];
+
+
+
+ for(int j = 0; j < formsNoRoot.length; j++) {
+ formsNoRoot[j] = i.forms[j+1];
+ if (formsNoRoot[j].length()==0 ||formsNoRoot[j].equals("")) {
+ System.out.println("error forms "+str);
+ // System.exit(0);
+ formsNoRoot[j]=" ";
+ }
+ posNoRoot[j] = i.gpos[j+1];
+ if (posNoRoot[j].length()==0 ||posNoRoot[j].equals(" ")) {
+ System.out.println("error pos "+str);
+ // System.exit(0);
+ }
+ pposs[j] = i.ppos[j+1];
+ if (pposs[j].length()==0 ||pposs[j].equals(" ")) {
+ System.out.println("error pos "+str);
+ //System.exit(0);
+ }
+
+ labels[j] = i.labels[j+1];
+ if (labels[j].length()==0 ||labels[j].equals(" ")) {
+ System.out.println("error lab "+str);
+ // System.exit(0);
+ }
+ heads[j] = i.heads[j+1];
+ // clamp out-of-range heads to the last token
+ if(heads[j]> posNoRoot.length) {
+ System.out.println("head out of range "+heads[j]+" "+heads.length+" "+str);
+ heads[j]=posNoRoot.length;
+ }
+
+ lemmas[j] = i.plemmas[j+1];
+ if (lemmas[j].length()==0 ||lemmas[j].equals(" ")) {
+ System.out.println("error lab "+str);
+ // System.exit(0);
+ }
+ org_lemmas[j] = i.lemmas[j+1];
+ if (org_lemmas[j].length()==0 ||org_lemmas[j].equals(" ")) {
+ System.out.println("error lab "+str);
+ // System.exit(0);
+ }
+ of[j] = i.ofeats[j+1];
+ pf[j] = i.pfeats[j+1];
+ if (str==6099) {
+ // System.out.println(formsNoRoot[j]+"\t"+posNoRoot[j]+"\t"+pposs[j]+"\t"+labels[j]+"\t"+heads[j]);
+ }
+
+ // (instance.fillp!=null) fillp[j] = instance.fillp[j+1];
+ }
+
+ // NOTE(review): lemmas/org_lemmas/posNoRoot are validated above but not
+ // passed here -- formsNoRoot is used for all three word columns and
+ // pposs for both POS columns. Looks suspicious; confirm intended.
+ SentenceData09 i09 = new SentenceData09(formsNoRoot, formsNoRoot, formsNoRoot,pposs, pposs, labels, heads,fillp,of, pf);
+
+ //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) {
+ //SentenceData09
+ // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,);
+
+ writer.write(i09);
+
+
+ }
+ writer.finishWriting();
+
+
+ }
+
+
+
+ /**
+ * Convert a tab-separated dependency file (form, POS, head index, label
+ * per line; sentences separated by short/blank lines) into CoNLL-09.
+ * Head indices in the input are shifted by +1 so that 0 denotes the root.
+ * @param source input file (UTF-8)
+ * @param target CoNLL-09 output file
+ * @param wordsOnly if true, strip POS, heads and labels from the output
+ * @throws Exception on I/O or format errors
+ */
+ public static void convertChnYue(String source, String target, boolean wordsOnly) throws Exception {
+
+
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(source),"UTF-8"),32768);
+
+ CONLLWriter09 writer = new CONLLWriter09(target);
+
+ int str =0;
+ while (true) {
+
+ // collect the lines of one sentence
+ ArrayList<String[]> lines = new ArrayList<String[]>();
+
+ String line;
+ while((line = reader.readLine())!=null) {
+
+ if (line.length()<2) break;
+ String split[] = line.split("\t");
+ lines.add(split);
+ }
+ if (line ==null)break;
+
+ str++;
+
+
+ String[] formsNoRoot = new String[lines.size()];
+ String[] posNoRoot = new String[formsNoRoot.length];
+ String[] lemmas = new String[formsNoRoot.length];
+
+ String[] org_lemmas = new String[formsNoRoot.length];
+
+ String[] of = new String[formsNoRoot.length];
+ String[] pf = new String[formsNoRoot.length];
+
+ String[] pposs = new String[formsNoRoot.length];
+ String[] labels = new String[formsNoRoot.length];
+ String[] fillp = new String[formsNoRoot.length];
+
+ int[] heads = new int[formsNoRoot.length];
+
+
+
+ for(int j = 0; j < formsNoRoot.length; j++) {
+ formsNoRoot[j] = lines.get(j)[0];
+ if (formsNoRoot[j].length()==0 ||formsNoRoot[j].equals("")) {
+ System.out.println("error forms "+str);
+ // System.exit(0);
+ formsNoRoot[j]="_";
+ }
+
+ posNoRoot[j] = lines.get(j)[1];
+ if (posNoRoot[j].length()==0 ||posNoRoot[j].equals(" ")) {
+ System.out.println("error pos "+str);
+ // System.exit(0);
+ }
+ pposs[j] = "_";
+
+ labels[j] = lines.get(j)[3];
+ if (labels[j].length()==0 ||labels[j].equals(" ")) {
+ System.out.println("error lab "+str);
+ labels[j] = "_";
+ // System.exit(0);
+ }
+ heads[j] = Integer.parseInt(lines.get(j)[2])+1;
+ if(heads[j]> posNoRoot.length) {
+ System.out.println("head out of range "+heads[j]+" "+heads.length+" "+str);
+ heads[j]=posNoRoot.length;
+ }
+
+ // 0 is root and not -1
+ // NOTE(review): after the +1 shift above, an input head of -1 is
+ // already 0 here; this check only fires for input -2 -- confirm.
+ if (heads[j]==-1)heads[j]=0;
+
+ lemmas[j] = "_";
+
+ org_lemmas[j] = "_";
+
+ of[j] = "_";
+ pf[j] = "_";
+
+ if (wordsOnly) {
+ posNoRoot[j]="_";
+ heads[j]=0;
+ labels[j] = "_";
+ }
+
+ // (instance.fillp!=null) fillp[j] = instance.fillp[j+1];
+ }
+
+ SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas,posNoRoot, posNoRoot, labels, heads,fillp,of, pf);
+
+ //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) {
+ //SentenceData09
+ // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,);
+
+ writer.write(i09);
+
+
+ }
+ writer.finishWriting();
+
+
+ }
+
+
+
+ /**
+ * Convert a CoNLL-08 file into CoNLL-09 format.
+ * Reads sentence by sentence, strips the artificial root token, sanity
+ * checks the columns and writes the converted sentence.
+ * @param source CoNLL-08 input file
+ * @param target CoNLL-09 output file
+ * @throws Exception on I/O or format errors
+ */
+ public static void convert0809(String source, String target) throws Exception {
+
+ CONLLReader08 reader = new CONLLReader08(source);
+ CONLLWriter09 writer = new CONLLWriter09(target);
+
+ int str =0;
+ while (true) {
+ SentenceData09 i = reader.getNext();
+ str++;
+ if (i == null) break;
+
+
+ String[] formsNoRoot = new String[i.length()-1];
+ String[] posNoRoot = new String[formsNoRoot.length];
+ String[] lemmas = new String[formsNoRoot.length];
+
+ String[] org_lemmas = new String[formsNoRoot.length];
+
+ String[] of = new String[formsNoRoot.length];
+ String[] pf = new String[formsNoRoot.length];
+
+ String[] pposs = new String[formsNoRoot.length];
+ String[] labels = new String[formsNoRoot.length];
+ String[] fillp = new String[formsNoRoot.length];
+
+ int[] heads = new int[formsNoRoot.length];
+
+
+
+ for(int j = 0; j < formsNoRoot.length; j++) {
+ formsNoRoot[j] = i.forms[j+1];
+ if (formsNoRoot[j].length()==0 ||formsNoRoot[j].equals("")) {
+ System.out.println("error forms "+str);
+ // System.exit(0);
+ formsNoRoot[j]=" ";
+ }
+ posNoRoot[j] = i.gpos[j+1];
+ if (posNoRoot[j].length()==0 ||posNoRoot[j].equals(" ")) {
+ System.out.println("error pos "+str);
+ // System.exit(0);
+ }
+ pposs[j] = i.ppos[j+1];
+ if (pposs[j].length()==0 ||pposs[j].equals(" ")) {
+ System.out.println("error pos "+str);
+ //System.exit(0);
+ }
+
+ labels[j] = i.labels[j+1];
+ if (labels[j].length()==0 ||labels[j].equals(" ")) {
+ System.out.println("error lab "+str);
+ // System.exit(0);
+ }
+ heads[j] = i.heads[j+1];
+ // clamp out-of-range heads to the last token
+ if(heads[j]> posNoRoot.length) {
+ System.out.println("head out of range "+heads[j]+" "+heads.length+" "+str);
+ heads[j]=posNoRoot.length;
+ }
+
+ lemmas[j] = i.plemmas[j+1];
+ if (lemmas[j].length()==0 ||lemmas[j].equals(" ")) {
+ System.out.println("error lab "+str);
+ // System.exit(0);
+ }
+ org_lemmas[j] = i.lemmas[j+1];
+ // if (org_lemmas[j].length()==0 ||org_lemmas[j].equals(" ")) {
+ // System.out.println("error lab "+str);
+ // // System.exit(0);
+ // }
+// of[j] = i.ofeats[j+1];
+// pf[j] = i.pfeats[j+1];
+ if (str==6099) {
+ // System.out.println(formsNoRoot[j]+"\t"+posNoRoot[j]+"\t"+pposs[j]+"\t"+labels[j]+"\t"+heads[j]);
+ }
+
+ // (instance.fillp!=null) fillp[j] = instance.fillp[j+1];
+ }
+
+ // NOTE(review): posNoRoot is validated above but not passed here --
+ // pposs is used for both POS columns, and the lemma arguments are in
+ // (org_lemmas, lemmas) order unlike the other converters; confirm.
+ SentenceData09 i09 = new SentenceData09(formsNoRoot, org_lemmas, lemmas,pposs, pposs, labels, heads,fillp,of, pf);
+
+ //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) {
+ //SentenceData09
+ // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,);
+
+ writer.write(i09);
+
+
+ }
+ writer.finishWriting();
+
+
+ }
+
+
+ /**
+ * Convert a CoNLL-09 file into CoNLL-06 format.
+ * Reads sentence by sentence, strips the artificial root token and
+ * writes the converted sentence; no column validation is performed here.
+ * @param source CoNLL-09 input file
+ * @param target CoNLL-06 output file
+ * @throws Exception on I/O or format errors
+ */
+ public static void convert0906(String source, String target) throws Exception {
+
+ CONLLReader09 reader = new CONLLReader09(source);
+ CONLLWriter06 writer = new CONLLWriter06(target);
+
+
+ while (true) {
+ SentenceData09 i = reader.getNext();
+
+ if (i == null) break;
+
+
+ String[] formsNoRoot = new String[i.length()-1];
+ String[] posNoRoot = new String[formsNoRoot.length];
+ String[] lemmas = new String[formsNoRoot.length];
+
+ String[] org_lemmas = new String[formsNoRoot.length];
+
+ String[] of = new String[formsNoRoot.length];
+ String[] pf = new String[formsNoRoot.length];
+
+ String[] pposs = new String[formsNoRoot.length];
+ String[] labels = new String[formsNoRoot.length];
+ String[] fillp = new String[formsNoRoot.length];
+
+ int[] heads = new int[formsNoRoot.length];
+
+ // copy every column, dropping the root token at index 0
+ for(int j = 0; j < formsNoRoot.length; j++) {
+ formsNoRoot[j] = i.forms[j+1];
+ posNoRoot[j] = i.gpos[j+1];
+ pposs[j] = i.gpos[j+1];
+
+ labels[j] = i.labels[j+1];
+ heads[j] = i.heads[j+1];
+ lemmas[j] = i.plemmas[j+1];
+
+ org_lemmas[j] = i.lemmas[j+1];
+ of[j] = i.ofeats[j+1];
+ pf[j] = i.pfeats[j+1];
+
+ // (instance.fillp!=null) fillp[j] = instance.fillp[j+1];
+ }
+
+ SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas,posNoRoot, pposs, labels, heads,fillp,of, pf);
+
+ //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) {
+ //SentenceData09
+ // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,);
+
+ writer.write(i09);
+
+
+ }
+ writer.finishWriting();
+
+
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/util/Convert0409.java b/dependencyParser/mate-tools/src/is2/util/Convert0409.java
new file mode 100644
index 0000000..7fc1142
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/Convert0409.java
@@ -0,0 +1,182 @@
+/**
+ *
+ */
+package is2.util;
+
+import is2.data.SentenceData09;
+import is2.io.CONLLReader04;
+import is2.io.CONLLReader06;
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter06;
+import is2.io.CONLLWriter09;
+
+/**
+ * @author Dr. Bernd Bohnet, 01.03.2010
+ *
+ *
+ */
+public class Convert0409 {
+
+
+
+ public static void main(String args[]) throws Exception {
+
+ convert(args[0],args[1]);
+
+
+ }
+
+ public static void convert(String source, String target) throws Exception {
+
+ CONLLReader04 reader = new CONLLReader04(source);
+ CONLLWriter09 writer = new CONLLWriter09(target);
+
+ int str =0;
+ while (true) {
+ SentenceData09 i = reader.getNext();
+ str++;
+ if (i == null) break;
+
+
+ String[] formsNoRoot = new String[i.length()-1];
+ String[] posNoRoot = new String[formsNoRoot.length];
+ String[] lemmas = new String[formsNoRoot.length];
+
+ String[] org_lemmas = new String[formsNoRoot.length];
+
+ String[] of = new String[formsNoRoot.length];
+ String[] pf = new String[formsNoRoot.length];
+
+ String[] pposs = new String[formsNoRoot.length];
+ String[] labels = new String[formsNoRoot.length];
+ String[] fillp = new String[formsNoRoot.length];
+
+ int[] heads = new int[formsNoRoot.length];
+
+
+
+ for(int j = 0; j < formsNoRoot.length; j++) {
+ formsNoRoot[j] = i.forms[j+1];
+ if (formsNoRoot[j].length()==0 ||formsNoRoot[j].equals("")) {
+ System.out.println("error forms "+str);
+ // System.exit(0);
+ formsNoRoot[j]=" ";
+ }
+ posNoRoot[j] = i.gpos[j+1];
+ if (posNoRoot[j].length()==0 ||posNoRoot[j].equals(" ")) {
+ System.out.println("error pos "+str);
+ // System.exit(0);
+ }
+ pposs[j] = i.ppos[j+1];
+ if (pposs[j].length()==0 ||pposs[j].equals(" ")) {
+ System.out.println("error pos "+str);
+ //System.exit(0);
+ }
+
+ labels[j] = i.labels[j+1];
+ if (labels[j].length()==0 ||labels[j].equals(" ")) {
+ System.out.println("error lab "+str);
+ // System.exit(0);
+ }
+ heads[j] = i.heads[j+1];
+ if(heads[j]> posNoRoot.length) {
+ System.out.println("head out of range "+heads[j]+" "+heads.length+" "+str);
+ heads[j]=posNoRoot.length;
+ }
+
+ lemmas[j] = i.plemmas[j+1];
+ if (lemmas[j].length()==0 ||lemmas[j].equals(" ")) {
+ System.out.println("error lab "+str);
+ // System.exit(0);
+ }
+ org_lemmas[j] = i.lemmas[j+1];
+ if (org_lemmas[j].length()==0 ||org_lemmas[j].equals(" ")) {
+ System.out.println("error lab "+str);
+ // System.exit(0);
+ }
+ of[j] = i.ofeats[j+1];
+ pf[j] = i.pfeats[j+1];
+ if (str==6099) {
+ // System.out.println(formsNoRoot[j]+"\t"+posNoRoot[j]+"\t"+pposs[j]+"\t"+labels[j]+"\t"+heads[j]);
+ }
+
+ // (instance.fillp!=null) fillp[j] = instance.fillp[j+1];
+ }
+
+ SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas,pposs, pposs, labels, heads,fillp,of, pf);
+
+ //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) {
+ //SentenceData09
+ // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,);
+
+ writer.write(i09);
+
+
+ }
+ writer.finishWriting();
+
+
+ }
+
+
+ public static void convert0906(String source, String target) throws Exception {
+
+ CONLLReader09 reader = new CONLLReader09(source);
+ CONLLWriter06 writer = new CONLLWriter06(target);
+
+
+ while (true) {
+ SentenceData09 i = reader.getNext();
+
+ if (i == null) break;
+
+
+ String[] formsNoRoot = new String[i.length()-1];
+ String[] posNoRoot = new String[formsNoRoot.length];
+ String[] lemmas = new String[formsNoRoot.length];
+
+ String[] org_lemmas = new String[formsNoRoot.length];
+
+ String[] of = new String[formsNoRoot.length];
+ String[] pf = new String[formsNoRoot.length];
+
+ String[] pposs = new String[formsNoRoot.length];
+ String[] labels = new String[formsNoRoot.length];
+ String[] fillp = new String[formsNoRoot.length];
+
+ int[] heads = new int[formsNoRoot.length];
+
+ for(int j = 0; j < formsNoRoot.length; j++) {
+ formsNoRoot[j] = i.forms[j+1];
+ posNoRoot[j] = i.gpos[j+1];
+ pposs[j] = i.ppos[j+1];
+
+ labels[j] = i.labels[j+1];
+ heads[j] = i.heads[j+1];
+ lemmas[j] = i.plemmas[j+1];
+
+ org_lemmas[j] = i.lemmas[j+1];
+ of[j] = i.ofeats[j+1];
+ pf[j] = i.pfeats[j+1];
+
+ // (instance.fillp!=null) fillp[j] = instance.fillp[j+1];
+ }
+
+ SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas,posNoRoot, pposs, labels, heads,fillp,of, pf);
+
+ //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) {
+ //SentenceData09
+ // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,);
+
+ writer.write(i09);
+
+
+ }
+ writer.finishWriting();
+
+
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/util/ConvertADJ.java b/dependencyParser/mate-tools/src/is2/util/ConvertADJ.java
new file mode 100644
index 0000000..b30aabb
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/ConvertADJ.java
@@ -0,0 +1,129 @@
+/**
+ *
+ */
+package is2.util;
+
+import is2.data.SentenceData09;
+import is2.io.CONLLReader04;
+import is2.io.CONLLReader06;
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter06;
+import is2.io.CONLLWriter09;
+
+/**
+ * @author Dr. Bernd Bohnet, 01.03.2010
+ *
+ *
+ */
+public class ConvertADJ {
+
+
+
+ public static void main(String args[]) throws Exception {
+
+ convert(args[0],args[1]);
+
+
+ }
+
+ public static void convert(String source, String target) throws Exception {
+
+ CONLLReader09 reader = new CONLLReader09(source);
+// CONLLWriter09 writer = new CONLLWriter09(target);
+ int adj=0,argadj=0;
+ int rb=0,argrb=0;
+ int str =0;
+ while (true) {
+ SentenceData09 i = reader.getNext();
+ str++;
+ if (i == null) break;
+
+
+ for (int k =0;k<i.length();k++) {
+
+ if (i.gpos[k].startsWith("JJ")) adj++;
+ if (i.gpos[k].startsWith("RB")) rb++;
+
+ if (i.argposition!=null) {
+ for(int p=0;p<i.argposition.length;p++) {
+ if(i.argposition[p]!=null)
+ for(int a=0;a<i.argposition[p].length;a++) {
+ if(i.argposition[p][a]==k && i.gpos[k].startsWith("JJ")) argadj ++;
+ if(i.argposition[p][a]==k && i.gpos[k].startsWith("RB")) argrb ++;
+ }
+
+ }
+ }
+ // (instance.fillp!=null) fillp[j] = instance.fillp[j+1];
+ }
+
+
+
+ }
+ System.out.println("adj "+adj+ " "+argadj);
+ System.out.println("rb "+rb+ " "+argrb);
+
+ }
+
+
+ public static void convert0906(String source, String target) throws Exception {
+
+ CONLLReader09 reader = new CONLLReader09(source);
+ CONLLWriter06 writer = new CONLLWriter06(target);
+
+
+ while (true) {
+ SentenceData09 i = reader.getNext();
+
+ if (i == null) break;
+
+
+ String[] formsNoRoot = new String[i.length()-1];
+ String[] posNoRoot = new String[formsNoRoot.length];
+ String[] lemmas = new String[formsNoRoot.length];
+
+ String[] org_lemmas = new String[formsNoRoot.length];
+
+ String[] of = new String[formsNoRoot.length];
+ String[] pf = new String[formsNoRoot.length];
+
+ String[] pposs = new String[formsNoRoot.length];
+ String[] labels = new String[formsNoRoot.length];
+ String[] fillp = new String[formsNoRoot.length];
+
+ int[] heads = new int[formsNoRoot.length];
+
+ for(int j = 0; j < formsNoRoot.length; j++) {
+ formsNoRoot[j] = i.forms[j+1];
+ posNoRoot[j] = i.gpos[j+1];
+ pposs[j] = i.ppos[j+1];
+
+ labels[j] = i.labels[j+1];
+ heads[j] = i.heads[j+1];
+ lemmas[j] = i.plemmas[j+1];
+
+ org_lemmas[j] = i.lemmas[j+1];
+ of[j] = i.ofeats[j+1];
+ pf[j] = i.pfeats[j+1];
+
+ // (instance.fillp!=null) fillp[j] = instance.fillp[j+1];
+ }
+
+ SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas,posNoRoot, pposs, labels, heads,fillp,of, pf);
+
+ //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) {
+ //SentenceData09
+ // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,);
+
+ writer.write(i09);
+
+
+ }
+ writer.finishWriting();
+
+
+ }
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/util/ConvertLowerCase0909.java b/dependencyParser/mate-tools/src/is2/util/ConvertLowerCase0909.java
new file mode 100644
index 0000000..e8f19f3
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/ConvertLowerCase0909.java
@@ -0,0 +1,89 @@
+/**
+ *
+ */
+package is2.util;
+
+import is2.data.SentenceData09;
+import is2.io.CONLLReader06;
+import is2.io.CONLLReader09;
+import is2.io.CONLLWriter06;
+import is2.io.CONLLWriter09;
+
+/**
+ * @author Dr. Bernd Bohnet, 01.03.2010
+ *
+ *
+ */
+public class ConvertLowerCase0909 {
+
+
+
+ public static void main(String args[]) throws Exception {
+
+
+
+ CONLLReader09 reader = new CONLLReader09(args[0]);
+ CONLLWriter09 writer = new CONLLWriter09(args[1]);
+
+ int str =0;
+ while (true) {
+ SentenceData09 i = reader.getNext();
+ str++;
+ if (i == null) break;
+
+ SentenceData09 i09 = new SentenceData09(i);
+ i09.createSemantic(i);
+
+ for(int k=0;k<i09.length();k++) {
+ i09.lemmas[k]=i09.lemmas[k].toLowerCase();
+ i09.plemmas[k]=i09.plemmas[k].toLowerCase();
+
+ }
+
+ writer.write(i09);
+
+
+ }
+ writer.finishWriting();
+
+
+ }
+
+ public static void convert(String source, String target) throws Exception {
+
+ CONLLReader09 reader = new CONLLReader09(source);
+ CONLLWriter09 writer = new CONLLWriter09(target);
+
+ int str =0;
+ while (true) {
+ SentenceData09 i = reader.getNext();
+ str++;
+ if (i == null) break;
+
+ SentenceData09 i09 = new SentenceData09(i);
+ i09.createSemantic(i);
+
+ for(int k=0;k<i09.length();k++) {
+ i09.lemmas[k]=i09.lemmas[k].toLowerCase();
+ i09.plemmas[k]=i09.plemmas[k].toLowerCase();
+
+ }
+
+ //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) {
+ //SentenceData09
+ // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,);
+
+ writer.write(i09);
+
+
+ }
+ writer.finishWriting();
+
+
+ }
+
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/util/ConvertTiger2CoNLL.java b/dependencyParser/mate-tools/src/is2/util/ConvertTiger2CoNLL.java
new file mode 100644
index 0000000..bb528f7
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/ConvertTiger2CoNLL.java
@@ -0,0 +1,124 @@
+/**
+ *
+ */
+package is2.util;
+
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.StringTokenizer;
+
+/**
+ * @author Dr. Bernd Bohnet, 17.01.2010
+ *
+ * This class removes all information from a conll 2009 file except of columns 1 and 2
+ * that contain the word id and the word form.
+ */
+public class ConvertTiger2CoNLL {
+
+ public static void main (String[] args) throws IOException {
+
+
+ OptionsSuper options = new OptionsSuper(args,null);
+
+ if (options.trainfile!= null){
+ System.err.println("included sentences "+clean(options.trainfile, options.outfile, options.start, options.count));
+ }
+ else System.err.println("Please proivde the file name -train <file-name>");
+
+ }
+
+ /**
+ * @param trainfile
+ * @throws IOException
+ */
+ private static int clean(String file, String outFile, int start, int numberOfSentences) throws IOException {
+
+ System.err.println("writting to "+outFile);
+ System.err.println("start "+start+" to "+(start+numberOfSentences));
+ int state=0;
+
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768);
+ BufferedWriter writer = new BufferedWriter(new java.io.OutputStreamWriter (new java.io.FileOutputStream (outFile),"UTF-8"),32768);
+ String l =null;
+ try {
+
+ int id =1, snt=0,cnt=0;
+
+ while( (l = reader.readLine())!=null) {
+
+
+ if (l.startsWith("#BOS")) {
+ state=1; //BOS
+ id=1;
+ snt++;
+ continue;
+ }
+ if (l.startsWith("#EOS") && state==1) {
+ state=2; //BOS
+ cnt++;
+
+ writer.newLine();
+ }
+
+ if (start>snt || (start+numberOfSentences)<=snt) {
+ state=3;
+ }
+
+ if (l.startsWith("#5")||l.startsWith("#6")||l.startsWith("#7")) continue;
+ if ((start+numberOfSentences)<=snt) break;
+
+ if (state==3) continue;
+
+
+ if (state==1) {
+
+ l = l.replace("\t\t", "\t");
+ l = l.replace("\t\t", "\t");
+
+ StringTokenizer t = new StringTokenizer(l,"\t");
+ int count=0;
+
+ writer.write(""+id+"\t");
+
+ while (t.hasMoreTokens()) {
+ if (count==0) {
+ writer.write(t.nextToken()+"\t");
+ } else if (count==1) {
+ writer.write(t.nextToken()+"\t_\t");
+ } else if (count==2) {
+ writer.write(t.nextToken()+"\t_\t");
+ } else if (count==3) {
+ writer.write(t.nextToken().replace(".", "|")+"\t_\t");
+ }
+ else {
+ t.nextToken();
+ }
+ count++;
+ }
+ writer.write("_\t_\t_\t_\t_\t_\t_\t_\t_");
+ writer.newLine();
+ }
+ id++;
+ }
+ writer.flush();
+ writer.close();
+ reader.close();
+
+ return cnt;
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+
+ return -1;
+
+ }
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/util/DB.java b/dependencyParser/mate-tools/src/is2/util/DB.java
new file mode 100755
index 0000000..8218ea5
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/DB.java
@@ -0,0 +1,81 @@
+package is2.util;
+
+import java.util.Calendar;
+import java.util.GregorianCalendar;
+
+
public class DB {


	private static final String ARROW = " -> ";
	// NOTE(review): LEER and BIG look whitespace-collapsed in this copy of the
	// source; BIG must be at least 55 characters wide (and LEER at least 10)
	// for the padding logic in println() to work -- verify against upstream.
	private static final String LEER = " " ;
	private static final String BIG = " " ;

	// global switch: when false, println/prints are no-ops
	private static boolean debug = true;

	/**
	 * Writes a debug line to stderr, prefixed with a timestamp and the
	 * caller's class name, line number and method name, padded to a fixed
	 * width. Does nothing when debugging is disabled.
	 */
	final static public void println (Object err) {

		if (!debug) return;

		// index 1 of the stack trace is the direct caller of this method
		StackTraceElement[] ste = new Exception().getStackTrace();

		StringBuffer msg = new StringBuffer();
		msg.append(getDate().append(LEER).substring(0, 10));
		msg.append(' ');
		msg.append(ste[1].getClassName() + " " + ste[1].getLineNumber());
		msg.append(':');
		msg.append(ste[1].getMethodName());
		msg.append(ARROW);

		// pad the prefix out to 55 characters so messages line up
		int pad = 55 - msg.length();
		if (pad < 0) pad = 0;
		msg.append(BIG.substring(0, pad));

		msg.append(err);

		System.err.println(msg);
	}

	/** Writes the given object to stderr without any prefix (debug only). */
	final static public void prints (Object err) {

		if (!debug) return;
		System.err.println(err);
	}


	/** Builds a minute.second.millisecond timestamp for the current time. */
	final private static StringBuffer getDate() {

		GregorianCalendar now = new GregorianCalendar();
		StringBuffer stamp = new StringBuffer();
		stamp.append(now.get(Calendar.MINUTE));
		stamp.append('.');
		stamp.append(now.get(Calendar.SECOND));
		stamp.append('.');
		stamp.append(now.get(Calendar.MILLISECOND));

		return stamp;
	}

	/** Enables or disables all debug output. */
	public static void setDebug(boolean b) {
		debug = b;
	}

	/** Returns whether debug output is currently enabled. */
	public static boolean getDebug() {
		return debug;
	}


}
diff --git a/dependencyParser/mate-tools/src/is2/util/Edges.java b/dependencyParser/mate-tools/src/is2/util/Edges.java
new file mode 100644
index 0000000..af1a658
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/Edges.java
@@ -0,0 +1,206 @@
+/**
+ *
+ */
+package is2.util;
+
+import is2.data.PipeGen;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map.Entry;
+
+/**
+ * @author Dr. Bernd Bohnet, 13.05.2009;
+ *
+ *
+ */
public final class Edges {


	// edges[p1][p2] lists the distinct labels observed for the index pair (p1, p2);
	// presumably the indexes are POS-tag ids -- confirm against callers
	private static short[][][] edges;
	// global frequency of each label
	private static HashMap<Short,Integer> labelCount = new HashMap<Short,Integer>();

	// frequency of each (pair, label) combination; key is pos1 + "-" + pos2 + label
	private static HashMap<String,Integer> slabelCount = new HashMap<String,Integer>();


	// fallback returned by get() for unseen pairs; findDefault() stores the
	// globally most frequent label in def[0]
	static short[] def = new short[1];

	// static utility holder -- not instantiable
	private Edges () {}

	/**
	 * Allocates the (length x length) label table. Must be called before
	 * put/putD/get are used.
	 *
	 * @param length
	 */
	public static void init(int length) {
		edges = new short[length][length][];
	}


	/** Selects the globally most frequent label and stores it in def[0]. */
	public static void findDefault(){

		int best =0;



		for(Entry<Short,Integer> e : labelCount.entrySet()) {


			if (best<e.getValue()) {
				best = e.getValue();
				def[0]=e.getKey();
			}
		}


	//	labelCount=null;
	//	String[] types = new String[mf.getFeatureCounter().get(PipeGen.REL)];
	//	for (Entry<String, Integer> e : MFO.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey();

		is2.util.DB.println("set default label to "+def[0]+" " );

		// System.out.println("found default "+def[0]);

	}


	/** Registers an observed label for the pair (pos1, pos2). */
	final static public void put(int pos1, int pos2, short label) {
		putD(pos1, pos2,label);
	//	putD(pos2, pos1,!dir, label);
	}


	/**
	 * Registers a label for (pos1, pos2): increments the global and per-pair
	 * frequency counts and appends the label to edges[pos1][pos2] unless it
	 * is already present there.
	 */
	final static public void putD(int pos1, int pos2, short label) {

		Integer lc = labelCount.get(label);
		if (lc==null) labelCount.put(label, 1);
		else labelCount.put(label, lc+1);

		String key = pos1+"-"+pos2+label;
		Integer lcs = slabelCount.get(key);
		if (lcs==null) slabelCount.put(key, 1);
		else slabelCount.put(key, lcs+1);

		if (edges[pos1][pos2]==null) {
			edges[pos1][pos2]=new short[1];
			edges[pos1][pos2][0]=label;

//			edgesh[pos1][pos2][dir?0:1] = new TIntHashSet(2);
//			edgesh[pos1][pos2][dir?0:1].add(label);
		} else {
			short labels[] = edges[pos1][pos2];
			for(short l : labels) {
				//contains label already?
				if(l==label) return;
			}

			// label not present yet: grow the array by one and append it
			short[] nlabels = new short[labels.length+1];
			System.arraycopy(labels, 0, nlabels, 0, labels.length);
			nlabels[labels.length]=label;
			edges[pos1][pos2]=nlabels;

		//	edgesh[pos1][pos2][dir?0:1].add(label);
		}
	}

	/**
	 * Returns the labels observed for (pos1, pos2), or the one-element
	 * default array when an index is negative or the pair is unseen.
	 */
	final static public short[] get(int pos1, int pos2) {

		if (pos1<0 || pos2<0 || edges[pos1][pos2]==null) return def;
		return edges[pos1][pos2];
	}


	/**
	 * Serializes the table: the side length, then for every pair the label
	 * count followed by its labels, then the default label. All values are
	 * written with writeShort, so they must fit in a short.
	 *
	 * @param d
	 */
	static public void write(DataOutputStream d) throws IOException {

		int len = edges.length;
		d.writeShort(len);

		for(int p1 =0;p1<len;p1++) {
			for(int p2 =0;p2<len;p2++) {
				if (edges[p1][p2]==null) d.writeShort(0);
				else {
					d.writeShort(edges[p1][p2].length);
					for(int l =0;l<edges[p1][p2].length;l++) {
						d.writeShort(edges[p1][p2][l]);
					}

				}
			}
		}

		d.writeShort(def[0]);

	}


	/**
	 * Restores the table previously written by write(DataOutputStream);
	 * the read order mirrors the write order exactly.
	 *
	 * @param d
	 */
	public static void read(DataInputStream d) throws IOException {
		int len = d.readShort();

		edges = new short[len][len][];
		for(int p1 =0;p1<len;p1++) {
			for(int p2 =0;p2<len;p2++) {
				int ll = d.readShort();
				if (ll==0) {
					edges[p1][p2]=null;
				} else {
					edges[p1][p2] = new short[ll];
					for(int l =0;l<ll;l++) {
						edges[p1][p2][l]=d.readShort();
					}
				}
			}
		}

		def[0]= d.readShort();

	}

	/**
	 * Orders labels by descending frequency of (_key + label).
	 * NOTE(review): compare() unboxes slabelCount.get(_key + label), which
	 * throws a NullPointerException if a label was never counted for that
	 * key -- confirm callers only compare labels registered via putD for
	 * the same key.
	 */
	public static class C implements Comparator<Short> {

		public C() {
			super();
		}

		// key prefix ("pos1-pos2") whose per-label counts drive the comparison
		String _key;

		public C(String key) {
			super();
			_key=key;
		}

		/* (non-Javadoc)
		 * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
		 */
		@Override
		public int compare(Short l1, Short l2) {

			//	int c1 = labelCount.get(l1);
			//	int c2 = labelCount.get(l2);
			//	if (true) return c1==c2?0:c1>c2?-1:1;

			int x1 = slabelCount.get(_key+l1.shortValue());
			int x2 = slabelCount.get(_key+l2.shortValue());
			//	System.out.println(x1+" "+x2);


			return x1==x2?0:x1>x2?-1:1;



		}





	}


}
diff --git a/dependencyParser/mate-tools/src/is2/util/Evaluator.java b/dependencyParser/mate-tools/src/is2/util/Evaluator.java
new file mode 100644
index 0000000..c527303
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/Evaluator.java
@@ -0,0 +1,616 @@
+package is2.util;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Map.Entry;
+
+//import org.apache.commons.math.stat.inference.TestUtils;
+
+
+import is2.data.Parse;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+
+
+public class Evaluator {
+
+
+ public static void main(String[] args) {
+
+ Options options = new Options(args);
+
+ if (options.eval && options.significant1==null ) {
+
+ Results r = evaluate(options.goldfile, options.outfile);
+
+ }
+ /*
+ else if (options.significant1!=null && options.significant2!=null ) {
+
+ System.out.println("compare1 "+options.significant1);
+ System.out.println("compare2 "+options.significant2);
+ System.out.println("gold "+options.goldfile);
+
+ Results r1 = evaluate(options.goldfile, options.significant1,false);
+
+ System.out.println("file 1 done ");
+
+ Results r2 = evaluate(options.goldfile, options.significant2,false);
+
+ double[] s1 = new double[r1.correctHead.size()];
+ double[] s2 = new double[r1.correctHead.size()];
+
+ for(int k=0;k<r1.correctHead.size();k++) {
+ s1[k] = r1.correctHead.get(k);
+ s2[k] = r2.correctHead.get(k);
+ }
+
+ try {
+ double p = TestUtils.pairedTTest(s1, s2);
+ System.out.print("significant to "+p);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+// significant(options.significant1, options.significant2) ;
+
+
+ }
+ */
+ else if (options.significant1!=null) {
+ Results r = evaluate(options.goldfile, options.outfile,true);
+// significant(options.significant1, options.significant2) ;
+
+ }
+
+
+ }
+
+
+ /**
+ *
+ * @param act_file
+ * @param pred_file
+ * @param what top, pos, length, mor
+ */
	/**
	 * Compares predicted POS tags (ppos) and morphological features (pfeats)
	 * in pred_file against the gold annotation (gpos/ofeats) in act_file and
	 * prints accuracy statistics to stdout.
	 *
	 * @param act_file gold-standard CoNLL 2009 file
	 * @param pred_file predicted CoNLL 2009 file
	 * @param what which report sections to print; any combination of
	 *        "top" (most frequent tag confusions), "length" (accuracy by
	 *        token position), "pos" (per-tag accuracy), "mor" (per-feature
	 *        accuracy)
	 */
	public static void evaluateTagger (String act_file, String pred_file, String what) {


		CONLLReader09 goldReader = new CONLLReader09(act_file);

		CONLLReader09 predictedReader = new CONLLReader09();
		predictedReader.startReading(pred_file);

		// confusion key ("gold: 'X' pred: 'Y'") -> occurrence count
		Hashtable<String,Integer> errors = new Hashtable<String,Integer>();
		// confusion key -> example word forms, space separated
		Hashtable<String,StringBuffer> words = new Hashtable<String,StringBuffer>();

		int total = 0, numsent = 0, corrT=0;
		SentenceData09 goldInstance = goldReader.getNext();
		SentenceData09 predInstance = predictedReader.getNext();


		// token position -> {correct, seen}
		HashMap<Integer,int[]> correctL = new HashMap<Integer,int[]>();
		// gold POS tag -> {correct, seen}
		HashMap<String,int[]> pos = new HashMap<String,int[]>();
		// gold morphology string -> {correct, seen}
		HashMap<String,int[]> mor = new HashMap<String,int[]>();

		float correctM = 0, allM=0;;

		while(goldInstance != null) {

			int instanceLength = goldInstance.length();

			if (instanceLength != predInstance.length())
				System.out.println("Lengths do not match on sentence "+numsent);

			String gold[] = goldInstance.gpos;
			String pred[] = predInstance.ppos;

			String goldM[] = goldInstance.ofeats;
			String predM[] = predInstance.pfeats;


			// NOTE: the first item is the root info added during nextInstance(), so we skip it.

			for (int i = 1; i < instanceLength; i++) {

				int[] cwr = correctL.get(i);
				if (cwr ==null) {
					cwr = new int[2];
					correctL.put(i, cwr);
				}
				cwr[1]++;
				int[] correctPos = pos.get(gold[i]);
				if (correctPos==null) {
					correctPos = new int[2];
					pos.put(gold[i], correctPos);
				}
				correctPos[1]++;

				int[] correctMor = mor.get(goldM[i]);
				if (correctMor==null) {
					correctMor = new int[2];
					mor.put(goldM[i], correctMor);
				}

				// a missing predicted feature string counts as correct when the gold value is "_"
				if ((goldM[i].equals("_")&&predM[i]==null) || goldM[i].equals(predM[i])) {
					correctM++;
					correctMor[0]++;
				}
				allM++;
				correctMor[1]++;

				if (gold[i].equals(pred[i])) {
					corrT++;
					cwr[0]++;
					correctPos[0]++;
				} else {
					// record the confusion pair and an example word form for it
					String key = "gold: '"+gold[i]+"' pred: '"+pred[i]+"'";
					Integer cnt = errors.get(key);
					StringBuffer errWrd = words.get(key);
					if (cnt==null) {
						errors.put(key,1);
						words.put(key, new StringBuffer().append(goldInstance.forms[i]));
					}
					else {
						errors.put(key,cnt+1);
						errWrd.append(" "+goldInstance.forms[i]);
					}
				}


			}
			total += instanceLength - 1; // Subtract one to not score fake root token


			numsent++;

			goldInstance = goldReader.getNext();
			predInstance = predictedReader.getNext();
		}




		// sort the confusion pairs by descending frequency
		ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>();
		for(Entry<String, Integer> e : errors.entrySet()) {
			opsl.add(e);
		}

		Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){

			@Override
			public int compare(Entry<String, Integer> o1,
					Entry<String, Integer> o2) {

				return o1.getValue()==o2.getValue()?0:o1.getValue()>o2.getValue()?-1:1;
			}


		});


		int cnt=0;
		if (what.contains("top") ) {
			// confusions occurring more than 10 times, with their example words
			System.out.println("top most errors:");
			for(Entry<String, Integer> e : opsl) {
				cnt++;
				if(e.getValue()>10) System.out.println(e.getKey()+" "+e.getValue()+" context: "+words.get(e.getKey()));
			}
		}

		if (what.contains("length")) {
			// accuracy by token position (positions 0..59): pos:correct:seen:percent
			for(int k=0;k<60;k++) {
				int[] cwr = correctL.get(k);
				if (cwr == null) continue;
				System.out.print(k+":"+cwr[0]+":"+cwr[1]+":"+(((float)Math.round(10000*(float)((float)cwr[0])/(float)cwr[1]))/100)+" ");
			}
			System.out.println();
		}

		if (what.contains("pos")) {
			// per-tag accuracy: tag:correct:seen:percent
			for(Entry<String,int[]> e : pos.entrySet()) {

				System.out.print(e.getKey()+":"+e.getValue()[0]+":"+e.getValue()[1]+":"+
						(((float)Math.round(10000*((float)e.getValue()[0])/((float)e.getValue()[1])))/100)+" ");

			}
			System.out.print("");
		}
		System.out.println();
		if (what.contains("mor")) {
			// per-feature-string accuracy: feats:correct:seen:percent
			for(Entry<String,int[]> e : mor.entrySet()) {

				System.out.print(e.getKey()+":"+e.getValue()[0]+":"+e.getValue()[1]+":"+
						(((float)Math.round(10000*((float)e.getValue()[0])/((float)e.getValue()[1])))/100)+" ");

			}
			System.out.print("");
		}
		System.out.println("\nTokens: " + total+" Correct: " + corrT+" "+(float)corrT/total+" Correct M.:"+(int)correctM+ " morphology "+(correctM/total));
	}
+
+
+
+
+ public static int errors(SentenceData09 s, boolean uas) {
+
+ int errors =0;
+ for (int k =1;k<s.length();k++) {
+
+ if (s.heads[k] != s.pheads[k] && (uas || ! s.labels[k].equals(s.plabels[k]))) {
+ errors++;
+ }
+ }
+ return errors;
+ }
+
	/**
	 * Compares two system outputs against the gold annotation carried in the
	 * same sentences: for each token that system 1 got wrong (head or label)
	 * while system 2 got right, the gold label is counted in r1, and vice
	 * versa for r2.
	 * NOTE(review): the local counter 'errors' is never incremented, so this
	 * method always returns 0 -- callers appear to use it only for the side
	 * effect on r1/r2; confirm before relying on the return value.
	 *
	 * @param s1 sentence with system 1's predictions (pheads/plabels)
	 * @param s2 the same sentence with system 2's predictions
	 * @param r1 out: label -> count of tokens only system 1 got wrong
	 * @param r2 out: label -> count of tokens only system 2 got wrong
	 * @return always 0 (see note above)
	 */
	public static int errors(SentenceData09 s1, SentenceData09 s2, HashMap<String,Integer> r1,HashMap<String,Integer> r2) {



		int errors =0;
		for (int k =1;k<s1.length();k++) {

			if (s1.heads[k] != s1.pheads[k] || (! s1.labels[k].equals(s1.plabels[k]))) {

				if (s2.heads[k] != s2.pheads[k] || (! s2.labels[k].equals(s2.plabels[k]))) {

					// equal do nothing

				} else {

					// system 1 wrong, system 2 right: tally the gold label in r1
					Integer cnt = r1.get(s1.labels[k]);
					if (cnt==null) cnt=0;
					cnt++;
					r1.put(s1.labels[k],cnt);


				}

			}

			if (s2.heads[k] != s2.pheads[k] || (! s2.labels[k].equals(s2.plabels[k]))) {

				if (s1.heads[k] != s1.pheads[k] || (! s1.labels[k].equals(s1.plabels[k]))) {

					// equal do nothing

				} else {

					// system 2 wrong, system 1 right: tally the gold label in r2
					Integer cnt = r2.get(s2.labels[k]);
					if (cnt==null) cnt=0;
					cnt++;
					r2.put(s2.labels[k],cnt);


				}

			}
		}
		return errors;
	}
+
+
+ public static final String PUNCT ="!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
+
	/** Aggregated evaluation scores for one parsed file. */
	public static class Results {

		// number of scored tokens (the artificial root token is excluded)
		public int total;
		// number of tokens with the correct head
		public int corr;
		// labeled attachment score (correct head and label), in percent
		public float las;
		// unlabeled attachment score (correct head), in percent
		public float ula;
		// labeled attachment with correct POS tag, in percent
		public float lpas;
		// unlabeled attachment with correct POS tag, in percent
		public float upla;

		// per-sentence fraction of tokens with correct head and label
		ArrayList<Double> correctHead;
	}
+
	/** Evaluates pred_file against the gold file act_file; delegates with printEval=true. */
	public static Results evaluate (String act_file, String pred_file) {
		return evaluate (act_file, pred_file,true);
	}
	/** Evaluates with an explicit printEval flag; per-token significance output (sig) is disabled. */
	public static Results evaluate (String act_file, String pred_file, boolean printEval) {
		return evaluate ( act_file, pred_file, printEval, false);
	}
+
+
+ public static Results evaluate (String act_file, String pred_file, boolean printEval, boolean sig) {
+
+ CONLLReader09 goldReader = new CONLLReader09(act_file, -1);
+ CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1);
+
+ int total = 0, corr = 0, corrL = 0, Ptotal=0, Pcorr = 0, PcorrL = 0, BPtotal=0, BPcorr = 0, BPcorrL = 0, corrLableAndPos=0, corrHeadAndPos=0;
+ int corrLableAndPosP=0, corrHeadAndPosP=0,corrLableAndPosC=0;
+ int numsent = 0, corrsent = 0, corrsentL = 0, Pcorrsent = 0, PcorrsentL = 0,sameProj=0;;
+ int proj=0, nonproj=0, pproj=0, pnonproj=0, nonProjOk=0, nonProjWrong=0;
+
+ int corrOne = 0;
+
+ int correctChnWoPunc =0, correctLChnWoPunc=0,CPtotal=0;
+ SentenceData09 goldInstance = goldReader.getNext();
+
+ SentenceData09 predInstance = predictedReader.getNext();
+ HashMap<String,Integer> label = new HashMap<String,Integer>();
+ HashMap<String,Integer> labelCount = new HashMap<String,Integer>();
+ HashMap<String,Integer> labelCorrect = new HashMap<String,Integer>();
+ HashMap<String,Integer> falsePositive = new HashMap<String,Integer>();
+
+ // does the node have the correct head?
+ ArrayList<Double> correctHead = new ArrayList<Double>();
+
+ while(goldInstance != null) {
+
+ int instanceLength = goldInstance.length();
+
+ if (instanceLength != predInstance.length())
+ System.out.println("Lengths do not match on sentence "+numsent);
+
+ int[] goldHeads = goldInstance.heads;
+ String[] goldLabels = goldInstance.labels;
+ int[] predHeads = predInstance.pheads;
+ String[] predLabels = predInstance.plabels;
+
+ boolean whole = true;
+ boolean wholeL = true;
+
+ boolean Pwhole = true;
+ boolean PwholeL = true;
+
+
+ int tlasS=0, totalS=0,corrLabels=0, XLabels=0;
+
+ // NOTE: the first item is the root info added during nextInstance(), so we skip it.
+
+
+
+ int punc=0, bpunc=0,totalChnWoPunc=0;
+ for (int i = 1; i < instanceLength; i++) {
+
+
+
+ Parse p = new Parse(predHeads.length);
+ for (int k=0;k<p.heads.length;k++) p.heads[k]=(short) predHeads[k];
+
+ Parse g = new Parse(predHeads.length);
+ for (int k=0;k<g.heads.length;k++) g.heads[k]=(short) goldHeads[k];
+
+
+
+
+
+ {
+ Integer count = labelCount.get(goldLabels[i]);
+ if (count==null)count = 0;
+
+ count++;
+
+ labelCount.put(goldLabels[i], count);
+
+ if(goldLabels[i].equals(predLabels[i])) {
+ Integer correct = labelCorrect.get(goldLabels[i]);
+ if (correct ==null) correct =0;
+ correct ++;
+ labelCorrect.put(goldLabels[i], correct);
+
+ } else {
+ Integer fp = falsePositive.get(predLabels[i]);
+ if (fp ==null) fp =0;
+ fp ++;
+ falsePositive.put(predLabels[i], fp);
+ }
+
+
+ }
+
+ {
+
+ }
+
+
+ if (goldLabels[i].startsWith("PMOD")) XLabels++;
+
+ boolean tlas =false;
+ if (predHeads[i] == goldHeads[i]) {
+ corr++;
+
+ if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrHeadAndPos ++;
+ if (goldLabels[i].equals(predLabels[i])) {
+ corrL++;
+ // if (predLabels[i].startsWith("PMOD"))
+ corrLabels++;
+ // else correctHead.add(0);
+ if (goldInstance.gpos[i].equals(predInstance.ppos[i])) {
+ tlasS++;
+ tlas=true;
+ corrLableAndPos ++;
+ }
+ }
+ else {
+ // correctHead.add(0);
+ // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ wholeL = false;
+ }
+ }
+ else {
+
+ //correctHead.add(0);
+
+ // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ whole = false; wholeL = false;
+
+ Integer count = label.get(goldLabels[i]);
+
+ if (count==null)count = 0;
+ count++;
+ label.put(goldLabels[i], count);
+
+
+
+ int d = Math.abs(goldInstance.heads[i]-i);
+ }
+
+
+ if( ! ("!\"#$%&''()*+,-./:;<=>?@[\\]^_{|}~``".contains(goldInstance.forms[i]))) {
+
+ if (predHeads[i] == goldHeads[i]) {
+ BPcorr++;
+
+ if (goldLabels[i].equals(predLabels[i])) {
+ BPcorrL++;
+ }
+ else {
+ // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ // PwholeL = false;
+ }
+ } else {
+ // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ //Pwhole = false; wholeL = false;
+ }
+
+ } else bpunc++;
+
+ if( ! (",.:''``".contains(goldInstance.forms[i]))) {
+
+
+ if (predHeads[i] == goldHeads[i]) {
+ if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrHeadAndPosP ++;
+ Pcorr++;
+
+ if (goldLabels[i].equals(predLabels[i])) {
+ PcorrL++;
+ if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrLableAndPosP ++;
+
+ }
+ else {
+ // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ PwholeL = false;
+ }
+ } else {
+ // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ Pwhole = false; PwholeL = false;
+ }
+
+ } else punc++;
+
+
+ if( ! (goldInstance.gpos[i].toLowerCase().startsWith("pu"))) {
+ if (predHeads[i] == goldHeads[i]) {
+ correctChnWoPunc++;
+
+ if (goldLabels[i].equals(predLabels[i])) {
+ correctLChnWoPunc++;
+ if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrLableAndPosC ++;
+ }
+ else {
+ // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ // PwholeL = false;
+ }
+ } else {
+ // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ // Pwhole = false; PwholeL = false;
+ }
+
+ } else totalChnWoPunc++;
+
+
+ if (sig) {
+ if(tlas) System.out.println("1\t");
+ else System.out.println("0\t");
+ }
+
+ }
+ total += ((instanceLength - 1)); // Subtract one to not score fake root token
+
+ Ptotal += ((instanceLength - 1) - punc);
+ BPtotal += ((instanceLength - 1) - bpunc);
+ CPtotal += ((instanceLength - 1) - totalChnWoPunc);
+ if(whole) corrsent++;
+ if(wholeL) corrsentL++;
+ if(Pwhole) Pcorrsent++;
+ if(PwholeL) PcorrsentL++;
+ numsent++;
+
+ goldInstance = goldReader.getNext();
+ predInstance = predictedReader.getNext();
+ correctHead.add((double) ((double)corrLabels/(instanceLength - 1)));
+ // System.out.println(""+((double)corrLabels/(instanceLength - 1)));
+ }
+
+ Results r = new Results();
+
+ r.correctHead =correctHead;
+ int mult=100000, diff=1000;
+
+ r.total = total;
+ r.corr = corr;
+ r.las =(float)Math.round(((double)corrL/total)*mult)/diff;
+ r.ula =(float)Math.round(((double)corr /total)*mult)/diff;
+ r.lpas =(float)Math.round(((double)corrLableAndPos/total)*mult)/diff;
+ r.upla =(float)Math.round(((double)corrHeadAndPos /total)*mult)/diff;
+ float tlasp = (float)Math.round(((double)corrLableAndPosP/Ptotal)*mult)/diff;
+ float tlasc = (float)Math.round(((double)corrLableAndPosC/Ptotal)*mult)/diff;
+
+ // System.out.print("Total: " + total+" \tCorrect: " + corr+" ");
+ System.out.print(" LAS/Total/UAS/Total: " + r.las+"/" + (double)Math.round(((double)corrsentL/numsent)*mult)/diff+
+ "/" + r.ula+"/" + (double)Math.round(((double)corrsent /numsent)*mult)/diff+" LPAS/UPAS "+r.lpas+"/"+r.upla);
+
+ System.out.println("; without . " + (double)Math.round(((double)PcorrL/Ptotal)*mult)/diff+"/" +
+ (double)Math.round(((double)PcorrsentL/numsent)*mult)/diff+
+ "/" + (double)Math.round(((double)Pcorr /Ptotal)*mult)/diff+"/" +
+ (double)Math.round(((double)Pcorrsent /numsent)*mult)/diff+" TLAS "+tlasp+
+ " V2 LAS/UAS "+(double)Math.round(((double)BPcorrL/BPtotal)*mult)/diff+
+ "/"+(double)Math.round(((double)BPcorr/BPtotal)*mult)/diff+
+ " CHN LAS/UAS "+(double)Math.round(((double)correctLChnWoPunc/CPtotal)*mult)/diff+
+ "/"+(double)Math.round(((double)correctChnWoPunc/CPtotal)*mult)/diff+" TLAS "+tlasc);
+
+ float precisionNonProj = ((float)nonProjOk)/((float)nonProjOk+nonProjWrong);
+ float recallNonProj = ((float)nonProjOk)/((float)(nonproj));
+ System.out.println("proj "+proj+" nonp "+nonproj+"; predicted proj "+pproj+" non "+pnonproj+"; nonp correct "+
+ nonProjOk+" nonp wrong "+nonProjWrong+
+ " precision=(nonProjOk)/(non-projOk+nonProjWrong): "+precisionNonProj+
+ " recall=nonProjOk/nonproj="+recallNonProj+" F="+(2*precisionNonProj*recallNonProj)/(precisionNonProj+recallNonProj));
+
+ if (!printEval) return r;
+
+
+ HashMap<String,Integer> totalX = new HashMap<String,Integer>();
+ HashMap<String,Integer> totalY = new HashMap<String,Integer>();
+
+ String A=" "; // &
+ System.out.println("label\ttp\tcount\trecall\t\ttp\tfp+tp\tprecision\t F-Score ");
+
+ for(Entry<String, Integer> e : labelCount.entrySet()) {
+
+ int tp = labelCorrect.get(e.getKey())==null?0:labelCorrect.get(e.getKey()).intValue();
+ Integer count = labelCount.get(e.getKey());
+ int fp = falsePositive.get(e.getKey())==null?0:falsePositive.get(e.getKey()).intValue();
+ System.out.println(e.getKey()+"\t"+tp+"\t"+count+"\t"+roundPercent((float)tp/count)+"\t\t"+tp+"\t"+(fp+tp)+
+ "\t"+roundPercent((float)tp/(fp+tp))+"\t\t"+roundPercent((((float)tp/count))+(float)tp/(fp+tp))/2F); //+totalD
+ }
+
+
+
+
+ return r;
+ }
+
+
+ public static float round (double v){
+
+ return Math.round(v*10000F)/10000F;
+ }
+
+ public static float roundPercent (double v){
+
+ return Math.round(v*10000F)/100F;
+ }
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/util/EvaluatorTagger.java b/dependencyParser/mate-tools/src/is2/util/EvaluatorTagger.java
new file mode 100644
index 0000000..c1ee7df
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/EvaluatorTagger.java
@@ -0,0 +1,736 @@
+package is2.util;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Map.Entry;
+
+
+import org.apache.commons.math.stat.inference.TestUtils;
+
+import is2.data.Parse;
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+
+
+public class EvaluatorTagger {
+
+
+ public static int TAGGER = 1;
+ public static int what = 0;
+
	/**
	 * Entry point. Depending on the options this either runs a plain
	 * evaluation of one output file against the gold standard (-eval), or
	 * compares two system outputs: it reports per-sentence repairs via
	 * check(...) and performs a paired t-test over the per-sentence accuracy
	 * vectors returned by evaluate(...).
	 */
	public static void main(String[] args) {

		Options options = new Options(args);

		// selects what is scored: POS tags (TAGGER) or dependency labels
		what = options.tt;

		if (options.eval && options.significant1==null ) {

			Results r = evaluate(options.goldfile, options.outfile);

		} else if (options.significant1!=null && options.significant2!=null ) {

			System.out.println("compare1 "+options.significant1);
			System.out.println("compare2 "+options.significant2);
			System.out.println("gold "+options.goldfile);

			check( options.significant1, options.significant2, options.testfile);

			Results r1 = evaluate(options.goldfile, options.significant1,false);

			System.out.println("file 1 done ");

			Results r2 = evaluate(options.goldfile, options.significant2,false);

			// per-sentence accuracy scores feed the paired t-test
			double[] s1 = new double[r1.correctHead.size()];
			double[] s2 = new double[r1.correctHead.size()];

			for(int k=0;k<r1.correctHead.size();k++) {
				s1[k] = r1.correctHead.get(k);
				s2[k] = r2.correctHead.get(k);
			}

			try {
				double p = TestUtils.pairedTTest(s1, s2);
				System.out.print("significant to "+p);
			} catch (Exception e) {
				e.printStackTrace();
			}

//			significant(options.significant1, options.significant2) ;


		} else if (options.significant1!=null) {
			Results r = evaluate(options.goldfile, options.outfile,true);
//			significant(options.significant1, options.significant2) ;

		}


	}
+
+
+ private static void check(String s1, String s2, String pos) {
+ CONLLReader09 s1reader = new CONLLReader09(s1, -1);
+ SentenceData09 s1i = s1reader.getNext();
+ CONLLReader09 s2reader = new CONLLReader09(s2, -1);
+ SentenceData09 s2i = s2reader.getNext();
+
+
+ HashMap<String,HashMap<String,Integer> > labchanged = new HashMap<String,HashMap<String,Integer> > ();
+
+ int snt =0;
+
+ while(s1i != null) {
+
+ snt ++;
+ int good =0,wrong=0;
+
+ for(int w=1;w<s1i.length();w++) {
+
+ // p(s1:head-pos wrong s2:head-pos good => dep-wrong => dep-good)
+
+ if (s1i.gpos[s1i.heads[w]].equals(pos) && !
+ ! s1i.ppos[s1i.heads[w]].equals(s1i.gpos[s1i.heads[w]]) && s2i.ppos[s2i.heads[w]].equals(s2i.gpos[s2i.heads[w]])
+ ) {
+
+
+ HashMap<String,Integer> changed = labchanged.get(s2i.labels[w]);
+ if (changed ==null) {
+ changed= new HashMap<String,Integer>();
+ labchanged.put(s2i.labels[w], changed);
+ }
+ if (! (s1i.plabels[w].equals(s1i.labels[w]) && s1i.pheads[w] == s1i.heads[w] )&&
+ (s2i.plabels[w].equals(s2i.labels[w]) && s2i.pheads[w] == s2i.heads[w] ) ) {
+ good ++;
+ Integer goodL = changed.get("G");
+ if (goodL== null) goodL =0;
+ goodL+=1;
+ changed.put("G", goodL);
+ }
+ else {
+ wrong++;
+ Integer wrongL = changed.get("W");
+ if (wrongL== null) wrongL =0;
+ wrongL+=1;
+ changed.put("W", wrongL);
+ }
+
+
+
+
+
+ }
+
+ }
+
+ if (good!=0 || wrong!=0)
+ System.out.println(snt+" changed yes:"+good+" no:"+wrong);
+ s1i = s1reader.getNext();
+ s2i = s2reader.getNext();
+ }
+ System.out.println(""+labchanged);
+
+ }
+
+
	/**
	 * Scores a predicted tagging against gold POS tags and morphological
	 * feature sets and prints selectable error breakdowns to stdout.
	 *
	 * @param act_file  gold-standard CoNLL-09 file
	 * @param pred_file predicted CoNLL-09 file
	 * @param what which reports to print; any combination of the substrings
	 *             "top" (most frequent confusions), "pos" (accuracy per gold
	 *             POS), "length" (accuracy per token position), "mor"
	 *             (accuracy per morphological feature set)
	 */
	public static void evaluateTagger (String act_file, String pred_file, String what) {


		CONLLReader09 goldReader = new CONLLReader09(act_file);

		CONLLReader09 predictedReader = new CONLLReader09();
		predictedReader.startReading(pred_file);

		// confusion key ("gold: '..' pred: '..'") -> frequency / example words
		Hashtable<String,Integer> errors = new Hashtable<String,Integer>();
		Hashtable<String,StringBuffer> words = new Hashtable<String,StringBuffer>();

		int total = 0, numsent = 0, corrT=0;
		SentenceData09 goldInstance = goldReader.getNext();
		SentenceData09 predInstance = predictedReader.getNext();


		// token position -> {correct, seen}
		HashMap<Integer,int[]> correctL = new HashMap<Integer,int[]>();
		// gold POS -> {correct, seen}
		HashMap<String,int[]> pos = new HashMap<String,int[]>();
		// gold morph feature set -> {correct, seen}
		HashMap<String,int[]> mor = new HashMap<String,int[]>();

		float correctM = 0, allM=0;;

		while(goldInstance != null) {

			int instanceLength = goldInstance.length();

			if (instanceLength != predInstance.length())
				System.out.println("Lengths do not match on sentence "+numsent);

			String gold[] = goldInstance.gpos;
			String pred[] = predInstance.ppos;

			String goldM[] = goldInstance.ofeats;
			String predM[] = predInstance.pfeats;


			// NOTE: the first item is the root info added during nextInstance(), so we skip it.

			for (int i = 1; i < instanceLength; i++) {

				int[] cwr = correctL.get(i);
				if (cwr ==null) {
					cwr = new int[2];
					correctL.put(i, cwr);
				}
				cwr[1]++;
				int[] correctPos = pos.get(gold[i]);
				if (correctPos==null) {
					correctPos = new int[2];
					pos.put(gold[i], correctPos);
				}
				correctPos[1]++;

				int[] correctMor = mor.get(goldM[i]);
				if (correctMor==null) {
					correctMor = new int[2];
					mor.put(goldM[i], correctMor);
				}

				// a gold "_" (no features) also matches an absent prediction
				if ((goldM[i].equals("_")&&predM[i]==null) || goldM[i].equals(predM[i])) {
					correctM++;
					correctMor[0]++;
				}
				allM++;
				correctMor[1]++;

				if (gold[i].equals(pred[i])) {
					corrT++;
					cwr[0]++;
					correctPos[0]++;
				} else {
					// record the confusion pair and collect example words
					String key = "gold: '"+gold[i]+"' pred: '"+pred[i]+"'";
					Integer cnt = errors.get(key);
					StringBuffer errWrd = words.get(key);
					if (cnt==null) {
						errors.put(key,1);
						words.put(key, new StringBuffer().append(goldInstance.forms[i]));
					}
					else {
						errors.put(key,cnt+1);
						errWrd.append(" "+goldInstance.forms[i]);
					}
				}


			}
			total += instanceLength - 1; // Subtract one to not score fake root token


			numsent++;

			goldInstance = goldReader.getNext();
			predInstance = predictedReader.getNext();
		}




		// sort confusion pairs by descending frequency
		// System.out.println("error gold:"+goldPos[i]+" pred:"+predPos[i]+" "+goldInstance.forms[i]+" snt "+numsent+" i:"+i);
		ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>();
		for(Entry<String, Integer> e : errors.entrySet()) {
			opsl.add(e);
		}

		Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){

			@Override
			public int compare(Entry<String, Integer> o1,
					Entry<String, Integer> o2) {

				// NOTE(review): '==' on boxed Integers compares references; equal
				// counts outside the small-integer cache fall through to the
				// greater/less branches, so ordering is still consistent here.
				return o1.getValue()==o2.getValue()?0:o1.getValue()>o2.getValue()?-1:1;
			}


		});


		int cnt=0;
		if (what.contains("top") ) {
			System.out.println("top most errors:");
			for(Entry<String, Integer> e : opsl) {
				cnt++;
				// only confusions seen more than 10 times are reported
				if(e.getValue()>10) System.out.println(e.getKey()+" "+e.getValue()+" context: "+words.get(e.getKey()));
			}
		}

		if (what.contains("length")) {
			// accuracy per token position (positions 0..59)
			for(int k=0;k<60;k++) {
				int[] cwr = correctL.get(k);
				if (cwr == null) continue;
				System.out.print(k+":"+cwr[0]+":"+cwr[1]+":"+(((float)Math.round(10000*(float)((float)cwr[0])/(float)cwr[1]))/100)+" ");
			}
			System.out.println();
		}

		if (what.contains("pos")) {
			// accuracy per gold POS tag
			for(Entry<String,int[]> e : pos.entrySet()) {

				System.out.print(e.getKey()+":"+e.getValue()[0]+":"+e.getValue()[1]+":"+
						(((float)Math.round(10000*((float)e.getValue()[0])/((float)e.getValue()[1])))/100)+" ");

			}
			System.out.print("");
		}
		System.out.println();
		if (what.contains("mor")) {
			// accuracy per gold morphological feature set
			for(Entry<String,int[]> e : mor.entrySet()) {

				System.out.print(e.getKey()+":"+e.getValue()[0]+":"+e.getValue()[1]+":"+
						(((float)Math.round(10000*((float)e.getValue()[0])/((float)e.getValue()[1])))/100)+" ");

			}
			System.out.print("");
		}
		System.out.println("\nTokens: " + total+" Correct: " + corrT+" "+(float)corrT/total+" Correct M.:"+(int)correctM+ " morphology "+(correctM/total));
	}
+
+
+
+
+ public static int errors(SentenceData09 s, boolean uas) {
+
+ int errors =0;
+ for (int k =1;k<s.length();k++) {
+
+ if (s.heads[k] != s.pheads[k] && (uas || ! s.labels[k].equals(s.plabels[k]))) {
+ errors++;
+ }
+ }
+ return errors;
+ }
+
	/**
	 * Compares two system outputs for the same sentence token by token and
	 * tallies, per gold label, the tokens only one system got right:
	 * r1 collects labels where system 1 is wrong (head or label) but system 2
	 * is fully correct; r2 collects the symmetric case.
	 *
	 * NOTE(review): the local counter 'errors' is never incremented, so this
	 * method always returns 0 — confirm whether callers use the return value.
	 *
	 * @param s1 output of system 1 (gold + predicted annotation)
	 * @param s2 output of system 2 (gold + predicted annotation)
	 * @param r1 out-parameter: gold label -> count of s1-only errors
	 * @param r2 out-parameter: gold label -> count of s2-only errors
	 */
	public static int errors(SentenceData09 s1, SentenceData09 s2, HashMap<String,Integer> r1,HashMap<String,Integer> r2) {



		int errors =0;
		for (int k =1;k<s1.length();k++) {

			if (s1.heads[k] != s1.pheads[k] || (! s1.labels[k].equals(s1.plabels[k]))) {

				if (s2.heads[k] != s2.pheads[k] || (! s2.labels[k].equals(s2.plabels[k]))) {

					// equal do nothing

				} else {

					// system 1 wrong, system 2 right
					Integer cnt = r1.get(s1.labels[k]);
					if (cnt==null) cnt=0;
					cnt++;
					r1.put(s1.labels[k],cnt);


				}

			}

			if (s2.heads[k] != s2.pheads[k] || (! s2.labels[k].equals(s2.plabels[k]))) {

				if (s1.heads[k] != s1.pheads[k] || (! s1.labels[k].equals(s1.plabels[k]))) {

					// equal do nothing

				} else {

					// system 2 wrong, system 1 right
					Integer cnt = r2.get(s2.labels[k]);
					if (cnt==null) cnt=0;
					cnt++;
					r2.put(s2.labels[k],cnt);


				}

			}
		}
		return errors;
	}
+
+
	/** Characters regarded as punctuation by the evaluation. */
	public static final String PUNCT ="!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";

	/** Aggregated scores of one evaluation run. */
	public static class Results {

		public int total;   // number of scored tokens (root excluded)
		public int corr;    // tokens with the correct head
		public float las;   // labeled attachment score (percent, 2 decimals)
		public float ula;   // unlabeled attachment score
		public float lpas;  // labeled attachment with correct POS
		public float upla;  // unlabeled attachment with correct POS

		// per-sentence fraction of tokens with correct head and label
		// (used as the sample vector for the paired t-test in main)
		ArrayList<Double> correctHead;
	}
+
	/** Evaluates pred_file against act_file and prints the full per-label report. */
	public static Results evaluate (String act_file, String pred_file) {
		return evaluate (act_file, pred_file,true);
	}
	/** Evaluates pred_file against act_file; printEval toggles the per-label report. */
	public static Results evaluate (String act_file, String pred_file, boolean printEval) {
		return evaluate ( act_file, pred_file, printEval, false);
	}
+
+
	/**
	 * Scores pred_file against the gold standard act_file and prints overall
	 * attachment scores plus several punctuation-filtered variants.
	 *
	 * Counter prefixes: plain = all tokens; P* = excluding ",.:''``"
	 * punctuation forms; BP* = excluding a larger punctuation set; C*/Chn* =
	 * excluding tokens whose gold POS starts with "pu" (Chinese-style).
	 *
	 * @param act_file  gold-standard CoNLL-09 file
	 * @param pred_file predicted CoNLL-09 file
	 * @param printEval if true, additionally print per-label recall/precision,
	 *                  confusion lists and the POS->label distribution
	 * @param sig if true, print "1"/"0" per token (head+label+POS all correct)
	 *            for significance testing
	 * @return aggregated scores; Results.correctHead holds one per-sentence
	 *         accuracy value per input sentence
	 */
	public static Results evaluate (String act_file, String pred_file, boolean printEval, boolean sig) {

		CONLLReader09 goldReader = new CONLLReader09(act_file, -1);
		CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1);

		int total = 0, corr = 0, corrL = 0, Ptotal=0, Pcorr = 0, PcorrL = 0, BPtotal=0, BPcorr = 0, BPcorrL = 0, corrLableAndPos=0, corrHeadAndPos=0;
		int corrLableAndPosP=0, corrHeadAndPosP=0,corrLableAndPosC=0;
		int numsent = 0, corrsent = 0, corrsentL = 0, Pcorrsent = 0, PcorrsentL = 0,sameProj=0;;
		// NOTE(review): the projectivity counters below are never updated in this
		// method, so the "proj/nonp" report at the end always shows zeros/NaN.
		int proj=0, nonproj=0, pproj=0, pnonproj=0, nonProjOk=0, nonProjWrong=0;

		int corrOne = 0;

		int correctChnWoPunc =0, correctLChnWoPunc=0,CPtotal=0;
		SentenceData09 goldInstance = goldReader.getNext();

		SentenceData09 predInstance = predictedReader.getNext();
		HashMap<String,Integer> label = new HashMap<String,Integer>();
		HashMap<String,Integer> labelCount = new HashMap<String,Integer>();
		HashMap<String,Integer> labelCorrect = new HashMap<String,Integer>();
		HashMap<String,Integer> falsePositive = new HashMap<String,Integer>();
		HashMap<String,HashMap<String,Integer> > confusion = new HashMap<String,HashMap<String,Integer> >();

		// gold head POS -> (gold label -> frequency)
		HashMap<String,HashMap<String,Integer> > posLabelAssign = new HashMap<String,HashMap<String,Integer> >();

		// does the node have the correct head?
		ArrayList<Double> correctHead = new ArrayList<Double>();

		while(goldInstance != null) {

			int instanceLength = goldInstance.length();

			if (instanceLength != predInstance.length())
				System.out.println("Lengths do not match on sentence "+numsent);

			int[] goldHeads = goldInstance.heads;

			// in tagger mode POS tags are scored in place of dependency labels
			String[] goldLabels,predLabels;
			if (what == TAGGER) {
				goldLabels= goldInstance.gpos;
				predLabels= predInstance.ppos;
			}
			else {
				goldLabels = goldInstance.labels ;
				predLabels = predInstance.plabels ;
			}


			int[] predHeads = predInstance.pheads;


			// "whole" flags stay true only if the complete sentence is correct
			boolean whole = true;
			boolean wholeL = true;

			boolean Pwhole = true;
			boolean PwholeL = true;


			int tlasS=0, totalS=0,corrLabels=0, XLabels=0;

			// NOTE: the first item is the root info added during nextInstance(), so we skip it.



			int punc=0, bpunc=0,totalChnWoPunc=0;
			for (int i = 1; i < instanceLength; i++) {


				// NOTE(review): p and g are rebuilt on every token and never read
				// afterwards — apparently dead code, kept unchanged here.
				Parse p = new Parse(predHeads.length);
				for (int k=0;k<p.heads.length;k++) p.heads[k]=(short) predHeads[k];

				Parse g = new Parse(predHeads.length);
				for (int k=0;k<g.heads.length;k++) g.heads[k]=(short) goldHeads[k];


				// record which labels occur under which gold head POS
				HashMap<String,Integer> labelsNum =posLabelAssign.get(goldInstance.gpos[goldInstance.heads[i]]);
				if (labelsNum== null) {
					labelsNum = new HashMap<String,Integer>();
					posLabelAssign.put(goldInstance.gpos[goldInstance.heads[i]], labelsNum);
				}

				Integer num = labelsNum.get(goldInstance.labels[i]);
				if (num==null) num =0;
				num++;
				labelsNum.put(goldInstance.labels[i],num);



				Integer count = labelCount.get(goldLabels[i]);
				if (count==null)count = 0;

				count++;

				labelCount.put(goldLabels[i], count);

				// per-label true positives vs. false positives / confusion pairs
				if(goldLabels[i].equals(predLabels[i]) && (what==TAGGER || predHeads[i] == goldHeads[i] )) {
					Integer correct = labelCorrect.get(goldLabels[i]);
					if (correct ==null) correct =0;
					correct ++;
					labelCorrect.put(goldLabels[i], correct);

				} else {

					Integer fp = falsePositive.get(predLabels[i]);
					if (fp ==null) fp =0;
					fp ++;
					falsePositive.put(predLabels[i], fp);

					HashMap<String,Integer> conf = confusion.get(goldLabels[i]);
					if (conf == null) confusion.put(goldLabels[i], conf = new HashMap<String,Integer>());

					conf.put(predLabels[i], conf.get(predLabels[i])==null?1:conf.get(predLabels[i])+1);


				}


				// overall (unfiltered) attachment counting; tlas marks
				// head+label+POS all correct for the -sig output
				boolean tlas =false;
				if (predHeads[i] == goldHeads[i]) {
					corr++;

					if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrHeadAndPos ++;
					if (goldLabels[i].equals(predLabels[i])) {
						corrL++;
						//	if (predLabels[i].startsWith("PMOD"))
						corrLabels++;
						//	else correctHead.add(0);
						if (goldInstance.gpos[i].equals(predInstance.ppos[i])) {
							tlasS++;
							tlas=true;
							corrLableAndPos ++;
						}
					}
					else {
						//	correctHead.add(0);
						//	System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
						wholeL = false;
					}
				}
				else {

					//correctHead.add(0);

					//	System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
					whole = false; wholeL = false;

					count = label.get(goldLabels[i]);

					if (count==null)count = 0;
					count++;
					label.put(goldLabels[i], count);



					int d = Math.abs(goldInstance.heads[i]-i);
				}


				// BP* scores: skip forms in the larger punctuation set
				if( ! ("!\"#$%&''()*+,-./:;<=>?@[\\]^_{|}~``".contains(goldInstance.forms[i]))) {

					if (predHeads[i] == goldHeads[i]) {
						BPcorr++;

						if (goldLabels[i].equals(predLabels[i])) {
							BPcorrL++;
						}
						else {
							//	System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
							//	PwholeL = false;
						}
					} else {
						//	System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
						//Pwhole = false; wholeL = false;
					}

				} else bpunc++;

				// P* scores: skip ",.:''``" punctuation forms
				if( ! (",.:''``".contains(goldInstance.forms[i]))) {


					if (predHeads[i] == goldHeads[i]) {
						if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrHeadAndPosP ++;
						Pcorr++;

						if (goldLabels[i].equals(predLabels[i])) {
							PcorrL++;
							if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrLableAndPosP ++;

						}
						else {
							//	System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
							PwholeL = false;
						}
					} else {
						//	System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
						Pwhole = false; PwholeL = false;
					}

				} else punc++;


				// Chn* scores: skip tokens whose gold POS marks punctuation ("pu...")
				if( ! (goldInstance.gpos[i].toLowerCase().startsWith("pu"))) {
					if (predHeads[i] == goldHeads[i]) {
						correctChnWoPunc++;

						if (goldLabels[i].equals(predLabels[i])) {
							correctLChnWoPunc++;
							if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrLableAndPosC ++;
						}
						else {
							//	System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
							//	PwholeL = false;
						}
					} else {
						//	System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
						//	Pwhole = false; PwholeL = false;
					}

				} else totalChnWoPunc++;


				if (sig) {
					if(tlas) System.out.println("1\t");
					else System.out.println("0\t");
				}

			}
			total += ((instanceLength - 1)); // Subtract one to not score fake root token

			Ptotal += ((instanceLength - 1) - punc);
			BPtotal += ((instanceLength - 1) - bpunc);
			CPtotal += ((instanceLength - 1) - totalChnWoPunc);
			if(whole) corrsent++;
			if(wholeL) corrsentL++;
			if(Pwhole) Pcorrsent++;
			if(PwholeL) PcorrsentL++;
			numsent++;

			goldInstance = goldReader.getNext();
			predInstance = predictedReader.getNext();
			// per-sentence labeled-head accuracy for the paired t-test
			correctHead.add((double) ((double)corrLabels/(instanceLength - 1)));
			//	System.out.println(""+((double)corrLabels/(instanceLength - 1)));
		}

		Results r = new Results();

		r.correctHead =correctHead;
		// mult/diff turn a ratio into a percentage with two decimal digits
		int mult=100000, diff=1000;

		r.total = total;
		r.corr = corr;
		r.las =(float)Math.round(((double)corrL/total)*mult)/diff;
		r.ula =(float)Math.round(((double)corr /total)*mult)/diff;
		r.lpas =(float)Math.round(((double)corrLableAndPos/total)*mult)/diff;
		r.upla =(float)Math.round(((double)corrHeadAndPos /total)*mult)/diff;
		float tlasp = (float)Math.round(((double)corrLableAndPosP/Ptotal)*mult)/diff;
		// NOTE(review): tlasc divides the Chn-count by Ptotal rather than
		// CPtotal — confirm whether that is intended.
		float tlasc = (float)Math.round(((double)corrLableAndPosC/Ptotal)*mult)/diff;

		//	System.out.print("Total: " + total+" \tCorrect: " + corr+" ");
		System.out.print(" LAS/Total/UAS/Total: " + r.las+"/" + (double)Math.round(((double)corrsentL/numsent)*mult)/diff+
				"/" + r.ula+"/" + (double)Math.round(((double)corrsent /numsent)*mult)/diff+" LPAS/UPAS "+r.lpas+"/"+r.upla);

		System.out.println("; without . " + (double)Math.round(((double)PcorrL/Ptotal)*mult)/diff+"/" +
				(double)Math.round(((double)PcorrsentL/numsent)*mult)/diff+
				"/" + (double)Math.round(((double)Pcorr /Ptotal)*mult)/diff+"/" +
				(double)Math.round(((double)Pcorrsent /numsent)*mult)/diff+" TLAS "+tlasp+
				" V2 LAS/UAS "+(double)Math.round(((double)BPcorrL/BPtotal)*mult)/diff+
				"/"+(double)Math.round(((double)BPcorr/BPtotal)*mult)/diff+
				" CHN LAS/UAS "+(double)Math.round(((double)correctLChnWoPunc/CPtotal)*mult)/diff+
				"/"+(double)Math.round(((double)correctChnWoPunc/CPtotal)*mult)/diff+" TLAS "+tlasc);

		float precisionNonProj = ((float)nonProjOk)/((float)nonProjOk+nonProjWrong);
		float recallNonProj = ((float)nonProjOk)/((float)(nonproj));
		System.out.println("proj "+proj+" nonp "+nonproj+"; predicted proj "+pproj+" non "+pnonproj+"; nonp correct "+
				nonProjOk+" nonp wrong "+nonProjWrong+
				" precision=(nonProjOk)/(non-projOk+nonProjWrong): "+precisionNonProj+
				" recall=nonProjOk/nonproj="+recallNonProj+" F="+(2*precisionNonProj*recallNonProj)/(precisionNonProj+recallNonProj));

		if (!printEval) return r;


		HashMap<String,Integer> totalX = new HashMap<String,Integer>();
		HashMap<String,Integer> totalY = new HashMap<String,Integer>();

		String A=" "; // &
		System.out.println("label\ttp\tcount\trecall\t\ttp\tfp+tp\tprecision\t F-Score ");

		// per-label recall, precision and (arithmetic-mean) F-score
		for(Entry<String, Integer> e : labelCount.entrySet()) {

			int tp = labelCorrect.get(e.getKey())==null?0:labelCorrect.get(e.getKey()).intValue();
			Integer count = labelCount.get(e.getKey());
			int fp = falsePositive.get(e.getKey())==null?0:falsePositive.get(e.getKey()).intValue();
			System.out.println(e.getKey()+"\t"+tp+"\t"+count+"\t"+roundPercent((float)tp/count)+"\t\t"+tp+"\t"+(fp+tp)+
					"\t"+roundPercent((float)tp/(fp+tp))+"\t\t"+roundPercent((((float)tp/count))+(float)tp/(fp+tp))/2F); //+totalD
		}

		// confusion lists per gold label, most frequent first
		for(Entry<String, HashMap<String, Integer>> e : confusion.entrySet()) {
			HashMap<String, Integer> values = e.getValue();
			ArrayList<Entry<String, Integer>> entries = new ArrayList<Entry<String, Integer>>(values.entrySet());
			Collections.sort(entries, new Comparator<Entry<String, Integer>>() {



				@Override
				public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) {

					return o2.getValue().compareTo(o1.getValue());
				}


			}
			);


			System.out.println(e.getKey()+"\t"+entries);


		}
		System.out.println(""+posLabelAssign);



		return r;
	}
+
+
+ public static float round (double v){
+
+ return Math.round(v*10000F)/10000F;
+ }
+
+ public static float roundPercent (double v){
+
+ return Math.round(v*10000F)/100F;
+ }
+
+
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/util/ExtractParagraphs.java b/dependencyParser/mate-tools/src/is2/util/ExtractParagraphs.java
new file mode 100644
index 0000000..a9fabca
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/ExtractParagraphs.java
@@ -0,0 +1,87 @@
+package is2.util;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.nio.channels.Channels;
+import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.util.StringTokenizer;
+
+public class ExtractParagraphs {
+
+ /**
+
+ * @param args
+ * @throws IOException
+ */
+ public static void main(String args[]) throws IOException {
+
+ if (args.length<1) {
+ System.out.println("Please provide a file name.");
+ System.exit(0);
+ }
+
+ File file = new File(args[0]);
+ file.isDirectory();
+ String[] dirs = file.list();
+
+ BufferedWriter write = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]),"UTF-8"),32768);
+ int cnt=0;
+
+for (String fileName : dirs) {
+ BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]+fileName),"UTF-8"),32768);
+
+
+
+
+ int state =0;
+
+ String s;
+ while ((s = reader.readLine()) != null) {
+
+ if (s.startsWith("<P>")||s.startsWith("<p>")) {
+ state=1; // paragraph start
+ continue;
+ }
+
+
+
+
+ if (s.startsWith("</P>")||s.startsWith("</p>")) {
+ state=2; // paragraph end
+ write.newLine();
+ }
+
+ boolean lastNL =false;
+ if (state==1) {
+ String sp[] = s.split("\\. ");
+ for(String p : sp) {
+ write.write(p);
+ // if (sp.length>1) write.newLine();
+ }
+ cnt++;
+ }
+ }
+
+ //if (cnt>5000) break;
+
+ reader.close();
+}
+ write.flush();
+ write.close();
+
+ System.out.println("Extract "+cnt+" lines ");
+
+
+ }
+
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/util/IntStack.java b/dependencyParser/mate-tools/src/is2/util/IntStack.java
new file mode 100644
index 0000000..e92c02c
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/IntStack.java
@@ -0,0 +1,86 @@
+/**
+ *
+ */
+package is2.util;
+
+
+
+/**
+ * @author Dr. Bernd Bohnet, 01.06.2011
+ *
+ *
+ */
final public class IntStack {

	final public int[] stack;   // backing storage; capacity is fixed at construction
	public int position =-1;    // index of the top element; -1 means empty

	/** Creates a stack with room for at least {@code size} elements. */
	public IntStack(int size) {
		if (size<=0) stack = new int[1];
		else stack = new int[size+1];
	}

	/**
	 * Copy constructor.
	 * NOTE(review): this shares the backing array with {@code s} (only the
	 * position is copied), so mutations through one instance are visible in
	 * the other — confirm this aliasing is intended before relying on it.
	 */
	public IntStack(IntStack s) {
		stack=s.stack;
		position = s.position;
	}


	/** Returns the top element without removing it, or -1 if the stack is empty. */
	public int peek() {
		return position==-1?-1:stack[position];
	}

	/** Pushes a value; performs no capacity check (caller must size the stack). */
	public void push(int i) {
		stack[++position]=i;
	}

	/** Removes and returns the top element, or -1 if the stack is empty. */
	public int pop() {
		return position==-1?-1:stack[position--];
	}

	/** Returns the number of elements currently on the stack. */
	public int size() {
		return position+1;
	}

	/** Returns true if the stack holds no elements. */
	public boolean isEmpty() {
		return position==-1;
	}

	/** Returns the element at absolute index p (0 = bottom of the stack). */
	public int get(int p) {
		return stack[p];
	}

	/** Empties the stack; the array contents are left in place. */
	public void clear() {
		position=-1;
	}

	/**
	 * Replaces this stack's contents with those of b.
	 * @param b the source stack; must not exceed this stack's capacity
	 */
	public void addAll(IntStack b) {

		position=b.position;
		if (position<0) return;

		// bulk copy instead of the original element-by-element loop
		System.arraycopy(b.stack, 0, stack, 0, position+1);

	}

	/** Returns true if s occurs anywhere on the stack. */
	public boolean contains(int s) {

		for(int k=0; k<=position;k++)
			if (stack[k]==s) return true;

		return false;
	}

	/** Top-down listing in the form "index:value ". */
	public String toString() {
		StringBuilder s = new StringBuilder();
		for(int k = position;k>=0;k--) {
			s.append(k).append(":").append(this.stack[k]).append(" ");
		}
		return s.toString();
	}

}
diff --git a/dependencyParser/mate-tools/src/is2/util/Long2Int.java b/dependencyParser/mate-tools/src/is2/util/Long2Int.java
new file mode 100644
index 0000000..d461df8
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/Long2Int.java
@@ -0,0 +1,81 @@
+package is2.util;
+
+import is2.data.Long2IntInterface;
+
+
+/**
+ * @author Bernd Bohnet, 01.09.2009
+ *
+ * Maps for the Hash Kernel the long values to the int values.
+ */
final public class Long2Int implements Long2IntInterface {


	/** Creates a mapping with the default table size (115911564 slots). */
	public Long2Int() {
		size=115911564;
	}

	/**
	 * Creates a mapping with an explicit table size.
	 * @param s number of int slots the long feature codes are folded into
	 */
	public Long2Int(int s) {
		size=s;
	}


	/** Integer counter for long2int */
	final private int size; //0x03ffffff //0x07ffffff


	/* (non-Javadoc)
	 * @see is2.sp09k9992.Long2IntIterface#size()
	 */
	public int size() {return size;}

	/* (non-Javadoc)
	 * @see is2.sp09k9992.Long2IntIterface#start()
	 * has no meaning for this implementation
	 */
	final public void start() {}


	/* (non-Javadoc)
	 * @see is2.sp09k9992.Long2IntIterface#l2i(long)
	 *
	 * Maps a long feature code into [0, size); negative inputs return -1.
	 * The value is xor-folded with progressively shifted-and-masked copies of
	 * itself before taking the remainder, so that high bits influence the
	 * result. The commented-out variant below documents an earlier folding
	 * scheme the author benchmarked (see the LAS notes kept in the comments).
	 */
	final public int l2i(long l) {
		if (l<0) return -1;

		// this works well LAS 88.138
		//	int r= (int)(( l ^ (l&0xffffffff00000000L) >>> 29 ));//0x811c9dc5 ^ // 29
		//	return Math.abs(r % size);
		// this works a bit better and good with 0x03ffffff
		//
		/*
		long r= l;//26
		l = (l>>12)&0xfffffffffffff000L;
		r ^= l;//38
		l = (l>>11)&0xffffffffffffc000L;
		r ^= l;//49
		l = (l>>9)& 0xffffffffffff0000L; //53
		r ^= l;//58
		l = (l>>7)&0xfffffffffffc0000L; //62
		r ^=l;//65
		int x = (int)r;
		x = x % size;
		// return x >= 0 ? x : -x ;// Math.abs(r % size);

		 */
		// 26 0x03ffffff
		// together with 0x07ffffff 27 88.372
		long r= l;// 27
		l = (l>>13)&0xffffffffffffe000L;
		r ^= l; // 40
		l = (l>>11)&0xffffffffffff0000L;
		r ^= l; // 51
		l = (l>>9)& 0xfffffffffffc0000L; //53
		r ^= l; // 60
		l = (l>>7)& 0xfffffffffff00000L; //62
		r ^=l; //67
		// remainder may be negative for a negative fold; mirror it into range
		int x = ((int)r) % size;

		return x >= 0 ? x : -x ;
	}
}
diff --git a/dependencyParser/mate-tools/src/is2/util/Options.java b/dependencyParser/mate-tools/src/is2/util/Options.java
new file mode 100644
index 0000000..5989483
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/Options.java
@@ -0,0 +1,129 @@
+package is2.util;
+
+import is2.util.OptionsSuper;
+
+import java.io.File;
+
+
public final class Options extends OptionsSuper {



	/**
	 * Parses the command-line arguments into the fields inherited from
	 * OptionsSuper and prepares the temporary feature-forest file used
	 * during training. Unrecognized options are forwarded to
	 * {@code super.addOption(args, i)}.
	 *
	 * NOTE(review): unlike the other file options, "-train" does not advance
	 * past its argument (no i++), so the training file name is re-examined as
	 * if it were an option on the next iteration — confirm this is intended.
	 */
	public Options (String[] args) {

		for(int i = 0; i < args.length; i++) {
			String[] pair = args[i].split(":");

			if (pair[0].equals("--help")) explain();
			else if (pair[0].equals("-train")) {
				train = true;
				trainfile = args[i+1];
			} else if (pair[0].equals("-eval")) {
				eval = true;
				goldfile =args[i+1]; i++;
			} else if (pair[0].equals("-test")) {
				test = true;
				testfile = args[i+1]; i++;
			} else if (pair[0].equals("-i")) {
				numIters = Integer.parseInt(args[i+1]); i++;
			}
			else if (pair[0].equals("-out")) {
				outfile = args[i+1]; i++;
			}
			else if (pair[0].equals("-decode")) {
				decodeProjective = args[i+1].equals("proj"); i++;
			}
			else if (pair[0].equals("-confidence")) {

				conf = true;
			}

			else if (pair[0].equals("-count")) {
				count = Integer.parseInt(args[i+1]); i++;
			} else if (pair[0].equals("-model")) {
				modelName = args[i+1]; i++;
			}
			else if (pair[0].equals("-device")) {
				device = args[i+1]; i++;
			} else if (pair[0].equals("-tmp")) {
				tmp = args[i+1]; i++;
			} else if (pair[0].equals("-format")) {
				//format = args[i+1];
				formatTask = Integer.parseInt(args[i+1]); i++;
			} else if (pair[0].equals("-allfeatures")) {
				allFeatures=true;
			} else if (pair[0].equals("-nonormalize")) {
				normalize=false;
			}else if (pair[0].equals("-nframes")) {
				//format = args[i+1];
				nbframes= args[i+1]; i++;


			} else if (pair[0].equals("-pframes")) {
				//format = args[i+1];
				pbframes= args[i+1]; i++;
			} else if (pair[0].equals("-nopred")) {
				nopred =true;
			} else if (pair[0].equals("-divide")) {
				keep =true;
			} else if (pair[0].equals("-lexicon")) {
				lexicon= args[i+1]; i++;

			} else super.addOption(args, i);

		}




		// set up the temporary file that caches the training feature forest
		try {

			if (trainfile!=null) {

				if (keep && tmp!=null) {
					// -divide: reuse an existing forest file; fall back to
					// recreating it if the file is missing
					trainforest = new File(tmp);
					if (!trainforest.exists()) keep=false;

				} else
					if (tmp!=null) {
						trainforest = File.createTempFile("train", ".tmp", new File(tmp));
						trainforest.deleteOnExit();
					}
					else {
						trainforest = File.createTempFile("train", ".tmp"); //,new File("F:\\")
						trainforest.deleteOnExit();
					}


			}


		} catch (java.io.IOException e) {
			System.out.println("Unable to create tmp files for feature forests!");
			System.out.println(e);
			System.exit(0);
		}
	}

	/** Prints usage information and exits the JVM. */
	private void explain() {
		System.out.println("Usage: ");
		System.out.println("java -class mate.jar is2.parser.Parser [Options]");
		System.out.println();
		System.out.println("Example: ");
		System.out.println(" java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6");
		System.out.println("");
		System.out.println("Options:");
		System.out.println("");
		System.out.println(" -train  <file>    the corpus a model is trained on; default "+this.trainfile);
		System.out.println(" -test   <file>    the input corpus for testing; default "+this.testfile);
		System.out.println(" -out    <file>    the output corpus (result) of a test run; default "+this.outfile);
		System.out.println(" -model  <file>    the parsing model for traing the model is stored in the files");
		System.out.println("                   and for parsing the model is load from this file; default "+this.modelName);
		System.out.println(" -i      <number>  the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters);
		System.out.println(" -count  <number>  the n first sentences of the corpus are take for the training default "+this.count);
		System.out.println(" -format <number>  conll format of the year 8 or 9; default "+this.formatTask);

		System.exit(0);
	}
}
diff --git a/dependencyParser/mate-tools/src/is2/util/OptionsSuper.java b/dependencyParser/mate-tools/src/is2/util/OptionsSuper.java
new file mode 100755
index 0000000..0a40f73
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/OptionsSuper.java
@@ -0,0 +1,216 @@
+package is2.util;
+
+import is2.io.CONLLReader09;
+
+import java.io.File;
+
/**
 * Common command-line option container shared by the mate-tools trainers,
 * taggers and parsers. All options are plain public fields; they are filled
 * in by {@link #addOption(String[], int)} while scanning the argument array.
 *
 * Fixes over the previous version:
 * - "-train" now consumes its file argument (i++) like every other
 *   one-argument option; before, the file name was re-parsed as an option
 *   on the next loop iteration (e.g. a file literally named "-test" would
 *   have been misinterpreted).
 * - removed a stray double semicolon after "-loadTaggerModels".
 */
public class OptionsSuper {

	public String trainfile = null;
	public String testfile = null;
	public File trainforest = null;

	public String nbframes = null;
	public String pbframes = null;

	public boolean nopred = false;
	public boolean upper = false;

	public boolean train = false;
	public boolean eval = false;
	public boolean test = false;
	public boolean keep = false;
	public boolean flt = false;
	public boolean loadTaggerModels = false;

	public String modelName = "prs.mdl";
	public String modelTaggerName = null;

	public String useMapping = null;
	public String device = "C:";
	public String tmp = null;
	public boolean createForest = true;
	public boolean decodeProjective = false;
	public double decodeTH = 0.3d;
	public String format = "CONLL";
	public int formatTask = 9;
	public int numIters = 10;
	public int best = 1000;
	public String outfile = "dp.conll";
	public String charset = "UTF-8";
	public String phraseTrain = null;
	public String phraseTest = null;
	public String goldfile = null;
	public String gout = "sec23.gld";
	public String features = null;
	public String lexicon = null;
	public int hsize = 0x07ffffff;
	public int maxLen = 2000;
	public int maxForms = Integer.MAX_VALUE;
	public int beam = 4;
	public float prune = -100000000;

	public String third = "";
	public String second = "";
	public String first = "";

	public int cross = 10;

	public boolean useRelationalFeatures = false;
	public int count = 10000000;
	public int cores = Integer.MAX_VALUE;
	public int start = 0;
	public int minOccureForms = 0;
	// tagger averaging interval
	public int tt = 30;
	public boolean allFeatures = false;
	public boolean normalize = false;
	public boolean no2nd = false;
	public boolean noLemmas = false;
	public boolean few2nd = false, noLinear = false, noMorph = false;
	public String clusterFile;

	// output confidence values
	public boolean conf = false;
	public String phraseFormat = "penn"; // tiger | penn
	public boolean average = true;
	public boolean label = false;
	public boolean stack = false;
	public boolean oneRoot = false;

	public String significant1 = null, significant2 = null;

	// horizontal stacking
	public int minLength = 0, maxLength = Integer.MAX_VALUE;
	public boolean overwritegold = false;

	public static final int MULTIPLICATIVE = 1, SHIFT = 2;
	public int featureCreation = MULTIPLICATIVE;

	/**
	 * Parses an entire argument array. The unused second parameter only
	 * disambiguates this constructor from subclass constructors.
	 *
	 * @param args  command-line arguments
	 * @param dummy ignored
	 */
	public OptionsSuper(String[] args, String dummy) {

		for (int i = 0; i < args.length; i++) {
			i = addOption(args, i);
		}

	}

	/** Creates an option set with all defaults. */
	public OptionsSuper() {}

	/**
	 * Interprets the option at position {@code i} and, for options that take
	 * an argument, consumes it as well.
	 *
	 * @param args the full argument array
	 * @param i    index of the option to interpret
	 * @return the index of the last consumed element (the caller's loop
	 *         increment then moves past it)
	 */
	public int addOption(String args[], int i) {

		if (args[i].equals("-train")) {
			train = true;
			trainfile = args[i + 1]; i++;
		} else if (args[i].equals("-eval")) {
			eval = true;
			goldfile = args[i + 1]; i++;
		} else if (args[i].equals("-gout")) {
			gout = args[i + 1]; i++;
		} else if (args[i].equals("-test")) {
			test = true;
			testfile = args[i + 1]; i++;
		} else if (args[i].equals("-sig1")) {
			significant1 = args[i + 1]; i++;
		} else if (args[i].equals("-sig2")) {
			significant2 = args[i + 1]; i++;
		} else if (args[i].equals("-i")) {
			numIters = Integer.parseInt(args[i + 1]); i++;
		} else if (args[i].equals("-out")) {
			outfile = args[i + 1]; i++;
		} else if (args[i].equals("-cluster")) {
			clusterFile = args[i + 1]; i++;
		} else if (args[i].equals("-count")) {
			count = Integer.parseInt(args[i + 1]); i++;
		} else if (args[i].equals("-model")) {
			modelName = args[i + 1]; i++;
		} else if (args[i].equals("-tmodel")) {
			this.modelTaggerName = args[i + 1]; i++;
		} else if (args[i].equals("-nonormalize")) {
			// NOTE(review): normalize already defaults to false, so this flag
			// is only meaningful if a subclass flipped the default.
			normalize = false;
		} else if (args[i].equals("-float")) {
			flt = true;
		} else if (args[i].equals("-hsize")) {
			hsize = Integer.parseInt(args[i + 1]); i++;
		} else if (args[i].equals("-charset")) {
			charset = args[++i];
		} else if (args[i].equals("-pstrain")) {
			this.phraseTrain = args[i + 1]; i++;
		} else if (args[i].equals("-pstest")) {
			this.phraseTest = args[i + 1]; i++;
		} else if (args[i].equals("-len")) {
			maxLen = Integer.parseInt(args[i + 1]); i++;
		} else if (args[i].equals("-cores")) {
			cores = Integer.parseInt(args[i + 1]); i++;
		} else if (args[i].equals("-start")) {
			start = Integer.parseInt(args[i + 1]); i++;
		} else if (args[i].equals("-max")) {
			maxLength = Integer.parseInt(args[i + 1]); i++;
		} else if (args[i].equals("-min")) {
			minLength = Integer.parseInt(args[i + 1]); i++;
		} else if (args[i].equals("-noLemmas")) {
			noLemmas = true;
		} else if (args[i].equals("-noavg")) {
			this.average = false;
		} else if (args[i].equals("-label")) {
			label = true;
		} else if (args[i].equals("-stack")) {
			stack = true;
		} else if (args[i].equals("-overwritegold")) {
			overwritegold = true;
		} else if (args[i].equals("-format")) {
			formatTask = Integer.parseInt(args[++i]);
		} else if (args[i].equals("-tt")) {
			tt = Integer.parseInt(args[++i]);
		} else if (args[i].equals("-min-occure-forms")) {
			minOccureForms = Integer.parseInt(args[++i]);
		} else if (args[i].equals("-loadTaggerModels")) {
			this.loadTaggerModels = true;
		} else if (args[i].equals("-feature_creation")) {
			this.featureCreation = args[++i].equals("shift") ? SHIFT : MULTIPLICATIVE;
		}

		return i;

	}

	@Override
	public String toString() {
		StringBuilder sb = new StringBuilder();
		sb.append("FLAGS [");
		sb.append("train-file: " + trainfile);
		sb.append(" | ");
		sb.append("test-file: " + testfile);
		sb.append(" | ");
		sb.append("gold-file: " + goldfile);
		sb.append(" | ");
		sb.append("output-file: " + outfile);
		sb.append(" | ");
		sb.append("model-name: " + modelName);
		sb.append(" | ");
		sb.append("train: " + train);
		sb.append(" | ");
		sb.append("test: " + test);
		sb.append(" | ");
		sb.append("eval: " + eval);
		sb.append(" | ");
		sb.append("training-iterations: " + numIters);
		sb.append(" | ");
		sb.append("decode-type: " + decodeProjective);
		sb.append(" | ");
		sb.append("create-forest: " + createForest);
		sb.append(" | ");
		sb.append("format: " + format);

		sb.append("]\n");
		return sb.toString();
	}

}
\ No newline at end of file
diff --git a/dependencyParser/mate-tools/src/is2/util/ParserEvaluator.java b/dependencyParser/mate-tools/src/is2/util/ParserEvaluator.java
new file mode 100644
index 0000000..260e4b7
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/ParserEvaluator.java
@@ -0,0 +1,94 @@
+package is2.util;
+
+import is2.data.SentenceData09;
+import is2.io.CONLLReader09;
+
+
+public class ParserEvaluator {
+
+
+
+ public static final String PUNCT ="!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
+
+ public static class Results {
+
+ public int total;
+ public int corr;
+ public float las;
+ public float ula;
+
+ }
+
+ public static Results evaluate (String act_file, String pred_file) throws Exception {
+
+ CONLLReader09 goldReader = new CONLLReader09(act_file, -1);
+ CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1);
+
+ int total = 0, corr = 0, corrL = 0;
+ int numsent = 0, corrsent = 0, corrsentL = 0;
+ SentenceData09 goldInstance = goldReader.getNext();
+ SentenceData09 predInstance = predictedReader.getNext();
+
+ while(goldInstance != null) {
+
+ int instanceLength = goldInstance.length();
+
+ if (instanceLength != predInstance.length())
+ System.out.println("Lengths do not match on sentence "+numsent);
+
+ int[] goldHeads = goldInstance.heads;
+ String[] goldLabels = goldInstance.labels;
+ int[] predHeads = predInstance.pheads;
+ String[] predLabels = predInstance.plabels;
+
+ boolean whole = true;
+ boolean wholeL = true;
+
+ // NOTE: the first item is the root info added during nextInstance(), so we skip it.
+
+ int punc=0;
+ for (int i = 1; i < instanceLength; i++) {
+ if (predHeads[i] == goldHeads[i]) {
+ corr++;
+
+ if (goldLabels[i].equals(predLabels[i])) corrL++;
+ else {
+ // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ wholeL = false;
+ }
+ }
+ else {
+ // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i);
+ whole = false; wholeL = false;
+ }
+ }
+ total += ((instanceLength - 1) - punc); // Subtract one to not score fake root token
+
+ if(whole) corrsent++;
+ if(wholeL) corrsentL++;
+ numsent++;
+
+ goldInstance = goldReader.getNext();
+ predInstance = predictedReader.getNext();
+ }
+
+ Results r = new Results();
+
+ r.total = total;
+ r.corr = corr;
+ r.las =(float)Math.round(((double)corrL/total)*100000)/1000;
+ r.ula =(float)Math.round(((double)corr /total)*100000)/1000;
+ System.out.print("Total: " + total+" \tCorrect: " + corr+" ");
+ System.out.println("LAS: " + (double)Math.round(((double)corrL/total)*100000)/1000+" \tTotal: " + (double)Math.round(((double)corrsentL/numsent)*100000)/1000+
+ " \tULA: " + (double)Math.round(((double)corr /total)*100000)/1000+" \tTotal: " + (double)Math.round(((double)corrsent /numsent)*100000)/1000);
+
+ return r;
+ }
+
+
+ public static float round (double v){
+
+ return Math.round(v*10000F)/10000F;
+ }
+
+}
diff --git a/dependencyParser/mate-tools/src/is2/util/Split.java b/dependencyParser/mate-tools/src/is2/util/Split.java
new file mode 100755
index 0000000..48eadbe
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/Split.java
@@ -0,0 +1,94 @@
+package is2.util;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.channels.Channels;
+import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.util.StringTokenizer;
+
/**
 * Converts a file of tokenized sentences (one sentence per line) into a
 * CoNLL-style one-word-per-line format on stdout.
 */
public class Split {

	/**
	 * Splits tokenized sentences into one word per line format:
	 *
	 * Input
	 * &gt; I am an text .
	 * &gt; Sentence two ...
	 *
	 * Output:
	 * I  _ _ _ ...
	 * am _ _ _ ...
	 * ...
	 *
	 * A leading "id:" prefix (a colon within the first 12 characters, before
	 * any space) is stripped from the sentence and prepended to each token
	 * index as "id_". Tokens containing '=' are skipped. Reads with the
	 * platform default charset.
	 *
	 * Fix over the previous version: the reader is now closed via
	 * try-with-resources, so it is released even if reading fails.
	 *
	 * @param args exactly one element: the input file name
	 * @throws IOException if the file cannot be read
	 */
	public static void main(String args[]) throws IOException {

		if (args.length != 1) {
			System.out.println("Please provide a file name.");
			System.exit(0);
		}

		String filename = args[0];
		CharsetDecoder decoder = Charset.defaultCharset().newDecoder();

		try (FileInputStream in = new FileInputStream(filename);
		     BufferedReader bInfile = new BufferedReader(
		             Channels.newReader(in.getChannel(), decoder, 16 * 1024))) {

			String s;
			while ((s = bInfile.readLine()) != null) {

				// Does the sentence start with an "id:" prefix? Look for a
				// colon in the first 12 characters, stopping at a space.
				int colon = 0;
				for (int k = 0; k < 12; k++) {
					if (s.length() <= k) break;
					if (s.charAt(k) == ':') {
						colon++;
						break;
					}
					if (s.charAt(k) == ' ') break;
				}

				String prefix = colon > 0 ? s.substring(0, s.indexOf(":")) + "_" : "";

				if (colon > 0) {
					s = s.substring(s.indexOf(":") + 1);
				}

				StringTokenizer t = new StringTokenizer(s);
				int i = 1;
				boolean found = false;
				while (t.hasMoreTokens()) {
					found = true;
					String tk = t.nextToken();
					if (tk.contains("=")) continue; // skip metadata tokens
					System.out.print(prefix + i + "\t");
					System.out.print(tk);
					System.out.println("\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_");
					i++;
				}
				// Blank line separates sentences; suppressed for empty input lines.
				if (found) System.out.println();
			}
		}
	}

}
diff --git a/dependencyParser/mate-tools/src/is2/util/Split2.java b/dependencyParser/mate-tools/src/is2/util/Split2.java
new file mode 100644
index 0000000..1690a3d
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/Split2.java
@@ -0,0 +1,70 @@
+package is2.util;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.nio.channels.Channels;
+import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.util.StringTokenizer;
+
/**
 * Re-writes a UTF-8 token file into ISO-8859-1, one token per line, with a
 * blank line after each input line, warning about characters that cannot be
 * represented in ISO-8859-1.
 */
public class Split2 {

	/**
	 * Splits tokenized sentences into one word per line format:
	 *
	 * Input
	 * &gt; I am an text .
	 * &gt; Sentence two ...
	 *
	 * Output:
	 * I  _ _ _ ...
	 * am _ _ _ ...
	 * ...
	 *
	 * Fixes over the previous version:
	 * - the argument check only required one argument although args[1] is
	 *   used as the output file (ArrayIndexOutOfBoundsException with one arg);
	 * - the out-of-range test "c &lt; 0 &amp;&amp; c &gt;= 255" was always
	 *   false (a char widened to int is never negative, and no value is both
	 *   below 0 and at least 255); the intent — flagging characters outside
	 *   the ISO-8859-1 output charset — is now "c &gt; 255";
	 * - reader and writer are closed via try-with-resources.
	 *
	 * @param args args[0] = UTF-8 input file, args[1] = ISO-8859-1 output file
	 * @throws IOException if either file cannot be accessed
	 */
	public static void main(String args[]) throws IOException {

		if (args.length < 2) {
			System.out.println("Please provide an input and an output file name.");
			System.exit(0);
		}

		try (BufferedReader reader = new BufferedReader(
				new InputStreamReader(new FileInputStream(args[0]), "UTF-8"), 32768);
		     BufferedWriter write = new BufferedWriter(
				new OutputStreamWriter(new FileOutputStream(args[1]), "ISO-8859-1"))) {

			String s;
			int cnt = 0;
			while ((s = reader.readLine()) != null) {
				StringTokenizer t = new StringTokenizer(s);
				while (t.hasMoreTokens()) {
					String tk = t.nextToken();
					// Warn about characters the ISO-8859-1 output cannot encode.
					for (int c : tk.toCharArray()) {
						if (c > 255) System.out.println("contain sign " + c + " " + cnt);
					}
					write.write(tk);
					write.newLine();
					cnt++;
				}
				write.newLine(); // blank line marks the end of the sentence
			}
		}
	}

}
diff --git a/dependencyParser/mate-tools/src/is2/util/Split3.java b/dependencyParser/mate-tools/src/is2/util/Split3.java
new file mode 100644
index 0000000..03d920c
--- /dev/null
+++ b/dependencyParser/mate-tools/src/is2/util/Split3.java
@@ -0,0 +1,67 @@
+package is2.util;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.nio.channels.Channels;
+import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.util.StringTokenizer;
+
/**
 * Re-writes a UTF-8 token file as UTF-8, one token per line, with a blank
 * line after each input line.
 */
public class Split3 {

	/**
	 * Splits tokenized sentences into one word per line format:
	 *
	 * Input
	 * &gt; I am an text .
	 * &gt; Sentence two ...
	 *
	 * Output:
	 * I  _ _ _ ...
	 * am _ _ _ ...
	 * ...
	 *
	 * Fixes over the previous version:
	 * - the argument check only required one argument although args[1] is
	 *   used as the output file (ArrayIndexOutOfBoundsException with one arg);
	 * - reader and writer are closed via try-with-resources, so they are
	 *   released even if an I/O error occurs mid-way.
	 *
	 * @param args args[0] = UTF-8 input file, args[1] = UTF-8 output file
	 * @throws IOException if either file cannot be accessed
	 */
	public static void main(String args[]) throws IOException {

		if (args.length < 2) {
			System.out.println("Please provide an input and an output file name.");
			System.exit(0);
		}

		try (BufferedReader reader = new BufferedReader(
				new InputStreamReader(new FileInputStream(args[0]), "UTF-8"), 32768);
		     BufferedWriter write = new BufferedWriter(
				new OutputStreamWriter(new FileOutputStream(args[1]), "UTF-8"), 32768)) {

			String s;
			while ((s = reader.readLine()) != null) {
				StringTokenizer t = new StringTokenizer(s);
				while (t.hasMoreTokens()) {
					write.write(t.nextToken());
					write.newLine();
				}
				write.newLine(); // blank line marks the end of the sentence
			}
		}
	}

}
--
libgit2 0.22.2