From 8fa0049b8c711a2cca0e3de500b99fd80117cdf6 Mon Sep 17 00:00:00 2001 From: Jan Lupa <jl320820@students.mimuw.edu.pl> Date: Mon, 8 Aug 2016 16:22:01 +0200 Subject: [PATCH] Dependency Parser now outputs multiple candidate parses --- dependencyParser/mate-tools/.classpath | 16 ++++++++-------- dependencyParser/mate-tools/classes/decoder/ParallelDecoder$DSet.class | Bin 0 -> 485 bytes dependencyParser/mate-tools/classes/decoder/ParallelDecoder.class | Bin 0 -> 7230 bytes dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest$PA.class | Bin 0 -> 563 bytes dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest.class | Bin 0 -> 2802 bytes dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest2$PA.class | Bin 0 -> 729 bytes dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest2.class | Bin 0 -> 3214 bytes dependencyParser/mate-tools/classes/examples/DependencyParser.class | Bin 0 -> 2933 bytes dependencyParser/mate-tools/classes/examples/FullPipelineSpanish.class | Bin 0 -> 3598 bytes dependencyParser/mate-tools/classes/examples/FullPipelineTest.class | Bin 0 -> 2998 bytes dependencyParser/mate-tools/classes/examples/MorphTagger.class | Bin 0 -> 2729 bytes dependencyParser/mate-tools/classes/examples/ParseOnly.class | Bin 0 -> 1484 bytes dependencyParser/mate-tools/classes/examples/Pipeline.class | Bin 0 -> 3068 bytes dependencyParser/mate-tools/classes/extractors/Extractor.class | Bin 0 -> 644 bytes dependencyParser/mate-tools/classes/extractors/ExtractorClusterStacked.class | Bin 0 -> 34872 bytes dependencyParser/mate-tools/classes/extractors/ExtractorClusterStackedR2.class | Bin 0 -> 34015 bytes dependencyParser/mate-tools/classes/extractors/ExtractorFactory.class | Bin 0 -> 856 bytes dependencyParser/mate-tools/classes/extractors/ExtractorReranker.class | Bin 0 -> 18998 bytes dependencyParser/mate-tools/classes/extractors/ParallelExtract$DSet.class | Bin 0 -> 402 bytes dependencyParser/mate-tools/classes/extractors/ParallelExtract.class | Bin 0 -> 5033 bytes dependencyParser/mate-tools/classes/is2/data/Closed.class | Bin 0 -> 889 bytes dependencyParser/mate-tools/classes/is2/data/Cluster.class | Bin 0 -> 4313 bytes dependencyParser/mate-tools/classes/is2/data/D4.class | Bin 0 -> 5263 bytes dependencyParser/mate-tools/classes/is2/data/D6.class | Bin 0 -> 4876 bytes dependencyParser/mate-tools/classes/is2/data/D7.class | Bin 0 -> 5529 bytes dependencyParser/mate-tools/classes/is2/data/DPSTree.class | Bin 0 -> 2035 bytes dependencyParser/mate-tools/classes/is2/data/DX.class | Bin 0 -> 785 bytes dependencyParser/mate-tools/classes/is2/data/DataF.class | Bin 0 -> 604 bytes dependencyParser/mate-tools/classes/is2/data/DataFES.class | Bin 0 -> 605 bytes dependencyParser/mate-tools/classes/is2/data/DataT.class | Bin 0 -> 437 bytes dependencyParser/mate-tools/classes/is2/data/Edges$C.class | Bin 0 -> 1470 bytes dependencyParser/mate-tools/classes/is2/data/Edges.class | Bin 0 -> 4827 bytes dependencyParser/mate-tools/classes/is2/data/F2S.class | Bin 0 -> 1066 bytes dependencyParser/mate-tools/classes/is2/data/F2SD.class | Bin 0 -> 907 bytes dependencyParser/mate-tools/classes/is2/data/F2SF.class | Bin 0 -> 1659 bytes dependencyParser/mate-tools/classes/is2/data/F2SP.class | Bin 0 -> 1660 bytes dependencyParser/mate-tools/classes/is2/data/F2ST.class | Bin 0 -> 984 bytes dependencyParser/mate-tools/classes/is2/data/FV.class | Bin 0 -> 10524 bytes dependencyParser/mate-tools/classes/is2/data/FVR.class | Bin 0 -> 7780 bytes dependencyParser/mate-tools/classes/is2/data/IEncoder.class | Bin 0 -> 167 bytes dependencyParser/mate-tools/classes/is2/data/IEncoderPlus.class | Bin 0 -> 407 bytes dependencyParser/mate-tools/classes/is2/data/IFV.class | Bin 0 -> 974 bytes dependencyParser/mate-tools/classes/is2/data/Instances.class | Bin 0 -> 7305 bytes dependencyParser/mate-tools/classes/is2/data/InstancesTagger.class | Bin 0 -> 4141 bytes dependencyParser/mate-tools/classes/is2/data/IntIntHash.class | Bin 0 -> 5365 bytes dependencyParser/mate-tools/classes/is2/data/Long2Int.class | Bin 0 -> 2492 bytes dependencyParser/mate-tools/classes/is2/data/Long2IntExact.class | Bin 0 -> 1052 bytes dependencyParser/mate-tools/classes/is2/data/Long2IntInterface.class | Bin 0 -> 166 bytes dependencyParser/mate-tools/classes/is2/data/Long2IntQuick.class | Bin 0 -> 718 bytes dependencyParser/mate-tools/classes/is2/data/MFB.class | Bin 0 -> 6510 bytes dependencyParser/mate-tools/classes/is2/data/MFC.class | Bin 0 -> 6546 bytes dependencyParser/mate-tools/classes/is2/data/MFO$Data4.class | Bin 0 -> 809 bytes dependencyParser/mate-tools/classes/is2/data/MFO.class | Bin 0 -> 8558 bytes dependencyParser/mate-tools/classes/is2/data/Open.class | Bin 0 -> 1047 bytes dependencyParser/mate-tools/classes/is2/data/PSTree.class | Bin 0 -> 11849 bytes dependencyParser/mate-tools/classes/is2/data/Parameter.class | Bin 0 -> 270 bytes dependencyParser/mate-tools/classes/is2/data/ParametersFloat.class | Bin 0 -> 4869 bytes dependencyParser/mate-tools/classes/is2/data/Parse.class | Bin 0 -> 3425 bytes dependencyParser/mate-tools/classes/is2/data/ParseNBest.class | Bin 0 -> 2410 bytes dependencyParser/mate-tools/classes/is2/data/PipeGen.class | Bin 0 -> 3772 bytes dependencyParser/mate-tools/classes/is2/data/PrimeFinder.class | Bin 0 -> 3237 bytes dependencyParser/mate-tools/classes/is2/data/RandomIndex.class | Bin 0 -> 2136 bytes dependencyParser/mate-tools/classes/is2/data/SentenceData09.class | Bin 0 -> 10528 bytes dependencyParser/mate-tools/classes/is2/data/Thesaurus.class | Bin 0 -> 4767 bytes dependencyParser/mate-tools/classes/is2/io/CONLLReader04.class | Bin 0 -> 6133 bytes dependencyParser/mate-tools/classes/is2/io/CONLLReader06.class | Bin 0 -> 6262 bytes dependencyParser/mate-tools/classes/is2/io/CONLLReader08.class | Bin 0 -> 7771 bytes dependencyParser/mate-tools/classes/is2/io/CONLLReader09.class | Bin 0 -> 8166 bytes dependencyParser/mate-tools/classes/is2/io/CONLLWriter06.class | Bin 0 -> 5397 bytes dependencyParser/mate-tools/classes/is2/io/CONLLWriter09.class | Bin 0 -> 7210 bytes dependencyParser/mate-tools/classes/is2/io/IOGenerals.class | Bin 0 -> 803 bytes dependencyParser/mate-tools/classes/is2/io/PSReader.class | Bin 0 -> 210 bytes dependencyParser/mate-tools/classes/is2/io/TigerReader$Line.class | Bin 0 -> 462 bytes dependencyParser/mate-tools/classes/is2/io/TigerReader.class | Bin 0 -> 4750 bytes dependencyParser/mate-tools/classes/is2/lemmatizer/Evaluator$1.class | Bin 0 -> 1371 bytes dependencyParser/mate-tools/classes/is2/lemmatizer/Evaluator.class | Bin 0 -> 3837 bytes dependencyParser/mate-tools/classes/is2/lemmatizer/Lemmatizer$1.class | Bin 0 -> 1499 bytes dependencyParser/mate-tools/classes/is2/lemmatizer/Lemmatizer.class | Bin 0 -> 15094 bytes dependencyParser/mate-tools/classes/is2/lemmatizer/MFO.class | Bin 0 -> 6423 bytes dependencyParser/mate-tools/classes/is2/lemmatizer/Options.class | Bin 0 -> 3195 bytes dependencyParser/mate-tools/classes/is2/lemmatizer/Pipe$1.class | Bin 0 -> 1430 bytes dependencyParser/mate-tools/classes/is2/lemmatizer/Pipe.class | Bin 0 -> 18093 bytes dependencyParser/mate-tools/classes/is2/lemmatizer/StringEdit.class | Bin 0 -> 5863 bytes dependencyParser/mate-tools/classes/is2/mtag/Convert.class | Bin 0 -> 3442 bytes dependencyParser/mate-tools/classes/is2/mtag/Evaluator$1.class | Bin 0 -> 1353 bytes dependencyParser/mate-tools/classes/is2/mtag/Evaluator.class | Bin 0 -> 5012 bytes dependencyParser/mate-tools/classes/is2/mtag/ExtractorM.class | Bin 0 -> 16754 bytes dependencyParser/mate-tools/classes/is2/mtag/MFO$Data.class | Bin 0 -> 576 bytes dependencyParser/mate-tools/classes/is2/mtag/MFO$Data4.class | Bin 0 -> 3022 bytes dependencyParser/mate-tools/classes/is2/mtag/MFO.class | Bin 0 -> 8628 bytes dependencyParser/mate-tools/classes/is2/mtag/Options.class | Bin 0 -> 2760 bytes dependencyParser/mate-tools/classes/is2/mtag/Pipe.class | Bin 0 -> 16478 bytes dependencyParser/mate-tools/classes/is2/mtag/Tagger.class | Bin 0 -> 11439 bytes dependencyParser/mate-tools/classes/is2/parser/Closed.class | Bin 0 -> 806 bytes dependencyParser/mate-tools/classes/is2/parser/D5.class | Bin 0 -> 4353 bytes dependencyParser/mate-tools/classes/is2/parser/Decoder.class | Bin 0 -> 5509 bytes dependencyParser/mate-tools/classes/is2/parser/Edges$C.class | Bin 0 -> 1476 bytes dependencyParser/mate-tools/classes/is2/parser/Edges.class | Bin 0 -> 4217 bytes dependencyParser/mate-tools/classes/is2/parser/Evaluator$Results.class | Bin 0 -> 424 bytes dependencyParser/mate-tools/classes/is2/parser/Evaluator.class | Bin 0 -> 3009 bytes dependencyParser/mate-tools/classes/is2/parser/Extractor.class | Bin 0 -> 37772 bytes dependencyParser/mate-tools/classes/is2/parser/MFO.class | Bin 0 -> 6415 bytes dependencyParser/mate-tools/classes/is2/parser/Open.class | Bin 0 -> 1059 bytes dependencyParser/mate-tools/classes/is2/parser/Options.class | Bin 0 -> 3330 bytes dependencyParser/mate-tools/classes/is2/parser/ParallelDecoder$DSet.class | Bin 0 -> 497 bytes dependencyParser/mate-tools/classes/is2/parser/ParallelDecoder.class | Bin 0 -> 7039 bytes dependencyParser/mate-tools/classes/is2/parser/ParallelExtract$DSet.class | Bin 0 -> 402 bytes dependencyParser/mate-tools/classes/is2/parser/ParallelExtract.class | Bin 0 -> 6618 bytes dependencyParser/mate-tools/classes/is2/parser/ParallelRearrange$PA.class | Bin 0 -> 595 bytes dependencyParser/mate-tools/classes/is2/parser/ParallelRearrange.class | Bin 0 -> 2669 bytes dependencyParser/mate-tools/classes/is2/parser/Parameters.class | Bin 0 -> 598 bytes dependencyParser/mate-tools/classes/is2/parser/ParametersFloat.class | Bin 0 -> 3497 bytes dependencyParser/mate-tools/classes/is2/parser/Parser.class | Bin 0 -> 18950 bytes dependencyParser/mate-tools/classes/is2/parser/Pipe.class | Bin 0 -> 6984 bytes dependencyParser/mate-tools/classes/is2/parser/package.html | 11 +++++++++++ dependencyParser/mate-tools/classes/is2/parserR2/Decoder.class | Bin 0 -> 8871 bytes dependencyParser/mate-tools/classes/is2/parserR2/Options.class | Bin 0 -> 3864 bytes dependencyParser/mate-tools/classes/is2/parserR2/Parameters.class | Bin 0 -> 602 bytes dependencyParser/mate-tools/classes/is2/parserR2/ParametersFloat.class | Bin 0 -> 4532 bytes dependencyParser/mate-tools/classes/is2/parserR2/Parser.class | Bin 0 -> 17961 bytes dependencyParser/mate-tools/classes/is2/parserR2/Pipe.class | Bin 0 -> 7608 bytes dependencyParser/mate-tools/classes/is2/parserR2/PipeReranker.class | Bin 0 -> 4042 bytes dependencyParser/mate-tools/classes/is2/parserR2/Reranker.class | Bin 0 -> 26638 bytes dependencyParser/mate-tools/classes/is2/parserR2/package.html | 3 +++ dependencyParser/mate-tools/classes/is2/tag/ExtractorT2.class | Bin 0 -> 17820 bytes dependencyParser/mate-tools/classes/is2/tag/Lexicon.class | Bin 0 -> 3857 bytes dependencyParser/mate-tools/classes/is2/tag/MFO$Data4.class | Bin 0 -> 3019 bytes dependencyParser/mate-tools/classes/is2/tag/MFO.class | Bin 0 -> 8816 bytes dependencyParser/mate-tools/classes/is2/tag/Options.class | Bin 0 -> 4563 bytes dependencyParser/mate-tools/classes/is2/tag/POS.class | Bin 0 -> 1042 bytes dependencyParser/mate-tools/classes/is2/tag/Tagger.class | Bin 0 -> 12998 bytes dependencyParser/mate-tools/classes/is2/tag/package.html | 4 ++++ dependencyParser/mate-tools/classes/is2/tools/IPipe.class | Bin 0 -> 268 bytes dependencyParser/mate-tools/classes/is2/tools/Retrainable.class | Bin 0 -> 206 bytes dependencyParser/mate-tools/classes/is2/tools/Tool.class | Bin 0 -> 170 bytes dependencyParser/mate-tools/classes/is2/tools/ToolIO.class | Bin 0 -> 129 bytes dependencyParser/mate-tools/classes/is2/tools/Train.class | Bin 0 -> 365 bytes dependencyParser/mate-tools/classes/is2/util/Convert.class | Bin 0 -> 8979 bytes dependencyParser/mate-tools/classes/is2/util/Convert0409.class | Bin 0 -> 3967 bytes dependencyParser/mate-tools/classes/is2/util/ConvertADJ.class | Bin 0 -> 3356 bytes dependencyParser/mate-tools/classes/is2/util/ConvertLowerCase0909.class | Bin 0 -> 1702 bytes dependencyParser/mate-tools/classes/is2/util/ConvertTiger2CoNLL.class | Bin 0 -> 3559 bytes dependencyParser/mate-tools/classes/is2/util/DB.class | Bin 0 -> 2535 bytes dependencyParser/mate-tools/classes/is2/util/Edges$C.class | Bin 0 -> 1470 bytes dependencyParser/mate-tools/classes/is2/util/Edges.class | Bin 0 -> 4211 bytes dependencyParser/mate-tools/classes/is2/util/Evaluator$1.class | Bin 0 -> 1359 bytes dependencyParser/mate-tools/classes/is2/util/Evaluator$Results.class | Bin 0 -> 558 bytes dependencyParser/mate-tools/classes/is2/util/Evaluator.class | Bin 0 -> 14710 bytes dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$1.class | Bin 0 -> 1383 bytes dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$2.class | Bin 0 -> 1412 bytes dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$Results.class | Bin 0 -> 582 bytes dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger.class | Bin 0 -> 18350 bytes dependencyParser/mate-tools/classes/is2/util/ExtractParagraphs.class | Bin 0 -> 2592 bytes dependencyParser/mate-tools/classes/is2/util/IntStack.class | Bin 0 -> 1918 bytes dependencyParser/mate-tools/classes/is2/util/Long2Int.class | Bin 0 -> 892 bytes dependencyParser/mate-tools/classes/is2/util/Options.class | Bin 0 -> 4636 bytes dependencyParser/mate-tools/classes/is2/util/OptionsSuper.class | Bin 0 -> 7107 bytes dependencyParser/mate-tools/classes/is2/util/ParserEvaluator$Results.class | Bin 0 -> 442 bytes dependencyParser/mate-tools/classes/is2/util/ParserEvaluator.class | Bin 0 -> 3074 bytes dependencyParser/mate-tools/classes/is2/util/Split.class | Bin 0 -> 2958 bytes dependencyParser/mate-tools/classes/is2/util/Split2.class | Bin 0 -> 2147 bytes dependencyParser/mate-tools/classes/is2/util/Split3.class | Bin 0 -> 1688 bytes dependencyParser/mate-tools/src/decoder/ParallelDecoder.java | 202 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest.java | 172 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------- dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest2.java | 157 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------- dependencyParser/mate-tools/src/examples/DependencyParser.java | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------- dependencyParser/mate-tools/src/examples/FullPipelineSpanish.java | 80 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------- dependencyParser/mate-tools/src/examples/FullPipelineTest.java | 178 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/examples/MorphTagger.java | 72 ++++++++++++++++++++++++++++++++++++------------------------------------ dependencyParser/mate-tools/src/examples/ParseOnly.java | 36 +++++++++++++++++------------------- dependencyParser/mate-tools/src/examples/Pipeline.java | 67 +++++++++++++++++++++++++++++++------------------------------------ dependencyParser/mate-tools/src/extractors/Extractor.java | 27 ++++++++++----------------- dependencyParser/mate-tools/src/extractors/ExtractorClusterStacked.java | 2284 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/extractors/ExtractorClusterStackedR2.java | 2274 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/extractors/ExtractorFactory.java | 28 +++++++++++++--------------- dependencyParser/mate-tools/src/extractors/ExtractorReranker.java | 1241 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/extractors/ParallelExtract.java | 261 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/Closed.java | 29 +++++++++++++---------------- dependencyParser/mate-tools/src/is2/data/Cluster.java | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/D4.java | 265 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/data/D6.java | 273 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/D7.java | 328 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/DPSTree.java | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/data/DX.java | 20 +++++++++----------- dependencyParser/mate-tools/src/is2/data/DataF.java | 62 +++++++++++++++++++++++++++++--------------------------------- dependencyParser/mate-tools/src/is2/data/DataFES.java | 60 ++++++++++++++++++++++++++++-------------------------------- dependencyParser/mate-tools/src/is2/data/DataT.java | 30 ++++++++++++------------------ dependencyParser/mate-tools/src/is2/data/Edges.java | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/data/F2S.java | 32 +++++++++++++++++++------------- dependencyParser/mate-tools/src/is2/data/F2SD.java | 28 ++++++++++++++-------------- dependencyParser/mate-tools/src/is2/data/F2SF.java | 59 +++++++++++++++++++++++++++++------------------------------ dependencyParser/mate-tools/src/is2/data/F2SP.java | 59 ++++++++++++++++++++++++++++++----------------------------- dependencyParser/mate-tools/src/is2/data/F2ST.java | 31 ++++++++++++++++--------------- dependencyParser/mate-tools/src/is2/data/FV.java | 559 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/FVR.java | 495 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/data/IEncoder.java | 12 +++++------- dependencyParser/mate-tools/src/is2/data/IEncoderPlus.java | 15 +++++++-------- dependencyParser/mate-tools/src/is2/data/IFV.java | 27 ++++++++++++++------------- dependencyParser/mate-tools/src/is2/data/Instances.java | 358 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/InstancesTagger.java | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------- dependencyParser/mate-tools/src/is2/data/IntIntHash.java | 219 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/Long2Int.java | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/Long2IntExact.java | 71 ++++++++++++++++++++++++++++++++++++++--------------------------------- dependencyParser/mate-tools/src/is2/data/Long2IntInterface.java | 7 ++++--- dependencyParser/mate-tools/src/is2/data/Long2IntQuick.java | 56 ++++++++++++++++++++++++++++++-------------------------- dependencyParser/mate-tools/src/is2/data/MFB.java | 322 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/MFC.java | 309 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/data/MFO.java | 350 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/Open.java | 29 +++++++++++++++-------------- dependencyParser/mate-tools/src/is2/data/PSTree.java | 634 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/data/Parameter.java | 6 +++--- dependencyParser/mate-tools/src/is2/data/ParametersFloat.java | 162 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/Parse.java | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/ParseNBest.java | 82 +++++++++++++++++++++++++++++++++++++++------------------------------------------- dependencyParser/mate-tools/src/is2/data/PipeGen.java | 88 ++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------- dependencyParser/mate-tools/src/is2/data/PrimeFinder.java | 91 ++++++++++++++++++++++++++++++++++++++----------------------------------------------------- dependencyParser/mate-tools/src/is2/data/RandomIndex.java | 247 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/SentenceData09.java | 484 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/data/Thesaurus.java | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/io/CONLLReader04.java | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/io/CONLLReader06.java | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/io/CONLLReader08.java | 360 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/io/CONLLReader09.java | 362 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/io/CONLLWriter06.java | 254 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/io/CONLLWriter09.java | 435 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/io/IOGenerals.java | 13 ++++++------- dependencyParser/mate-tools/src/is2/io/PSReader.java | 8 ++++---- dependencyParser/mate-tools/src/is2/io/TigerReader.java | 427 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/lemmatizer/Evaluator.java | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------- dependencyParser/mate-tools/src/is2/lemmatizer/Lemmatizer.java | 428 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/lemmatizer/MFO.java | 301 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/lemmatizer/Options.java | 73 +++++++++++++++++++++++++++++++++++++++---------------------------------- dependencyParser/mate-tools/src/is2/lemmatizer/Pipe.java | 630 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/lemmatizer/StringEdit.java | 311 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/mtag/Convert.java | 97 +++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------ dependencyParser/mate-tools/src/is2/mtag/Evaluator.java | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/mtag/ExtractorM.java | 743 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/mtag/MFO.java | 522 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/mtag/Options.java | 47 ++++++++++++++++++++++++++++------------------- dependencyParser/mate-tools/src/is2/mtag/Pipe.java | 731 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/mtag/Tagger.java | 427 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/parser/Closed.java | 32 +++++++++++++++----------------- dependencyParser/mate-tools/src/is2/parser/D5.java | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/parser/Decoder.java | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/parser/Edges.java | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/parser/Evaluator.java | 74 ++++++++++++++++++++++++++++++++++++++++---------------------------------- dependencyParser/mate-tools/src/is2/parser/Extractor.java | 2007 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/parser/MFO.java | 303 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/parser/Open.java | 28 +++++++++++++++------------- dependencyParser/mate-tools/src/is2/parser/Options.java | 69 ++++++++++++++++++++++++++++++++++++++------------------------------- dependencyParser/mate-tools/src/is2/parser/ParallelDecoder.java | 254 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/parser/ParallelExtract.java | 230 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/parser/ParallelRearrange.java | 131 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------- dependencyParser/mate-tools/src/is2/parser/Parameters.java | 21 ++++++++++----------- dependencyParser/mate-tools/src/is2/parser/ParametersFloat.java | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------- dependencyParser/mate-tools/src/is2/parser/Parser.java | 632 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/parser/Pipe.java | 165 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/parserR2/Decoder.java | 402 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/parserR2/Options.java | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------- dependencyParser/mate-tools/src/is2/parserR2/Parameters.java | 21 ++++++++++----------- dependencyParser/mate-tools/src/is2/parserR2/ParametersFloat.java | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/parserR2/Parser.java | 617 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/parserR2/Pipe.java | 190 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/parserR2/PipeReranker.java | 99 ++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------- dependencyParser/mate-tools/src/is2/parserR2/Reranker.java | 872 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/tag/ExtractorT2.java | 849 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/tag/Lexicon.java | 118 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------ dependencyParser/mate-tools/src/is2/tag/MFO.java | 515 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/tag/Options.java | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/tag/POS.java | 21 +++++++++++---------- dependencyParser/mate-tools/src/is2/tag/Tagger.java | 464 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/tools/IPipe.java | 12 +++++------- dependencyParser/mate-tools/src/is2/tools/Retrainable.java | 23 ++++++++++++++--------- dependencyParser/mate-tools/src/is2/tools/Tool.java | 22 +++++++++++----------- dependencyParser/mate-tools/src/is2/tools/ToolIO.java | 12 +++++------- dependencyParser/mate-tools/src/is2/tools/Train.java | 6 +++--- dependencyParser/mate-tools/src/is2/util/Convert.java | 500 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/util/Convert0409.java | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------ dependencyParser/mate-tools/src/is2/util/ConvertADJ.java | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/util/ConvertLowerCase0909.java | 83 +++++++++++++++++++++++++++++++++++------------------------------------------------ dependencyParser/mate-tools/src/is2/util/ConvertTiger2CoNLL.java | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------- dependencyParser/mate-tools/src/is2/util/DB.java | 55 ++++++++++++++++++++++++++----------------------------- dependencyParser/mate-tools/src/is2/util/Edges.java | 197 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/util/Evaluator.java | 625 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/util/EvaluatorTagger.java | 783 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/util/ExtractParagraphs.java | 97 ++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------- dependencyParser/mate-tools/src/is2/util/IntStack.java | 80 ++++++++++++++++++++++++++++++++++++++++++-------------------------------------- dependencyParser/mate-tools/src/is2/util/Long2Int.java | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------------------------------- dependencyParser/mate-tools/src/is2/util/Options.java | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/util/OptionsSuper.java | 265 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------------------------------------------------------------------------------------------------------- dependencyParser/mate-tools/src/is2/util/ParserEvaluator.java | 74 ++++++++++++++++++++++++++++++++++++++++---------------------------------- dependencyParser/mate-tools/src/is2/util/Split.java | 83 +++++++++++++++++++++++++++++++++++++++-------------------------------------------- dependencyParser/mate-tools/src/is2/util/Split2.java | 48 +++++++++++++++++------------------------------- dependencyParser/mate-tools/src/is2/util/Split3.java | 44 ++++++++++++++------------------------------ 294 files changed, 19011 insertions(+), 15045 deletions(-) create mode 100644 dependencyParser/mate-tools/classes/decoder/ParallelDecoder$DSet.class create mode 100644 dependencyParser/mate-tools/classes/decoder/ParallelDecoder.class create mode 100644 dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest$PA.class create mode 100644 dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest.class create mode 100644 dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest2$PA.class create mode 100644 dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest2.class create mode 100644 dependencyParser/mate-tools/classes/examples/DependencyParser.class create mode 100644 dependencyParser/mate-tools/classes/examples/FullPipelineSpanish.class create mode 100644 dependencyParser/mate-tools/classes/examples/FullPipelineTest.class create mode 100644 dependencyParser/mate-tools/classes/examples/MorphTagger.class create mode 100644 dependencyParser/mate-tools/classes/examples/ParseOnly.class create mode 100644 dependencyParser/mate-tools/classes/examples/Pipeline.class create mode 100644 dependencyParser/mate-tools/classes/extractors/Extractor.class create mode 100644 dependencyParser/mate-tools/classes/extractors/ExtractorClusterStacked.class create mode 100644 dependencyParser/mate-tools/classes/extractors/ExtractorClusterStackedR2.class create mode 100644 dependencyParser/mate-tools/classes/extractors/ExtractorFactory.class create mode 100644 dependencyParser/mate-tools/classes/extractors/ExtractorReranker.class create mode 100644 dependencyParser/mate-tools/classes/extractors/ParallelExtract$DSet.class create mode 100644 dependencyParser/mate-tools/classes/extractors/ParallelExtract.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Closed.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Cluster.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/D4.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/D6.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/D7.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/DPSTree.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/DX.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/DataF.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/DataFES.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/DataT.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Edges$C.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Edges.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/F2S.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/F2SD.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/F2SF.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/F2SP.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/F2ST.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/FV.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/FVR.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/IEncoder.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/IEncoderPlus.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/IFV.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Instances.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/InstancesTagger.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/IntIntHash.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Long2Int.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Long2IntExact.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Long2IntInterface.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Long2IntQuick.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/MFB.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/MFC.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/MFO$Data4.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/MFO.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Open.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/PSTree.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Parameter.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/ParametersFloat.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Parse.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/ParseNBest.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/PipeGen.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/PrimeFinder.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/RandomIndex.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/SentenceData09.class create mode 100644 dependencyParser/mate-tools/classes/is2/data/Thesaurus.class create mode 100644 dependencyParser/mate-tools/classes/is2/io/CONLLReader04.class create mode 100644 dependencyParser/mate-tools/classes/is2/io/CONLLReader06.class create mode 100644 dependencyParser/mate-tools/classes/is2/io/CONLLReader08.class create mode 100644 dependencyParser/mate-tools/classes/is2/io/CONLLReader09.class create mode 100644 dependencyParser/mate-tools/classes/is2/io/CONLLWriter06.class create mode 100644 dependencyParser/mate-tools/classes/is2/io/CONLLWriter09.class create mode 100644 dependencyParser/mate-tools/classes/is2/io/IOGenerals.class create mode 100644 dependencyParser/mate-tools/classes/is2/io/PSReader.class create mode 100644 dependencyParser/mate-tools/classes/is2/io/TigerReader$Line.class create mode 100644 dependencyParser/mate-tools/classes/is2/io/TigerReader.class create mode 100644 dependencyParser/mate-tools/classes/is2/lemmatizer/Evaluator$1.class create mode 100644 dependencyParser/mate-tools/classes/is2/lemmatizer/Evaluator.class create mode 100644 dependencyParser/mate-tools/classes/is2/lemmatizer/Lemmatizer$1.class create mode 100644 dependencyParser/mate-tools/classes/is2/lemmatizer/Lemmatizer.class create mode 100644 dependencyParser/mate-tools/classes/is2/lemmatizer/MFO.class create mode 100644 dependencyParser/mate-tools/classes/is2/lemmatizer/Options.class create mode 100644 dependencyParser/mate-tools/classes/is2/lemmatizer/Pipe$1.class create mode 100644 dependencyParser/mate-tools/classes/is2/lemmatizer/Pipe.class create mode 100644 dependencyParser/mate-tools/classes/is2/lemmatizer/StringEdit.class create mode 100644 dependencyParser/mate-tools/classes/is2/mtag/Convert.class create mode 100644 dependencyParser/mate-tools/classes/is2/mtag/Evaluator$1.class create mode 100644 dependencyParser/mate-tools/classes/is2/mtag/Evaluator.class create mode 100644 dependencyParser/mate-tools/classes/is2/mtag/ExtractorM.class create mode 100644 dependencyParser/mate-tools/classes/is2/mtag/MFO$Data.class create mode 100644 dependencyParser/mate-tools/classes/is2/mtag/MFO$Data4.class create mode 100644 dependencyParser/mate-tools/classes/is2/mtag/MFO.class create mode 100644 dependencyParser/mate-tools/classes/is2/mtag/Options.class create mode 100644 dependencyParser/mate-tools/classes/is2/mtag/Pipe.class create mode 100644 dependencyParser/mate-tools/classes/is2/mtag/Tagger.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Closed.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/D5.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Decoder.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Edges$C.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Edges.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Evaluator$Results.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Evaluator.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Extractor.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/MFO.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Open.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Options.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/ParallelDecoder$DSet.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/ParallelDecoder.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/ParallelExtract$DSet.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/ParallelExtract.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/ParallelRearrange$PA.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/ParallelRearrange.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Parameters.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/ParametersFloat.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Parser.class create mode 100644 dependencyParser/mate-tools/classes/is2/parser/Pipe.class create mode 100755 dependencyParser/mate-tools/classes/is2/parser/package.html create mode 100644 dependencyParser/mate-tools/classes/is2/parserR2/Decoder.class create mode 100644 dependencyParser/mate-tools/classes/is2/parserR2/Options.class create mode 100644 dependencyParser/mate-tools/classes/is2/parserR2/Parameters.class create mode 100644 dependencyParser/mate-tools/classes/is2/parserR2/ParametersFloat.class create mode 100644 dependencyParser/mate-tools/classes/is2/parserR2/Parser.class create mode 100644 dependencyParser/mate-tools/classes/is2/parserR2/Pipe.class create mode 100644 dependencyParser/mate-tools/classes/is2/parserR2/PipeReranker.class create mode 100644 dependencyParser/mate-tools/classes/is2/parserR2/Reranker.class create mode 100755 dependencyParser/mate-tools/classes/is2/parserR2/package.html create mode 100644 dependencyParser/mate-tools/classes/is2/tag/ExtractorT2.class create mode 100644 dependencyParser/mate-tools/classes/is2/tag/Lexicon.class create mode 100644 dependencyParser/mate-tools/classes/is2/tag/MFO$Data4.class create mode 100644 dependencyParser/mate-tools/classes/is2/tag/MFO.class create mode 100644 dependencyParser/mate-tools/classes/is2/tag/Options.class create mode 100644 dependencyParser/mate-tools/classes/is2/tag/POS.class create mode 100644 dependencyParser/mate-tools/classes/is2/tag/Tagger.class create mode 100644 dependencyParser/mate-tools/classes/is2/tag/package.html create mode 100644 dependencyParser/mate-tools/classes/is2/tools/IPipe.class create mode 100644 dependencyParser/mate-tools/classes/is2/tools/Retrainable.class create mode 100644 dependencyParser/mate-tools/classes/is2/tools/Tool.class create mode 100644 dependencyParser/mate-tools/classes/is2/tools/ToolIO.class create mode 100644 dependencyParser/mate-tools/classes/is2/tools/Train.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Convert.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Convert0409.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/ConvertADJ.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/ConvertLowerCase0909.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/ConvertTiger2CoNLL.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/DB.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Edges$C.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Edges.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Evaluator$1.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Evaluator$Results.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Evaluator.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$1.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$2.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$Results.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/ExtractParagraphs.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/IntStack.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Long2Int.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Options.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/OptionsSuper.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/ParserEvaluator$Results.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/ParserEvaluator.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Split.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Split2.class create mode 100644 dependencyParser/mate-tools/classes/is2/util/Split3.class diff --git a/dependencyParser/mate-tools/.classpath b/dependencyParser/mate-tools/.classpath index 8092159..4d8fea6 100644 --- a/dependencyParser/mate-tools/.classpath +++ b/dependencyParser/mate-tools/.classpath @@ -1,8 +1,8 @@ -<?xml version="1.0" encoding="UTF-8"?> -<classpath> - <classpathentry kind="src" path="src"/> - <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> - <classpathentry kind="lib" path="/mtt/lib/trove.jar"/> - <classpathentry kind="lib" path="lib/commons-math-2.2.jar"/> - <classpathentry kind="output" path="classes"/> -</classpath> +<?xml version="1.0" encoding="UTF-8"?> +<classpath> + <classpathentry kind="src" path="src"/> + <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/> + <classpathentry kind="lib" path="lib/commons-math-2.2.jar"/> + <classpathentry kind="lib" path="/home/masterofu/workspace/mate-tools/lib/trove-2.0.4.jar"/> + <classpathentry kind="output" path="classes"/> +</classpath> diff --git a/dependencyParser/mate-tools/classes/decoder/ParallelDecoder$DSet.class b/dependencyParser/mate-tools/classes/decoder/ParallelDecoder$DSet.class new file mode 100644 index 0000000..54de172 Binary files /dev/null and b/dependencyParser/mate-tools/classes/decoder/ParallelDecoder$DSet.class differ diff --git a/dependencyParser/mate-tools/classes/decoder/ParallelDecoder.class b/dependencyParser/mate-tools/classes/decoder/ParallelDecoder.class new file mode 100644 index 0000000..96350c1 Binary files /dev/null and b/dependencyParser/mate-tools/classes/decoder/ParallelDecoder.class differ diff --git a/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest$PA.class b/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest$PA.class new file mode 100644 index 0000000..a476a4f Binary files /dev/null and b/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest$PA.class differ diff --git a/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest.class b/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest.class new file mode 100644 index 0000000..b5e5ec0 Binary files /dev/null and b/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest.class differ diff --git a/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest2$PA.class b/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest2$PA.class new file mode 100644 index 0000000..70762c4 Binary files /dev/null and b/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest2$PA.class differ diff --git a/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest2.class b/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest2.class new file mode 100644 index 0000000..996ec56 Binary files /dev/null and b/dependencyParser/mate-tools/classes/decoder/ParallelRearrangeNBest2.class differ diff --git a/dependencyParser/mate-tools/classes/examples/DependencyParser.class b/dependencyParser/mate-tools/classes/examples/DependencyParser.class new file mode 100644 index 0000000..37c977b Binary files /dev/null and b/dependencyParser/mate-tools/classes/examples/DependencyParser.class differ diff --git a/dependencyParser/mate-tools/classes/examples/FullPipelineSpanish.class b/dependencyParser/mate-tools/classes/examples/FullPipelineSpanish.class new file mode 100644 index 0000000..96e89a2 Binary files /dev/null and b/dependencyParser/mate-tools/classes/examples/FullPipelineSpanish.class differ diff --git a/dependencyParser/mate-tools/classes/examples/FullPipelineTest.class b/dependencyParser/mate-tools/classes/examples/FullPipelineTest.class new file mode 100644 index 0000000..01d550b Binary files /dev/null and b/dependencyParser/mate-tools/classes/examples/FullPipelineTest.class differ diff --git a/dependencyParser/mate-tools/classes/examples/MorphTagger.class b/dependencyParser/mate-tools/classes/examples/MorphTagger.class new file mode 100644 index 0000000..590def3 Binary files /dev/null and b/dependencyParser/mate-tools/classes/examples/MorphTagger.class differ diff --git a/dependencyParser/mate-tools/classes/examples/ParseOnly.class b/dependencyParser/mate-tools/classes/examples/ParseOnly.class new file mode 100644 index 0000000..f0ba0ec Binary files /dev/null and b/dependencyParser/mate-tools/classes/examples/ParseOnly.class differ diff --git a/dependencyParser/mate-tools/classes/examples/Pipeline.class b/dependencyParser/mate-tools/classes/examples/Pipeline.class new file mode 100644 index 0000000..3e573bf Binary files /dev/null and b/dependencyParser/mate-tools/classes/examples/Pipeline.class differ diff --git a/dependencyParser/mate-tools/classes/extractors/Extractor.class b/dependencyParser/mate-tools/classes/extractors/Extractor.class new file mode 100644 index 0000000..688223e Binary files /dev/null and b/dependencyParser/mate-tools/classes/extractors/Extractor.class differ diff --git a/dependencyParser/mate-tools/classes/extractors/ExtractorClusterStacked.class b/dependencyParser/mate-tools/classes/extractors/ExtractorClusterStacked.class new file mode 100644 index 0000000..38ebfe6 Binary files /dev/null and b/dependencyParser/mate-tools/classes/extractors/ExtractorClusterStacked.class differ diff --git a/dependencyParser/mate-tools/classes/extractors/ExtractorClusterStackedR2.class b/dependencyParser/mate-tools/classes/extractors/ExtractorClusterStackedR2.class new file mode 100644 index 0000000..45488a7 Binary files /dev/null and b/dependencyParser/mate-tools/classes/extractors/ExtractorClusterStackedR2.class differ diff --git a/dependencyParser/mate-tools/classes/extractors/ExtractorFactory.class b/dependencyParser/mate-tools/classes/extractors/ExtractorFactory.class new file mode 100644 index 0000000..f3bb8bf Binary files /dev/null and b/dependencyParser/mate-tools/classes/extractors/ExtractorFactory.class differ diff --git a/dependencyParser/mate-tools/classes/extractors/ExtractorReranker.class b/dependencyParser/mate-tools/classes/extractors/ExtractorReranker.class new file mode 100644 index 0000000..3d471e2 Binary files /dev/null and b/dependencyParser/mate-tools/classes/extractors/ExtractorReranker.class differ diff --git a/dependencyParser/mate-tools/classes/extractors/ParallelExtract$DSet.class b/dependencyParser/mate-tools/classes/extractors/ParallelExtract$DSet.class new file mode 100644 index 0000000..c4ebe42 Binary files /dev/null and b/dependencyParser/mate-tools/classes/extractors/ParallelExtract$DSet.class differ diff --git a/dependencyParser/mate-tools/classes/extractors/ParallelExtract.class b/dependencyParser/mate-tools/classes/extractors/ParallelExtract.class new file mode 100644 index 0000000..3b7bd76 Binary files /dev/null and b/dependencyParser/mate-tools/classes/extractors/ParallelExtract.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Closed.class b/dependencyParser/mate-tools/classes/is2/data/Closed.class new file mode 100644 index 0000000..ffb6a29 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Closed.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Cluster.class b/dependencyParser/mate-tools/classes/is2/data/Cluster.class new file mode 100644 index 0000000..de8d544 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Cluster.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/D4.class b/dependencyParser/mate-tools/classes/is2/data/D4.class new file mode 100644 index 0000000..1911800 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/D4.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/D6.class b/dependencyParser/mate-tools/classes/is2/data/D6.class new file mode 100644 index 0000000..2e48adb Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/D6.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/D7.class b/dependencyParser/mate-tools/classes/is2/data/D7.class new file mode 100644 index 0000000..ad36d57 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/D7.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/DPSTree.class b/dependencyParser/mate-tools/classes/is2/data/DPSTree.class new file mode 100644 index 0000000..ffe3741 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/DPSTree.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/DX.class b/dependencyParser/mate-tools/classes/is2/data/DX.class new file mode 100644 index 0000000..99e0087 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/DX.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/DataF.class b/dependencyParser/mate-tools/classes/is2/data/DataF.class new file mode 100644 index 0000000..132234d Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/DataF.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/DataFES.class b/dependencyParser/mate-tools/classes/is2/data/DataFES.class new file mode 100644 index 0000000..3ca57f1 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/DataFES.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/DataT.class b/dependencyParser/mate-tools/classes/is2/data/DataT.class new file mode 100644 index 0000000..0a847d6 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/DataT.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Edges$C.class b/dependencyParser/mate-tools/classes/is2/data/Edges$C.class new file mode 100644 index 0000000..a8645f6 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Edges$C.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Edges.class b/dependencyParser/mate-tools/classes/is2/data/Edges.class new file mode 100644 index 0000000..819d9e4 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Edges.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/F2S.class b/dependencyParser/mate-tools/classes/is2/data/F2S.class new file mode 100644 index 0000000..9116920 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/F2S.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/F2SD.class b/dependencyParser/mate-tools/classes/is2/data/F2SD.class new file mode 100644 index 0000000..7e11c51 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/F2SD.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/F2SF.class b/dependencyParser/mate-tools/classes/is2/data/F2SF.class new file mode 100644 index 0000000..975f47a Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/F2SF.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/F2SP.class b/dependencyParser/mate-tools/classes/is2/data/F2SP.class new file mode 100644 index 0000000..e554dcf Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/F2SP.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/F2ST.class b/dependencyParser/mate-tools/classes/is2/data/F2ST.class new file mode 100644 index 0000000..3a36a04 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/F2ST.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/FV.class b/dependencyParser/mate-tools/classes/is2/data/FV.class new file mode 100644 index 0000000..19db729 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/FV.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/FVR.class b/dependencyParser/mate-tools/classes/is2/data/FVR.class new file mode 100644 index 0000000..a97d526 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/FVR.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/IEncoder.class b/dependencyParser/mate-tools/classes/is2/data/IEncoder.class new file mode 100644 index 0000000..65e0af1 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/IEncoder.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/IEncoderPlus.class b/dependencyParser/mate-tools/classes/is2/data/IEncoderPlus.class new file mode 100644 index 0000000..e6d31c9 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/IEncoderPlus.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/IFV.class b/dependencyParser/mate-tools/classes/is2/data/IFV.class new file mode 100644 index 0000000..debfe75 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/IFV.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Instances.class b/dependencyParser/mate-tools/classes/is2/data/Instances.class new file mode 100644 index 0000000..6674d1d Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Instances.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/InstancesTagger.class b/dependencyParser/mate-tools/classes/is2/data/InstancesTagger.class new file mode 100644 index 0000000..d3a9d82 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/InstancesTagger.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/IntIntHash.class b/dependencyParser/mate-tools/classes/is2/data/IntIntHash.class new file mode 100644 index 0000000..e9c299e Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/IntIntHash.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Long2Int.class b/dependencyParser/mate-tools/classes/is2/data/Long2Int.class new file mode 100644 index 0000000..7cff024 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Long2Int.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Long2IntExact.class b/dependencyParser/mate-tools/classes/is2/data/Long2IntExact.class new file mode 100644 index 0000000..a42466b Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Long2IntExact.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Long2IntInterface.class b/dependencyParser/mate-tools/classes/is2/data/Long2IntInterface.class new file mode 100644 index 0000000..54a8684 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Long2IntInterface.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Long2IntQuick.class b/dependencyParser/mate-tools/classes/is2/data/Long2IntQuick.class new file mode 100644 index 0000000..6652115 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Long2IntQuick.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/MFB.class b/dependencyParser/mate-tools/classes/is2/data/MFB.class new file mode 100644 index 0000000..92134af Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/MFB.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/MFC.class b/dependencyParser/mate-tools/classes/is2/data/MFC.class new file mode 100644 index 0000000..45afacc Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/MFC.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/MFO$Data4.class b/dependencyParser/mate-tools/classes/is2/data/MFO$Data4.class new file mode 100644 index 0000000..300a47e Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/MFO$Data4.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/MFO.class b/dependencyParser/mate-tools/classes/is2/data/MFO.class new file mode 100644 index 0000000..17a6f0f Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/MFO.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Open.class b/dependencyParser/mate-tools/classes/is2/data/Open.class new file mode 100644 index 0000000..eaf9919 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Open.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/PSTree.class b/dependencyParser/mate-tools/classes/is2/data/PSTree.class new file mode 100644 index 0000000..abf080e Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/PSTree.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Parameter.class b/dependencyParser/mate-tools/classes/is2/data/Parameter.class new file mode 100644 index 0000000..f06c2ee Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Parameter.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/ParametersFloat.class b/dependencyParser/mate-tools/classes/is2/data/ParametersFloat.class new file mode 100644 index 0000000..33a70b2 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/ParametersFloat.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Parse.class b/dependencyParser/mate-tools/classes/is2/data/Parse.class new file mode 100644 index 0000000..f1260af Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Parse.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/ParseNBest.class b/dependencyParser/mate-tools/classes/is2/data/ParseNBest.class new file mode 100644 index 0000000..809247a Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/ParseNBest.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/PipeGen.class b/dependencyParser/mate-tools/classes/is2/data/PipeGen.class new file mode 100644 index 0000000..c5c69b4 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/PipeGen.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/PrimeFinder.class b/dependencyParser/mate-tools/classes/is2/data/PrimeFinder.class new file mode 100644 index 0000000..795c3f5 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/PrimeFinder.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/RandomIndex.class b/dependencyParser/mate-tools/classes/is2/data/RandomIndex.class new file mode 100644 index 0000000..42b1a83 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/RandomIndex.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/SentenceData09.class b/dependencyParser/mate-tools/classes/is2/data/SentenceData09.class new file mode 100644 index 0000000..5aa4fe8 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/SentenceData09.class differ diff --git a/dependencyParser/mate-tools/classes/is2/data/Thesaurus.class b/dependencyParser/mate-tools/classes/is2/data/Thesaurus.class new file mode 100644 index 0000000..63d81a7 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/data/Thesaurus.class differ diff --git a/dependencyParser/mate-tools/classes/is2/io/CONLLReader04.class b/dependencyParser/mate-tools/classes/is2/io/CONLLReader04.class new file mode 100644 index 0000000..324220f Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/io/CONLLReader04.class differ diff --git a/dependencyParser/mate-tools/classes/is2/io/CONLLReader06.class b/dependencyParser/mate-tools/classes/is2/io/CONLLReader06.class new file mode 100644 index 0000000..c72c6bf Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/io/CONLLReader06.class differ diff --git a/dependencyParser/mate-tools/classes/is2/io/CONLLReader08.class b/dependencyParser/mate-tools/classes/is2/io/CONLLReader08.class new file mode 100644 index 0000000..548f709 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/io/CONLLReader08.class differ diff --git a/dependencyParser/mate-tools/classes/is2/io/CONLLReader09.class b/dependencyParser/mate-tools/classes/is2/io/CONLLReader09.class new file mode 100644 index 0000000..edcaff7 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/io/CONLLReader09.class differ diff --git a/dependencyParser/mate-tools/classes/is2/io/CONLLWriter06.class b/dependencyParser/mate-tools/classes/is2/io/CONLLWriter06.class new file mode 100644 index 0000000..4171bc0 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/io/CONLLWriter06.class differ diff --git a/dependencyParser/mate-tools/classes/is2/io/CONLLWriter09.class b/dependencyParser/mate-tools/classes/is2/io/CONLLWriter09.class new file mode 100644 index 0000000..0237ffa Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/io/CONLLWriter09.class differ diff --git a/dependencyParser/mate-tools/classes/is2/io/IOGenerals.class b/dependencyParser/mate-tools/classes/is2/io/IOGenerals.class new file mode 100644 index 0000000..a7d8599 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/io/IOGenerals.class differ diff --git a/dependencyParser/mate-tools/classes/is2/io/PSReader.class b/dependencyParser/mate-tools/classes/is2/io/PSReader.class new file mode 100644 index 0000000..f94e563 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/io/PSReader.class differ diff --git a/dependencyParser/mate-tools/classes/is2/io/TigerReader$Line.class b/dependencyParser/mate-tools/classes/is2/io/TigerReader$Line.class new file mode 100644 index 0000000..d3db578 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/io/TigerReader$Line.class differ diff --git a/dependencyParser/mate-tools/classes/is2/io/TigerReader.class b/dependencyParser/mate-tools/classes/is2/io/TigerReader.class new file mode 100644 index 0000000..356bbdf Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/io/TigerReader.class differ diff --git a/dependencyParser/mate-tools/classes/is2/lemmatizer/Evaluator$1.class b/dependencyParser/mate-tools/classes/is2/lemmatizer/Evaluator$1.class new file mode 100644 index 0000000..c13d7a3 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/lemmatizer/Evaluator$1.class differ diff --git a/dependencyParser/mate-tools/classes/is2/lemmatizer/Evaluator.class b/dependencyParser/mate-tools/classes/is2/lemmatizer/Evaluator.class new file mode 100644 index 0000000..01b1a18 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/lemmatizer/Evaluator.class differ diff --git a/dependencyParser/mate-tools/classes/is2/lemmatizer/Lemmatizer$1.class b/dependencyParser/mate-tools/classes/is2/lemmatizer/Lemmatizer$1.class new file mode 100644 index 0000000..c7ba6af Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/lemmatizer/Lemmatizer$1.class differ diff --git a/dependencyParser/mate-tools/classes/is2/lemmatizer/Lemmatizer.class b/dependencyParser/mate-tools/classes/is2/lemmatizer/Lemmatizer.class new file mode 100644 index 0000000..07d39fb Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/lemmatizer/Lemmatizer.class differ diff --git a/dependencyParser/mate-tools/classes/is2/lemmatizer/MFO.class b/dependencyParser/mate-tools/classes/is2/lemmatizer/MFO.class new file mode 100644 index 0000000..b88de89 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/lemmatizer/MFO.class differ diff --git a/dependencyParser/mate-tools/classes/is2/lemmatizer/Options.class b/dependencyParser/mate-tools/classes/is2/lemmatizer/Options.class new file mode 100644 index 0000000..7595712 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/lemmatizer/Options.class differ diff --git a/dependencyParser/mate-tools/classes/is2/lemmatizer/Pipe$1.class b/dependencyParser/mate-tools/classes/is2/lemmatizer/Pipe$1.class new file mode 100644 index 0000000..550ea7f Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/lemmatizer/Pipe$1.class differ diff --git a/dependencyParser/mate-tools/classes/is2/lemmatizer/Pipe.class b/dependencyParser/mate-tools/classes/is2/lemmatizer/Pipe.class new file mode 100644 index 0000000..a243c5d Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/lemmatizer/Pipe.class differ diff --git a/dependencyParser/mate-tools/classes/is2/lemmatizer/StringEdit.class b/dependencyParser/mate-tools/classes/is2/lemmatizer/StringEdit.class new file mode 100644 index 0000000..4e5498c Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/lemmatizer/StringEdit.class differ diff --git a/dependencyParser/mate-tools/classes/is2/mtag/Convert.class b/dependencyParser/mate-tools/classes/is2/mtag/Convert.class new file mode 100644 index 0000000..76de45b Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/mtag/Convert.class differ diff --git a/dependencyParser/mate-tools/classes/is2/mtag/Evaluator$1.class b/dependencyParser/mate-tools/classes/is2/mtag/Evaluator$1.class new file mode 100644 index 0000000..739d1cb Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/mtag/Evaluator$1.class differ diff --git a/dependencyParser/mate-tools/classes/is2/mtag/Evaluator.class b/dependencyParser/mate-tools/classes/is2/mtag/Evaluator.class new file mode 100644 index 0000000..665630c Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/mtag/Evaluator.class differ diff --git a/dependencyParser/mate-tools/classes/is2/mtag/ExtractorM.class b/dependencyParser/mate-tools/classes/is2/mtag/ExtractorM.class new file mode 100644 index 0000000..a8880b7 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/mtag/ExtractorM.class differ diff --git a/dependencyParser/mate-tools/classes/is2/mtag/MFO$Data.class b/dependencyParser/mate-tools/classes/is2/mtag/MFO$Data.class new file mode 100644 index 0000000..94d9284 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/mtag/MFO$Data.class differ diff --git a/dependencyParser/mate-tools/classes/is2/mtag/MFO$Data4.class b/dependencyParser/mate-tools/classes/is2/mtag/MFO$Data4.class new file mode 100644 index 0000000..611dff0 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/mtag/MFO$Data4.class differ diff --git a/dependencyParser/mate-tools/classes/is2/mtag/MFO.class b/dependencyParser/mate-tools/classes/is2/mtag/MFO.class new file mode 100644 index 0000000..23cf2f0 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/mtag/MFO.class differ diff --git a/dependencyParser/mate-tools/classes/is2/mtag/Options.class b/dependencyParser/mate-tools/classes/is2/mtag/Options.class new file mode 100644 index 0000000..1dab67c Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/mtag/Options.class differ diff --git a/dependencyParser/mate-tools/classes/is2/mtag/Pipe.class b/dependencyParser/mate-tools/classes/is2/mtag/Pipe.class new file mode 100644 index 0000000..1fe35e6 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/mtag/Pipe.class differ diff --git a/dependencyParser/mate-tools/classes/is2/mtag/Tagger.class b/dependencyParser/mate-tools/classes/is2/mtag/Tagger.class new file mode 100644 index 0000000..1882f1b Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/mtag/Tagger.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Closed.class b/dependencyParser/mate-tools/classes/is2/parser/Closed.class new file mode 100644 index 0000000..062a60d Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Closed.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/D5.class b/dependencyParser/mate-tools/classes/is2/parser/D5.class new file mode 100644 index 0000000..7cda6a8 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/D5.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Decoder.class b/dependencyParser/mate-tools/classes/is2/parser/Decoder.class new file mode 100644 index 0000000..737d95b Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Decoder.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Edges$C.class b/dependencyParser/mate-tools/classes/is2/parser/Edges$C.class new file mode 100644 index 0000000..ebe8aea Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Edges$C.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Edges.class b/dependencyParser/mate-tools/classes/is2/parser/Edges.class new file mode 100644 index 0000000..886a335 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Edges.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Evaluator$Results.class b/dependencyParser/mate-tools/classes/is2/parser/Evaluator$Results.class new file mode 100644 index 0000000..e29795d Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Evaluator$Results.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Evaluator.class b/dependencyParser/mate-tools/classes/is2/parser/Evaluator.class new file mode 100644 index 0000000..70a74c4 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Evaluator.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Extractor.class b/dependencyParser/mate-tools/classes/is2/parser/Extractor.class new file mode 100644 index 0000000..8c045e0 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Extractor.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/MFO.class b/dependencyParser/mate-tools/classes/is2/parser/MFO.class new file mode 100644 index 0000000..16ac10c Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/MFO.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Open.class b/dependencyParser/mate-tools/classes/is2/parser/Open.class new file mode 100644 index 0000000..a94b605 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Open.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Options.class b/dependencyParser/mate-tools/classes/is2/parser/Options.class new file mode 100644 index 0000000..7550ce0 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Options.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/ParallelDecoder$DSet.class b/dependencyParser/mate-tools/classes/is2/parser/ParallelDecoder$DSet.class new file mode 100644 index 0000000..6359bfa Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/ParallelDecoder$DSet.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/ParallelDecoder.class b/dependencyParser/mate-tools/classes/is2/parser/ParallelDecoder.class new file mode 100644 index 0000000..874c8c9 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/ParallelDecoder.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/ParallelExtract$DSet.class b/dependencyParser/mate-tools/classes/is2/parser/ParallelExtract$DSet.class new file mode 100644 index 0000000..af2c8f3 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/ParallelExtract$DSet.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/ParallelExtract.class b/dependencyParser/mate-tools/classes/is2/parser/ParallelExtract.class new file mode 100644 index 0000000..3ab26dd Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/ParallelExtract.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/ParallelRearrange$PA.class b/dependencyParser/mate-tools/classes/is2/parser/ParallelRearrange$PA.class new file mode 100644 index 0000000..12dd7b2 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/ParallelRearrange$PA.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/ParallelRearrange.class b/dependencyParser/mate-tools/classes/is2/parser/ParallelRearrange.class new file mode 100644 index 0000000..466f37e Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/ParallelRearrange.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Parameters.class b/dependencyParser/mate-tools/classes/is2/parser/Parameters.class new file mode 100644 index 0000000..6348c26 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Parameters.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/ParametersFloat.class b/dependencyParser/mate-tools/classes/is2/parser/ParametersFloat.class new file mode 100644 index 0000000..eaeb6e2 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/ParametersFloat.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Parser.class b/dependencyParser/mate-tools/classes/is2/parser/Parser.class new file mode 100644 index 0000000..1e50ad2 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Parser.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/Pipe.class b/dependencyParser/mate-tools/classes/is2/parser/Pipe.class new file mode 100644 index 0000000..fbddb9f Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parser/Pipe.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parser/package.html b/dependencyParser/mate-tools/classes/is2/parser/package.html new file mode 100755 index 0000000..a4f40a2 --- /dev/null +++ b/dependencyParser/mate-tools/classes/is2/parser/package.html @@ -0,0 +1,11 @@ +Package info +<ul> + <li> separate cluster feature to keep speed since two many features in a function reduce speed drastically. </li> + <li> try second order stacking features </li> + <li> parser stacking features </li> + <li> lots of cluster features </li> + <li> Iteration over edges and not extraction of all edges each time </li> + <li> integrated new structurer writer </li> +</ul> + Change in FS, I observed lots of duplicated grand-children features +<br> \ No newline at end of file diff --git a/dependencyParser/mate-tools/classes/is2/parserR2/Decoder.class b/dependencyParser/mate-tools/classes/is2/parserR2/Decoder.class new file mode 100644 index 0000000..700d423 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parserR2/Decoder.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parserR2/Options.class b/dependencyParser/mate-tools/classes/is2/parserR2/Options.class new file mode 100644 index 0000000..87aab26 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parserR2/Options.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parserR2/Parameters.class b/dependencyParser/mate-tools/classes/is2/parserR2/Parameters.class new file mode 100644 index 0000000..a988d40 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parserR2/Parameters.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parserR2/ParametersFloat.class b/dependencyParser/mate-tools/classes/is2/parserR2/ParametersFloat.class new file mode 100644 index 0000000..d111424 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parserR2/ParametersFloat.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parserR2/Parser.class b/dependencyParser/mate-tools/classes/is2/parserR2/Parser.class new file mode 100644 index 0000000..33aa5db Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parserR2/Parser.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parserR2/Pipe.class b/dependencyParser/mate-tools/classes/is2/parserR2/Pipe.class new file mode 100644 index 0000000..00f929e Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parserR2/Pipe.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parserR2/PipeReranker.class b/dependencyParser/mate-tools/classes/is2/parserR2/PipeReranker.class new file mode 100644 index 0000000..5fed356 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parserR2/PipeReranker.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parserR2/Reranker.class b/dependencyParser/mate-tools/classes/is2/parserR2/Reranker.class new file mode 100644 index 0000000..6f7e217 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/parserR2/Reranker.class differ diff --git a/dependencyParser/mate-tools/classes/is2/parserR2/package.html b/dependencyParser/mate-tools/classes/is2/parserR2/package.html new file mode 100755 index 0000000..6b06482 --- /dev/null +++ b/dependencyParser/mate-tools/classes/is2/parserR2/package.html @@ -0,0 +1,3 @@ +Package info + - n-best parser +<br> \ No newline at end of file diff --git a/dependencyParser/mate-tools/classes/is2/tag/ExtractorT2.class b/dependencyParser/mate-tools/classes/is2/tag/ExtractorT2.class new file mode 100644 index 0000000..13fafd4 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tag/ExtractorT2.class differ diff --git a/dependencyParser/mate-tools/classes/is2/tag/Lexicon.class b/dependencyParser/mate-tools/classes/is2/tag/Lexicon.class new file mode 100644 index 0000000..cc6f935 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tag/Lexicon.class differ diff --git a/dependencyParser/mate-tools/classes/is2/tag/MFO$Data4.class b/dependencyParser/mate-tools/classes/is2/tag/MFO$Data4.class new file mode 100644 index 0000000..b86c7c2 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tag/MFO$Data4.class differ diff --git a/dependencyParser/mate-tools/classes/is2/tag/MFO.class b/dependencyParser/mate-tools/classes/is2/tag/MFO.class new file mode 100644 index 0000000..8c29d8b Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tag/MFO.class differ diff --git a/dependencyParser/mate-tools/classes/is2/tag/Options.class b/dependencyParser/mate-tools/classes/is2/tag/Options.class new file mode 100644 index 0000000..cc1ff04 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tag/Options.class differ diff --git a/dependencyParser/mate-tools/classes/is2/tag/POS.class b/dependencyParser/mate-tools/classes/is2/tag/POS.class new file mode 100644 index 0000000..2ad72de Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tag/POS.class differ diff --git a/dependencyParser/mate-tools/classes/is2/tag/Tagger.class b/dependencyParser/mate-tools/classes/is2/tag/Tagger.class new file mode 100644 index 0000000..c333c04 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tag/Tagger.class differ diff --git a/dependencyParser/mate-tools/classes/is2/tag/package.html b/dependencyParser/mate-tools/classes/is2/tag/package.html new file mode 100644 index 0000000..469fdf6 --- /dev/null +++ b/dependencyParser/mate-tools/classes/is2/tag/package.html @@ -0,0 +1,4 @@ +Package info +<br><br> +This parser includes a tagger into the dependency parser +<br> \ No newline at end of file diff --git a/dependencyParser/mate-tools/classes/is2/tools/IPipe.class b/dependencyParser/mate-tools/classes/is2/tools/IPipe.class new file mode 100644 index 0000000..4afbc7e Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tools/IPipe.class differ diff --git a/dependencyParser/mate-tools/classes/is2/tools/Retrainable.class b/dependencyParser/mate-tools/classes/is2/tools/Retrainable.class new file mode 100644 index 0000000..884aa54 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tools/Retrainable.class differ diff --git a/dependencyParser/mate-tools/classes/is2/tools/Tool.class b/dependencyParser/mate-tools/classes/is2/tools/Tool.class new file mode 100644 index 0000000..fe933ee Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tools/Tool.class differ diff --git a/dependencyParser/mate-tools/classes/is2/tools/ToolIO.class b/dependencyParser/mate-tools/classes/is2/tools/ToolIO.class new file mode 100644 index 0000000..7dbc0e2 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tools/ToolIO.class differ diff --git a/dependencyParser/mate-tools/classes/is2/tools/Train.class b/dependencyParser/mate-tools/classes/is2/tools/Train.class new file mode 100644 index 0000000..b78cf03 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/tools/Train.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Convert.class b/dependencyParser/mate-tools/classes/is2/util/Convert.class new file mode 100644 index 0000000..8f4979a Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Convert.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Convert0409.class b/dependencyParser/mate-tools/classes/is2/util/Convert0409.class new file mode 100644 index 0000000..85d14c2 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Convert0409.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/ConvertADJ.class b/dependencyParser/mate-tools/classes/is2/util/ConvertADJ.class new file mode 100644 index 0000000..edd73bc Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/ConvertADJ.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/ConvertLowerCase0909.class b/dependencyParser/mate-tools/classes/is2/util/ConvertLowerCase0909.class new file mode 100644 index 0000000..d135d6e Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/ConvertLowerCase0909.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/ConvertTiger2CoNLL.class b/dependencyParser/mate-tools/classes/is2/util/ConvertTiger2CoNLL.class new file mode 100644 index 0000000..2bbd3b3 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/ConvertTiger2CoNLL.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/DB.class b/dependencyParser/mate-tools/classes/is2/util/DB.class new file mode 100644 index 0000000..ec0c2c1 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/DB.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Edges$C.class b/dependencyParser/mate-tools/classes/is2/util/Edges$C.class new file mode 100644 index 0000000..0688366 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Edges$C.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Edges.class b/dependencyParser/mate-tools/classes/is2/util/Edges.class new file mode 100644 index 0000000..ea82d67 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Edges.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Evaluator$1.class b/dependencyParser/mate-tools/classes/is2/util/Evaluator$1.class new file mode 100644 index 0000000..336aa44 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Evaluator$1.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Evaluator$Results.class b/dependencyParser/mate-tools/classes/is2/util/Evaluator$Results.class new file mode 100644 index 0000000..d64a297 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Evaluator$Results.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Evaluator.class b/dependencyParser/mate-tools/classes/is2/util/Evaluator.class new file mode 100644 index 0000000..b388a90 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Evaluator.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$1.class b/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$1.class new file mode 100644 index 0000000..3f65ea3 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$1.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$2.class b/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$2.class new file mode 100644 index 0000000..82010b0 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$2.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$Results.class b/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$Results.class new file mode 100644 index 0000000..0a4333e Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger$Results.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger.class b/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger.class new file mode 100644 index 0000000..e988817 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/EvaluatorTagger.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/ExtractParagraphs.class b/dependencyParser/mate-tools/classes/is2/util/ExtractParagraphs.class new file mode 100644 index 0000000..847a14a Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/ExtractParagraphs.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/IntStack.class b/dependencyParser/mate-tools/classes/is2/util/IntStack.class new file mode 100644 index 0000000..0934a86 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/IntStack.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Long2Int.class b/dependencyParser/mate-tools/classes/is2/util/Long2Int.class new file mode 100644 index 0000000..90c54dd Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Long2Int.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Options.class b/dependencyParser/mate-tools/classes/is2/util/Options.class new file mode 100644 index 0000000..04d0e4e Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Options.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/OptionsSuper.class b/dependencyParser/mate-tools/classes/is2/util/OptionsSuper.class new file mode 100644 index 0000000..e6d9786 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/OptionsSuper.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/ParserEvaluator$Results.class b/dependencyParser/mate-tools/classes/is2/util/ParserEvaluator$Results.class new file mode 100644 index 0000000..54f3129 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/ParserEvaluator$Results.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/ParserEvaluator.class b/dependencyParser/mate-tools/classes/is2/util/ParserEvaluator.class new file mode 100644 index 0000000..b6114b0 Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/ParserEvaluator.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Split.class b/dependencyParser/mate-tools/classes/is2/util/Split.class new file mode 100644 index 0000000..7155abd Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Split.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Split2.class b/dependencyParser/mate-tools/classes/is2/util/Split2.class new file mode 100644 index 0000000..e5831ba Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Split2.class differ diff --git a/dependencyParser/mate-tools/classes/is2/util/Split3.class b/dependencyParser/mate-tools/classes/is2/util/Split3.class new file mode 100644 index 0000000..7e8f8ce Binary files /dev/null and b/dependencyParser/mate-tools/classes/is2/util/Split3.class differ diff --git a/dependencyParser/mate-tools/src/decoder/ParallelDecoder.java b/dependencyParser/mate-tools/src/decoder/ParallelDecoder.java index 0dd1c18..cf4cfa9 100755 --- a/dependencyParser/mate-tools/src/decoder/ParallelDecoder.java +++ b/dependencyParser/mate-tools/src/decoder/ParallelDecoder.java @@ -1,137 +1,170 @@ package decoder; +import java.util.ArrayList; +import java.util.concurrent.Callable; + import is2.data.Closed; import is2.data.DataF; import is2.data.Edges; import is2.data.Open; -import java.util.ArrayList; -import java.util.concurrent.Callable; - /** * @author Bernd Bohnet, 30.08.2009 - * - * This class implements a parallel feature extractor. + * + * This class implements a parallel feature extractor. */ -final public class ParallelDecoder implements Callable<Object> -{ +final public class ParallelDecoder implements Callable<Object> { // some constants private static final float INIT_BEST = (-1.0F / 0.0F); - private static final boolean[] DIR ={false,true}; + private static final boolean[] DIR = { false, true }; - // the data space of the weights for a dependency tree + // the data space of the weights for a dependency tree final private DataF x; private short[] pos; private Open O[][][][]; - private Closed C[][][][] ; + private Closed C[][][][]; private int n; - boolean done=false; - public boolean waiting =false; + boolean done = false; + public boolean waiting = false; /** * Initialize the parallel decoder. - * - * @param pos part-of-speech - * @param d data - * @param edges part-of-speech edge mapping - * @param o open spans - * @param c closed spans - * @param n number of words + * + * @param pos + * part-of-speech + * @param d + * data + * @param edges + * part-of-speech edge mapping + * @param o + * open spans + * @param c + * closed spans + * @param n + * number of words */ public ParallelDecoder(short[] pos, DataF d, Open o[][][][], Closed c[][][][], int n) { - this.pos =pos; - this.x =d; + this.pos = pos; + this.x = d; - this.O=o; - this.C=c; - this.n=n; + this.O = o; + this.C = c; + this.n = n; } - - private static class DSet { short w1,w2;} + private static class DSet { + short w1, w2; + } @Override public Object call() { - while (true){ + while (true) { DSet set = get(); - if (done && set==null) break; + if (done && set == null) + break; + + if (set == null) + return null; - if (set ==null) return null; + short s = set.w1, t = set.w2; - short s=set.w1, t=set.w2; + for (short dir = 1; dir >= 0; dir--) { - for(short dir =1;dir>=0;dir--) { - - short[] labs = (dir==1) ? Edges.get(pos[s],pos[t], false):Edges.get(pos[t],pos[s], true); + short[] labs = (dir == 1) ? Edges.get(pos[s], pos[t], false) : Edges.get(pos[t], pos[s], true); O[s][t][dir] = new Open[labs.length]; for (int l = O[s][t][dir].length - 1; l >= 0; l--) { - - double tRP = INIT_BEST; - + + double tRP = INIT_BEST; + Closed tL = null, tR = null; - + for (int r = s; r < t; r++) { - - if (s == 0 && r != 0) continue; - - double tLPr = INIT_BEST,tRPr = INIT_BEST; + + if (s == 0 && r != 0) + continue; + + double tLPr = INIT_BEST, tRPr = INIT_BEST; Closed tLCld = null, tRCld = null; - - if (r == s) tLPr = dir==1 ? x.sib[s][t][s][0][l] : x.gra[t][s][s][1 ][l]; - else - for (int i = s + 1; i <= r; i++) - if (((dir==1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + C[s][r][1][i].p) > tLPr) { - tLPr = ((dir==1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + C[s][r][1][i].p);tLCld = C[s][r][1][i];} - - if (r == t-1) tRPr = dir==1 ? x.gra[s][t][s][0][l] : x.sib[t][s][s][1][l]; + + if (r == s) + tLPr = dir == 1 ? x.sib[s][t][s][0][l] : x.gra[t][s][s][1][l]; else - for (int i = r + 1; i < t; i++) - if (((dir == 1 ? x.gra[s][t][i][0][l] : x.sib[t][s][i][1][l]) + C[r+1][t][0][i].p) > tRPr) { - tRPr = ((dir==1?x.gra[s][t][i][0][l]:x.sib[t][s][i][1][l]) + C[r+1][t][0][i].p); tRCld=C[r + 1][t][0][i];} - - if (tLPr + tRPr > tRP) {tRP = tLPr + tRPr; tL = tLCld;tR = tRCld;} + for (int i = s + 1; i <= r; i++) + if (((dir == 1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + + C[s][r][1][i].p) > tLPr) { + tLPr = ((dir == 1 ? x.sib[s][t][i][0][l] : x.gra[t][s][i][1][l]) + C[s][r][1][i].p); + tLCld = C[s][r][1][i]; + } + + if (r == t - 1) + tRPr = dir == 1 ? x.gra[s][t][s][0][l] : x.sib[t][s][s][1][l]; + else + for (int i = r + 1; i < t; i++) + if (((dir == 1 ? x.gra[s][t][i][0][l] : x.sib[t][s][i][1][l]) + + C[r + 1][t][0][i].p) > tRPr) { + tRPr = ((dir == 1 ? x.gra[s][t][i][0][l] : x.sib[t][s][i][1][l]) + + C[r + 1][t][0][i].p); + tRCld = C[r + 1][t][0][i]; + } + + if (tLPr + tRPr > tRP) { + tRP = tLPr + tRPr; + tL = tLCld; + tR = tRCld; + } } - O[s][t][dir][l] = new Open(s, t, dir, labs[l],tL, tR, - (float) ( tRP+((dir==1)?x.pl[s][t]: x.pl[t][s]) + ((dir==1)? x.lab[s][t][labs[l]][0]:x.lab[t][s][labs[l]][1]))); + O[s][t][dir][l] = new Open(s, t, dir, labs[l], tL, tR, + (float) (tRP + ((dir == 1) ? x.pl[s][t] : x.pl[t][s]) + + ((dir == 1) ? x.lab[s][t][labs[l]][0] : x.lab[t][s][labs[l]][1]))); } } - C[s][t][1] = new Closed[n]; C[s][t][0] = new Closed[n]; + C[s][t][1] = new Closed[n]; + C[s][t][0] = new Closed[n]; + + for (int m = s; m <= t; m++) { + for (boolean d : DIR) { + if ((d && m != s) || !d && (m != t && s != 0)) { - for (int m = s ; m <= t; m++) { - for(boolean d : DIR) { - if ((d && m!=s)||!d && (m!=t && s!=0)) { - // create closed structure - + double top = INIT_BEST; - - Open tU = null; Closed tL = null; - int numLabels =O[(d ? s : m)][(d ? m : t)][d?1:0].length; - - //for (int l = numLabels-1; l >=0; l--) { + + Open tU = null; + Closed tL = null; + int numLabels = O[(d ? s : m)][(d ? m : t)][d ? 1 : 0].length; + + // for (int l = numLabels-1; l >=0; l--) { for (int l = 0; l < numLabels; l++) { - - Open hi = O[(d ? s : m)][(d ? m : t)][d?1:0][l]; - for (int amb = m + (d?1:-1); amb != (d?t:s) + (d?1:-1); amb += (d?1:-1)) { - - if ((hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][d?0:1][l]) > top) { - top = (hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][(d?0:1)][l]); tU = hi; tL=C[d?m:s][d?t:m][d?1:0][amb];} + + Open hi = O[(d ? s : m)][(d ? m : t)][d ? 1 : 0][l]; + for (int amb = m + (d ? 1 : -1); amb != (d ? t : s) + (d ? 1 : -1); amb += (d ? 1 : -1)) { + + if ((hi.p + C[d ? m : s][d ? t : m][d ? 1 : 0][amb].p + + x.gra[d ? s : t][m][amb][d ? 0 : 1][l]) > top) { + top = (hi.p + C[d ? m : s][d ? t : m][d ? 1 : 0][amb].p + + x.gra[d ? s : t][m][amb][(d ? 0 : 1)][l]); + tU = hi; + tL = C[d ? m : s][d ? t : m][d ? 1 : 0][amb]; + } + } + + if ((m == (d ? t : s)) && (hi.p + x.gra[d ? s : t][m][d ? s : t][(d ? 0 : 1)][l]) > top) { + top = (hi.p + x.gra[(d ? s : t)][m][d ? s : t][d ? 0 : 1][l]); + tU = hi; + tL = null; } - - if ((m == (d ? t : s)) && (hi.p + x.gra[d?s:t][m][d?s:t][(d ? 0 :1)][l]) > top) { - top = (hi.p + x.gra[(d ? s : t)][m][d?s:t][d?0:1][l]); tU = hi; tL = null;} } - C[s][t][d?1:0][m] = new Closed(s, t, m, d?1:0,tU,tL,(float) top); + C[s][t][d ? 1 : 0][m] = new Closed(s, t, m, d ? 1 : 0, tU, tL, (float) top); } - } + } } } return null; @@ -141,15 +174,16 @@ final public class ParallelDecoder implements Callable<Object> static synchronized private DSet get() { synchronized (sets) { - if (sets.size()==0) return null; - return sets.remove(sets.size()-1); + if (sets.size() == 0) + return null; + return sets.remove(sets.size() - 1); } } - public static void add(short w1, short w2){ - DSet ds =new DSet(); - ds.w1=w1; - ds.w2=w2; + public static void add(short w1, short w2) { + DSet ds = new DSet(); + ds.w1 = w1; + ds.w2 = w2; sets.add(ds); } } diff --git a/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest.java b/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest.java index 493917b..5a16211 100755 --- a/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest.java +++ b/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest.java @@ -1,20 +1,20 @@ package decoder; -import is2.data.DataF; -import is2.data.Edges; -import is2.data.Parse; -import is2.data.ParseNBest; - import java.util.ArrayList; import java.util.concurrent.Callable; import extractors.Extractor; +import is2.data.DataF; +import is2.data.Edges; +import is2.data.Parse; +import is2.data.ParseNBest; /** * @author Dr. Bernd Bohnet, 30.08.2009 - * - * This class implements a parallel edge rearrangement for non-projective parsing; - * The linear method was first suggest by Rayn McDonald et. al. 2005. + * + * This class implements a parallel edge rearrangement for + * non-projective parsing; The linear method was first suggest by Rayn + * McDonald et. al. 2005. */ final public class ParallelRearrangeNBest implements Callable<Object> { @@ -22,115 +22,123 @@ final public class ParallelRearrangeNBest implements Callable<Object> { final static class PA { final float p; final short ch, pa; - + float best; - - - - public PA(float p2, short ch2, short pa2) { p=p2; ch=ch2;pa=pa2;} - } - // list of parent child combinations - private static ArrayList<PA> parents = new ArrayList<PA>(); + public PA(float p2, short ch2, short pa2) { + p = p2; + ch = ch2; + pa = pa2; + } + } // some data from the dependency tree private short[] pos; private DataF x; - private boolean[][] isChild ; - public short[] heads,types; + private boolean[][] isChild; + public short[] heads, types; private float lastNBest; - private float best; // best so far + private float best; // best so far private float threshold; private Extractor extractor; - - + /** * Initialize the parallel rearrange thread - * - * @param isChild2 is a child - * @param edgesC the part-of-speech edge mapping - * @param pos the part-of-speech - * @param x the data - * @param lastNBest - * @param s the heads - * @param ts the types + * + * @param isChild2 + * is a child + * @param edgesC + * the part-of-speech edge mapping + * @param pos + * the part-of-speech + * @param x + * the data + * @param lastNBest + * @param s + * the heads + * @param ts + * the types */ - public ParallelRearrangeNBest(short[] pos , DataF x, Parse p, float lastNBest, Extractor extractor, float best, float threshold) { - - - heads=p.heads; + public ParallelRearrangeNBest(short[] pos, DataF x, Parse p, float lastNBest, Extractor extractor, float best, + float threshold) { + + heads = p.heads; + + types = p.labels; - types= p.labels; - isChild = new boolean[heads.length][heads.length]; - for(int i = 1, l1=1; i < heads.length; i++,l1=i) - while((l1= heads[l1]) != -1) isChild[l1][i] = true; - + for (int i = 1, l1 = 1; i < heads.length; i++, l1 = i) + while ((l1 = heads[l1]) != -1) + isChild[l1][i] = true; + + this.lastNBest = lastNBest; + this.pos = pos; + this.x = x; - this.lastNBest =lastNBest; - this.pos =pos; - this.x=x; - this.extractor = extractor; - this.best=best; + this.best = best; this.threshold = threshold; } - public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>(); + public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>(); @Override public Object call() { - - // check the list of new possible parents and children for a better combination - for(int ch = 1; ch < heads.length; ch++) { - for(short pa = 0; pa < heads.length; pa++) { - if(ch == pa || pa == heads[ch] || isChild[ch][pa]) continue; - short oldP = heads[ch], oldT = types[ch]; - heads[ch]=pa; + // check the list of new possible parents and children for a better + // combination + for (int ch = 1; ch < heads.length; ch++) { + for (short pa = 0; pa < heads.length; pa++) { + if (ch == pa || pa == heads[ch] || isChild[ch][pa]) + continue; - short[] labels = Edges.get(pos[pa], pos[ch],ch<pa); + short oldP = heads[ch], oldT = types[ch]; + heads[ch] = pa; - for(int l=0;l<labels.length;l++) { + short[] labels = Edges.get(pos[pa], pos[ch], ch < pa); - types[ch]=labels[l]; - float p_new = extractor.encode3(pos, heads, types, x); + for (short label : labels) { - if (p_new<lastNBest || ((best+this.threshold)>p_new)) continue; - - ParseNBest p = new ParseNBest(); - p.signature(heads, types); - p.f1=p_new; - parses.add(p); - } - - // change back - heads[ch]= oldP; types[ch]=oldT; - - // consider changes to labels only - labels = Edges.get(pos[oldP], pos[ch],ch<oldP); + types[ch] = label; + float p_new = extractor.encode3(pos, heads, types, x); - for(int l=0;l<labels.length;l++) { + if (p_new < lastNBest || ((best + this.threshold) > p_new)) + continue; - types[ch]=labels[l]; - float p_new = (float) extractor.encode3(pos, heads, types, x); + ParseNBest p = new ParseNBest(); + p.signature(heads, types); + p.f1 = p_new; + parses.add(p); + } - // optimization: add only if larger than smallest of n-best - if (p_new<lastNBest || ((best+this.threshold)>p_new)) continue; + // change back + heads[ch] = oldP; + types[ch] = oldT; - ParseNBest p = new ParseNBest(); - p.signature(heads, types); - p.f1=p_new; - parses.add(p); - } + // consider changes to labels only + labels = Edges.get(pos[oldP], pos[ch], ch < oldP); - heads[ch]= oldP; types[ch]=oldT; - } + for (short label : labels) { + + types[ch] = label; + float p_new = extractor.encode3(pos, heads, types, x); + + // optimization: add only if larger than smallest of n-best + if (p_new < lastNBest || ((best + this.threshold) > p_new)) + continue; + + ParseNBest p = new ParseNBest(); + p.signature(heads, types); + p.f1 = p_new; + parses.add(p); + } + + heads[ch] = oldP; + types[ch] = oldT; + } } return parses; } - - } diff --git a/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest2.java b/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest2.java index a25b392..6795b7b 100644 --- a/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest2.java +++ b/dependencyParser/mate-tools/src/decoder/ParallelRearrangeNBest2.java @@ -1,22 +1,20 @@ package decoder; -import is2.data.DataF; -import is2.data.Edges; -import is2.data.Parse; -import is2.data.ParseNBest; - import java.util.ArrayList; import java.util.concurrent.Callable; -import decoder.ParallelRearrangeNBest.PA; - import extractors.Extractor; +import is2.data.DataF; +import is2.data.Edges; +import is2.data.Parse; +import is2.data.ParseNBest; /** * @author Dr. Bernd Bohnet, 30.08.2009 - * - * This class implements a parallel edge rearrangement for non-projective parsing; - * The linear method was first suggest by Rayn McDonald et. al. 2005. + * + * This class implements a parallel edge rearrangement for + * non-projective parsing; The linear method was first suggest by Rayn + * McDonald et. al. 2005. */ final public class ParallelRearrangeNBest2 implements Callable<Object> { @@ -24,16 +22,16 @@ final public class ParallelRearrangeNBest2 implements Callable<Object> { final static class PA { final float p; final short ch, pa; - - public short[] heads,types; + public short[] heads, types; + + public PA(Parse p, short ch2, short pa2) { + this.p = (float) p.f1; + heads = p.heads; + types = p.labels; + ch = ch2; + pa = pa2; - public PA(Parse p, short ch2, short pa2) { - this.p =(float)p.f1; - heads =p.heads; - types=p.labels; - ch=ch2;pa=pa2; - } } @@ -47,105 +45,114 @@ final public class ParallelRearrangeNBest2 implements Callable<Object> { private float threshold; private Extractor extractor; - /** * Initialize the parallel rearrange thread - * @param pos the part-of-speech - * @param x the data - * @param lastNBest - * @param isChild2 is a child - * @param edgesC the part-of-speech edge mapping - * @param s the heads - * @param ts the types + * + * @param pos + * the part-of-speech + * @param x + * the data + * @param lastNBest + * @param isChild2 + * is a child + * @param edgesC + * the part-of-speech edge mapping + * @param s + * the heads + * @param ts + * the types */ - public ParallelRearrangeNBest2(short[] pos , DataF x, float lastNBest, Extractor extractor, float threshold) { - - + public ParallelRearrangeNBest2(short[] pos, DataF x, float lastNBest, Extractor extractor, float threshold) { - this.lastNBest =lastNBest; - this.pos =pos; - this.x=x; + this.lastNBest = lastNBest; + this.pos = pos; + this.x = x; this.extractor = extractor; this.threshold = threshold; } - public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>(); + public ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>(); @Override public Object call() { try { - - while(true) { - PA p = getPA(); - if (p==null) return parses; - - short oldP = p.heads[p.ch], oldT = p.types[p.ch]; - p.heads[p.ch]=p.pa; + while (true) { + PA p = getPA(); - short[] labels = Edges.get(pos[p.pa], pos[p.ch],p.ch<p.pa); + if (p == null) + return parses; - for(int l=0;l<labels.length;l++) { + short oldP = p.heads[p.ch], oldT = p.types[p.ch]; + p.heads[p.ch] = p.pa; - p.types[p.ch]=labels[l]; - float p_new = extractor.encode3(pos, p.heads, p.types, x); + short[] labels = Edges.get(pos[p.pa], pos[p.ch], p.ch < p.pa); - if (p_new<lastNBest || ((p.p+this.threshold)>p_new)) continue; + for (short label : labels) { - ParseNBest x = new ParseNBest(); - x.signature(p.heads, p.types); - x.f1=p_new; - parses.add(x); - } + p.types[p.ch] = label; + float p_new = extractor.encode3(pos, p.heads, p.types, x); - // change back - p.heads[p.ch]= oldP; p.types[p.ch]=oldT; + if (p_new < lastNBest || ((p.p + this.threshold) > p_new)) + continue; - // consider changes to labels only - labels = Edges.get(pos[oldP], pos[p.ch],p.ch<oldP); + ParseNBest x = new ParseNBest(); + x.signature(p.heads, p.types); + x.f1 = p_new; + parses.add(x); + } - for(int l=0;l<labels.length;l++) { + // change back + p.heads[p.ch] = oldP; + p.types[p.ch] = oldT; - p.types[p.ch]=labels[l]; - float p_new = (float) extractor.encode3(pos, p.heads, p.types, x); + // consider changes to labels only + labels = Edges.get(pos[oldP], pos[p.ch], p.ch < oldP); - // optimization: add only if larger than smallest of n-best - if (p_new<lastNBest || ((p.p+this.threshold)>p_new)) continue; + for (short label : labels) { - ParseNBest x = new ParseNBest(); - x.signature(p.heads, p.types); - x.f1=p_new; - parses.add(x); - } + p.types[p.ch] = label; + float p_new = extractor.encode3(pos, p.heads, p.types, x); - p.heads[p.ch]= oldP; p.types[p.ch]=oldT; - } - } catch(Exception e) { + // optimization: add only if larger than smallest of n-best + if (p_new < lastNBest || ((p.p + this.threshold) > p_new)) + continue; + + ParseNBest x = new ParseNBest(); + x.signature(p.heads, p.types); + x.f1 = p_new; + parses.add(x); + } + + p.heads[p.ch] = oldP; + p.types[p.ch] = oldT; + } + } catch (Exception e) { e.printStackTrace(); } return parses; } /** - * Add a child-parent combination which are latter explored for rearrangement - * + * Add a child-parent combination which are latter explored for + * rearrangement + * * @param p2 * @param ch2 * @param pa */ public static void add(Parse p, short ch2, short pa) { - parents.add(new PA(p,ch2,pa)); + parents.add(new PA(p, ch2, pa)); } public static PA getPA() { - synchronized(parents) { - if (parents.size()==0) return null; - return parents.remove(parents.size()-1); + synchronized (parents) { + if (parents.size() == 0) + return null; + return parents.remove(parents.size() - 1); } } - - } diff --git a/dependencyParser/mate-tools/src/examples/DependencyParser.java b/dependencyParser/mate-tools/src/examples/DependencyParser.java index c41a101..917dc04 100644 --- a/dependencyParser/mate-tools/src/examples/DependencyParser.java +++ b/dependencyParser/mate-tools/src/examples/DependencyParser.java @@ -1,92 +1,90 @@ package examples; +import java.io.File; +import java.util.Arrays; import is2.data.InstancesTagger; import is2.data.SentenceData09; import is2.io.CONLLReader09; +import is2.io.IOGenerals; import is2.lemmatizer.Lemmatizer; import is2.lemmatizer.MFO; import is2.parser.Parser; import is2.tag.Tagger; //import org.apache.log4j.Logger; -import java.io.File; -import java.util.Arrays; - /** * Dependency parsing * * @author B. Piwowarski <benjamin@bpiwowar.net> * @date 10/10/12 */ -//@TaskDescription(name = "dependency-parser", project = "mate-tools") -public class DependencyParser { - // final static private Logger LOGGER = Logger.getLogger(DependencyParser.class); - //@Argument(name = "lemmatizer", required = true, checkers = IOChecker.Readable.class) - File lemmatizerFile; - - //@Argument(name = "tagger", required = true) - File taggerFile; - - //@Argument(name = "parser", required = true) - File parserFile; - - //@Override - public int execute() throws Throwable { - - // Load lemmatizer - //LOGGER.info("Loading lemmatizer"); - // true = do uppercase lemmatization - Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath()); - - // Load tagger - //LOGGER.info("Loading tagger"); - Tagger tagger = new Tagger(taggerFile.getAbsolutePath()); - - // Load parser - //LOGGER.info("Loading parser"); - Parser parser = new Parser(parserFile.getAbsolutePath()); - - - // Sentences to parse - String sentences[] = new String[]{ - "Airfields have been constructed on a number of the islands .", - "Private investment has even made an increasingly modern ferry fleet possible .", - "Politically , the 1990s have been relatively quite times for the islands ." - }; - - CONLLReader09 reader = new CONLLReader09(CONLLReader09.NO_NORMALIZE); - - for (String sentence : sentences) { - // Prepare the sentence - InstancesTagger instanceTagger = new InstancesTagger(); - instanceTagger.init(1, new MFO()); - - String[] split = sentence.split("\\s+"); - String[] splitRoot = new String[split.length+1]; - System.arraycopy(split, 0, splitRoot, 1, split.length); - splitRoot[0] = CONLLReader09.ROOT; - - SentenceData09 instance = new SentenceData09(); - instance.init(splitRoot); - - reader.insert(instanceTagger, instance); - - SentenceData09 result = lemmatizer.apply(instance); - tagger.apply(result); - result = parser.parse(result, parser.params, false, parser.options); - - - // Output - System.out.println(Arrays.toString(result.forms)); - System.out.println(Arrays.toString(result.plemmas)); - System.out.println(Arrays.toString(result.ppos)); - System.out.println(Arrays.toString(result.pheads)); - System.out.println(Arrays.toString(result.plabels)); - System.out.println(); - - } - - return 0; - } +// @TaskDescription(name = "dependency-parser", project = "mate-tools") +public class DependencyParser { + // final static private Logger LOGGER = + // Logger.getLogger(DependencyParser.class); + // @Argument(name = "lemmatizer", required = true, checkers = + // IOChecker.Readable.class) + File lemmatizerFile; + + // @Argument(name = "tagger", required = true) + File taggerFile; + + // @Argument(name = "parser", required = true) + File parserFile; + + // @Override + public int execute() throws Throwable { + + // Load lemmatizer + // LOGGER.info("Loading lemmatizer"); + // true = do uppercase lemmatization + Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath()); + + // Load tagger + // LOGGER.info("Loading tagger"); + Tagger tagger = new Tagger(taggerFile.getAbsolutePath()); + + // Load parser + // LOGGER.info("Loading parser"); + Parser parser = new Parser(parserFile.getAbsolutePath()); + + // Sentences to parse + String sentences[] = new String[] { "Airfields have been constructed on a number of the islands .", + "Private investment has even made an increasingly modern ferry fleet possible .", + "Politically , the 1990s have been relatively quite times for the islands ." }; + + CONLLReader09 reader = new CONLLReader09(CONLLReader09.NO_NORMALIZE); + + for (String sentence : sentences) { + // Prepare the sentence + InstancesTagger instanceTagger = new InstancesTagger(); + instanceTagger.init(1, new MFO()); + + String[] split = sentence.split("\\s+"); + String[] splitRoot = new String[split.length + 1]; + System.arraycopy(split, 0, splitRoot, 1, split.length); + splitRoot[0] = IOGenerals.ROOT; + + SentenceData09 instance = new SentenceData09(); + instance.init(splitRoot); + + reader.insert(instanceTagger, instance); + + SentenceData09 result = lemmatizer.apply(instance); + tagger.apply(result); + result = parser.parse(result, parser.params, false, parser.options); + + // Output + System.out.println(Arrays.toString(result.forms)); + System.out.println(Arrays.toString(result.plemmas)); + System.out.println(Arrays.toString(result.ppos)); + System.out.println(Arrays.toString(result.pheads)); + System.out.println(Arrays.toString(result.plabels)); + System.out.println(); + + } + + return 0; + } } diff --git a/dependencyParser/mate-tools/src/examples/FullPipelineSpanish.java b/dependencyParser/mate-tools/src/examples/FullPipelineSpanish.java index a255595..9bfff59 100644 --- a/dependencyParser/mate-tools/src/examples/FullPipelineSpanish.java +++ b/dependencyParser/mate-tools/src/examples/FullPipelineSpanish.java @@ -1,71 +1,79 @@ package examples; +import java.io.IOException; +import java.util.ArrayList; +import java.util.StringTokenizer; + import is2.data.SentenceData09; import is2.io.CONLLWriter09; import is2.lemmatizer.Lemmatizer; - import is2.parser.Parser; import is2.tag.Tagger; import is2.tools.Tool; -import java.io.IOException; -import java.util.ArrayList; -import java.util.StringTokenizer; - /** * @author Bernd Bohnet, 13.09.2010 - * - * Illustrates the application the full pipeline: lemmatizer, morphologic, tagger, and parser + * + * Illustrates the application the full pipeline: lemmatizer, + * morphologic, tagger, and parser */ public class FullPipelineSpanish { - - // shows how to parse a sentences and call the tools + // shows how to parse a sentences and call the tools public static void main(String[] args) throws IOException { - + // Create a data container for a sentence SentenceData09 i = new SentenceData09(); - if (args.length==1) { // input might be a sentence: "This is another test ." + if (args.length == 1) { // input might be a sentence: "This is another + // test ." StringTokenizer st = new StringTokenizer(args[0]); ArrayList<String> forms = new ArrayList<String>(); - + forms.add("<root>"); - while(st.hasMoreTokens()) forms.add(st.nextToken()); - + while (st.hasMoreTokens()) + forms.add(st.nextToken()); + i.init(forms.toArray(new String[0])); - + } else { // provide a default sentence: Haus has a mutated vowel - i.init(new String[] {"<root>","También","estuve","emocionado","pero","no","pude","imaginar","mi","vida","sin","la", - "gente","tan","intima","a","mÃ","."}); + i.init(new String[] { "<root>", "También", "estuve", "emocionado", "pero", "no", "pude", "imaginar", "mi", + "vida", "sin", "la", "gente", "tan", "intima", "a", "mÃ", "." }); } // lemmatizing - + System.out.println("\nReading the model of the lemmatizer"); - Tool lemmatizer = new Lemmatizer("models/lemma-spa.model"); // create a lemmatizer - + Tool lemmatizer = new Lemmatizer("models/lemma-spa.model"); // create a + // lemmatizer + System.out.println("Applying the lemmatizer"); lemmatizer.apply(i); System.out.print(i.toString()); - System.out.print("Lemmata: "); for (String l : i.plemmas) System.out.print(l+" "); System.out.println(); - + System.out.print("Lemmata: "); + for (String l : i.plemmas) + System.out.print(l + " "); + System.out.println(); + // morphologic tagging - + System.out.println("\nReading the model of the morphologic tagger"); is2.mtag.Tagger morphTagger = new is2.mtag.Tagger("models/mtag-spa.model"); - + System.out.println("\nApplying the morpholoigc tagger"); morphTagger.apply(i); - + System.out.print(i.toString()); - System.out.print("Morph: "); for (String f : i.pfeats) System.out.print(f+" "); System.out.println(); - + System.out.print("Morph: "); + for (String f : i.pfeats) + System.out.print(f + " "); + System.out.println(); + // part-of-speech tagging - + System.out.println("\nReading the model of the part-of-speech tagger"); Tool tagger = new Tagger("models/tag-spa.model"); @@ -73,26 +81,28 @@ public class FullPipelineSpanish { tagger.apply(i); System.out.print(i.toString()); - System.out.print("Part-of-Speech tags: "); for (String p : i.ppos) System.out.print(p+" "); System.out.println(); - + System.out.print("Part-of-Speech tags: "); + for (String p : i.ppos) + System.out.print(p + " "); + System.out.println(); + // parsing - + System.out.println("\nReading the model of the dependency parser"); Tool parser = new Parser("models/prs-spa.model"); System.out.println("\nApplying the parser"); parser.apply(i); - + System.out.println(i.toString()); // write the result to a file - + CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt"); - + writer.write(i, CONLLWriter09.NO_ROOT); writer.finishWriting(); } - } diff --git a/dependencyParser/mate-tools/src/examples/FullPipelineTest.java b/dependencyParser/mate-tools/src/examples/FullPipelineTest.java index c8f992a..4aecdc2 100644 --- a/dependencyParser/mate-tools/src/examples/FullPipelineTest.java +++ b/dependencyParser/mate-tools/src/examples/FullPipelineTest.java @@ -1,5 +1,6 @@ package examples; +import java.io.File; import is2.data.InstancesTagger; import is2.data.SentenceData09; @@ -11,100 +12,99 @@ import is2.parser.Parser; import is2.tag.Tagger; //import org.apache.log4j.Logger; -import java.io.File; -import java.util.Arrays; - /** * Dependency parsing * * @author B. Piwowarski <benjamin@bpiwowar.net> * @date 10/10/12 */ -//@TaskDescription(name = "dependency-parser", project = "mate-tools") -public class FullPipelineTest { - // final static private Logger LOGGER = Logger.getLogger(DependencyParser.class); - //@Argument(name = "lemmatizer", required = true, checkers = IOChecker.Readable.class) - public File lemmatizerFile; - - //@Argument(name = "tagger", required = true) - public File taggerFile; - - public File mtaggerFile; - - //@Argument(name = "parser", required = true) - public File parserFile; - - //@Override - public int execute(String source, String target) throws Throwable { - - // Load lemmatizer - //LOGGER.info("Loading lemmatizer"); - // true = do uppercase lemmatization - Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath()); - - // Load tagger - //LOGGER.info("Loading tagger"); - Tagger tagger = new Tagger(taggerFile.getAbsolutePath()); - - is2.mtag.Tagger mtagger = new is2.mtag.Tagger(mtaggerFile.getAbsolutePath()); - - // Load parser - //LOGGER.info("Loading parser"); - Parser parser = new Parser(parserFile.getAbsolutePath()); - - - CONLLReader09 reader = new CONLLReader09(source); - CONLLWriter09 writer = new CONLLWriter09(target); - - int count=0; - while (true) { - // Prepare the sentence - InstancesTagger is = new InstancesTagger(); - is.init(1, new MFO()); - - SentenceData09 instance= reader.getNext(is); - if (instance ==null) break; - SentenceData09 result = null; -try { - - System.out.print("\b\b\b\b"+count); - result= lemmatizer.apply(instance); - - result = tagger.apply(result); - result= mtagger.apply(result); - result = parser.apply(result); - - count++; -} catch(Exception e) { - - System.out.println("error"+result); - System.out.println("error"+instance); - e.printStackTrace(); - break; -} +// @TaskDescription(name = "dependency-parser", project = "mate-tools") +public class FullPipelineTest { + // final static private Logger LOGGER = + // Logger.getLogger(DependencyParser.class); + // @Argument(name = "lemmatizer", required = true, checkers = + // IOChecker.Readable.class) + public File lemmatizerFile; + + // @Argument(name = "tagger", required = true) + public File taggerFile; + + public File mtaggerFile; + + // @Argument(name = "parser", required = true) + public File parserFile; + + // @Override + public int execute(String source, String target) throws Throwable { + + // Load lemmatizer + // LOGGER.info("Loading lemmatizer"); + // true = do uppercase lemmatization + Lemmatizer lemmatizer = new Lemmatizer(lemmatizerFile.getAbsolutePath()); + + // Load tagger + // LOGGER.info("Loading tagger"); + Tagger tagger = new Tagger(taggerFile.getAbsolutePath()); + + is2.mtag.Tagger mtagger = new is2.mtag.Tagger(mtaggerFile.getAbsolutePath()); + + // Load parser + // LOGGER.info("Loading parser"); + Parser parser = new Parser(parserFile.getAbsolutePath()); + + CONLLReader09 reader = new CONLLReader09(source); + CONLLWriter09 writer = new CONLLWriter09(target); + + int count = 0; + while (true) { + // Prepare the sentence + InstancesTagger is = new InstancesTagger(); + is.init(1, new MFO()); + + SentenceData09 instance = reader.getNext(is); + if (instance == null) + break; + SentenceData09 result = null; + try { + + System.out.print("\b\b\b\b" + count); + result = lemmatizer.apply(instance); + + result = tagger.apply(result); + result = mtagger.apply(result); + result = parser.apply(result); + + count++; + } catch (Exception e) { + + System.out.println("error" + result); + System.out.println("error" + instance); + e.printStackTrace(); + break; + } + + // Output + writer.write(result); + + } + writer.finishWriting(); + return 0; + } + + public static void main(String args[]) throws Throwable { + + if (args.length < 3) { + System.out.println("lemmatizer-model tagger-model parser-model source target"); + System.exit(0); + } + FullPipelineTest p = new FullPipelineTest(); + p.lemmatizerFile = new File(args[0]); + p.taggerFile = new File(args[1]); + p.mtaggerFile = new File(args[2]); + p.parserFile = new File(args[3]); + + p.execute(args[4], args[5]); + + } - // Output - writer.write(result); - - } - writer.finishWriting(); - return 0; - } - - public static void main(String args[]) throws Throwable { - - if (args.length<3) { - System.out.println("lemmatizer-model tagger-model parser-model source target"); - System.exit(0); - } - FullPipelineTest p = new FullPipelineTest(); - p.lemmatizerFile = new File(args[0]); - p.taggerFile = new File(args[1]); - p.mtaggerFile = new File(args[2]); - p.parserFile = new File(args[3]); - - p.execute(args[4], args[5]); - - } - } diff --git a/dependencyParser/mate-tools/src/examples/MorphTagger.java b/dependencyParser/mate-tools/src/examples/MorphTagger.java index 0088426..75bfc28 100644 --- a/dependencyParser/mate-tools/src/examples/MorphTagger.java +++ b/dependencyParser/mate-tools/src/examples/MorphTagger.java @@ -1,79 +1,79 @@ package examples; -import is2.data.SentenceData09; -import is2.lemmatizer.Lemmatizer; -import is2.lemmatizer.Options; - import java.io.IOException; import java.util.ArrayList; import java.util.StringTokenizer; +import is2.data.SentenceData09; +import is2.lemmatizer.Lemmatizer; +import is2.lemmatizer.Options; + /** * @author Bernd Bohnet, 13.09.2010 - * - * Illustrates the application of some components: lemmatizer, tagger, and parser + * + * Illustrates the application of some components: lemmatizer, tagger, + * and parser */ public class MorphTagger { - /** * How to lemmatize a sentences? */ public static void main(String[] args) throws IOException { - // Create a data container for a sentence SentenceData09 i = new SentenceData09(); - if (args.length==1) { // input might be a sentence: "This is another test ." + if (args.length == 1) { // input might be a sentence: "This is another + // test ." StringTokenizer st = new StringTokenizer(args[0]); ArrayList<String> forms = new ArrayList<String>(); - + forms.add("<root>"); - while(st.hasMoreTokens()) forms.add(st.nextToken()); - + while (st.hasMoreTokens()) + forms.add(st.nextToken()); + i.init(forms.toArray(new String[0])); - + } else { - // provide a default sentence - i.init(new String[] {"<root>","Häuser","hat","ein","Umlaut","."}); + // provide a default sentence + i.init(new String[] { "<root>", "Häuser", "hat", "ein", "Umlaut", "." }); } - //print the forms - for (String l : i.forms) System.out.println("forms : "+l); + // print the forms + for (String l : i.forms) + System.out.println("forms : " + l); // tell the lemmatizer the location of the model - is2.lemmatizer.Options optsLemmatizer = new Options(new String[] {"-model","models/lemma-ger.model"}); + is2.lemmatizer.Options optsLemmatizer = new Options(new String[] { "-model", "models/lemma-ger.model" }); // create a lemmatizer Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName); - // lemmatize a sentence; the result is stored in the stenenceData09 i + // lemmatize a sentence; the result is stored in the stenenceData09 i lemmatizer.apply(i); - // output the lemmata - for (String l : i.plemmas) System.out.println("lemma : "+l); - - - is2.mtag.Options morphologicTaggerOptions = new is2.mtag.Options(new String[] {"-model","models/mtag-ger.model"}); - + for (String l : i.plemmas) + System.out.println("lemma : " + l); + + is2.mtag.Options morphologicTaggerOptions = new is2.mtag.Options( + new String[] { "-model", "models/mtag-ger.model" }); + is2.mtag.Tagger mt = new is2.mtag.Tagger(morphologicTaggerOptions); - + try { - - - // SentenceData09 snt = is2.mtag.Main.out(i.forms, lemmata); - + + // SentenceData09 snt = is2.mtag.Main.out(i.forms, lemmata); + SentenceData09 snt = mt.apply(i); - for(String f : snt.pfeats) System.out.println("feats "+f); - - } catch(Exception e){ + for (String f : snt.pfeats) + System.out.println("feats " + f); + + } catch (Exception e) { e.printStackTrace(); } - - + } - } diff --git a/dependencyParser/mate-tools/src/examples/ParseOnly.java b/dependencyParser/mate-tools/src/examples/ParseOnly.java index cec31dd..23eaf59 100755 --- a/dependencyParser/mate-tools/src/examples/ParseOnly.java +++ b/dependencyParser/mate-tools/src/examples/ParseOnly.java @@ -4,47 +4,45 @@ import is2.data.SentenceData09; import is2.parser.Options; import is2.parser.Parser; - public class ParseOnly { public static void main(String[] args) { - if (args.length ==0) { + if (args.length == 0) { plain(); } - + } - + /** * This example shows how to parse a sentence. */ public static void plain() { - // initialize the options - String[] opts ={"-model","models/prs-eng-x.model"}; + // initialize the options + String[] opts = { "-model", "models/prs-eng-x.model" }; Options options = new Options(opts); - + // create a parser Parser parser = new Parser(options); - + // Create a data container for a sentence SentenceData09 i = new SentenceData09(); - - // Provide the sentence - i.init(new String[] {"<root>","This","is","a","test","."}); - i.setPPos(new String[]{"<root-POS>","DT","VBZ","DT","NN","."}); - - // parse the sentence + + // Provide the sentence + i.init(new String[] { "<root>", "This", "is", "a", "test", "." }); + i.setPPos(new String[] { "<root-POS>", "DT", "VBZ", "DT", "NN", "." }); + + // parse the sentence SentenceData09 out = parser.apply(i); - + // output the sentence and dependency tree System.out.println(out.toString()); - + // Get the parsing results out.getLabels(); out.getParents(); - + } - - + } diff --git a/dependencyParser/mate-tools/src/examples/Pipeline.java b/dependencyParser/mate-tools/src/examples/Pipeline.java index e55869d..dcb5a24 100644 --- a/dependencyParser/mate-tools/src/examples/Pipeline.java +++ b/dependencyParser/mate-tools/src/examples/Pipeline.java @@ -1,6 +1,5 @@ package examples; - -import java.io.File; + import java.io.IOException; import java.util.ArrayList; import java.util.StringTokenizer; @@ -13,70 +12,69 @@ import is2.tag.Tagger; /** * @author Bernd Bohnet, 13.09.2010 - * - * Illustrates the application of some components: lemmatizer, tagger, and parser + * + * Illustrates the application of some components: lemmatizer, tagger, + * and parser */ public class Pipeline { - - // how to parse a sentences and call the tools + // how to parse a sentences and call the tools public static void main(String[] args) throws IOException { - // Create a data container for a sentence SentenceData09 i = new SentenceData09(); - if (args.length==1) { // input might be a sentence: "This is another test ." + if (args.length == 1) { // input might be a sentence: "This is another + // test ." StringTokenizer st = new StringTokenizer(args[0]); ArrayList<String> forms = new ArrayList<String>(); - + forms.add("<root>"); - while(st.hasMoreTokens()) forms.add(st.nextToken()); - + while (st.hasMoreTokens()) + forms.add(st.nextToken()); + i.init(forms.toArray(new String[0])); - + } else { - // provide a default sentence - i.init(new String[] {"<root>","This","is","a","test","."}); + // provide a default sentence + i.init(new String[] { "<root>", "This", "is", "a", "test", "." }); } - //print the forms - for (String l : i.forms) System.out.println("form : "+l); + // print the forms + for (String l : i.forms) + System.out.println("form : " + l); // tell the lemmatizer the location of the model - is2.lemmatizer.Options optsLemmatizer = new is2.lemmatizer.Options(new String[] {"-model","models/lemma-eng.model"}); + is2.lemmatizer.Options optsLemmatizer = new is2.lemmatizer.Options( + new String[] { "-model", "models/lemma-eng.model" }); // create a lemmatizer Lemmatizer lemmatizer = new Lemmatizer(optsLemmatizer.modelName); - // lemmatize a sentence; the result is stored in the stenenceData09 i + // lemmatize a sentence; the result is stored in the stenenceData09 i i = lemmatizer.apply(i); - // output the lemmata - for (String l : i.plemmas) System.out.println("lemma : "+l); + for (String l : i.plemmas) + System.out.println("lemma : " + l); // tell the tagger the location of the model - is2.tag.Options optsTagger = new is2.tag.Options(new String[]{"-model","models/tag-eng.model"}); + is2.tag.Options optsTagger = new is2.tag.Options(new String[] { "-model", "models/tag-eng.model" }); Tagger tagger = new Tagger(optsTagger); + // String pos[] =tagger.tag(i.forms, i.lemmas); + // i.setPPos(pos); - -// String pos[] =tagger.tag(i.forms, i.lemmas); -// i.setPPos(pos); - - SentenceData09 tagged = tagger.tag(i); - for (String p : tagged.ppos) System.out.println("pos "+p); + for (String p : tagged.ppos) + System.out.println("pos " + p); - - - // initialize the options - Options optsParser = new Options(new String[]{"-model","models/prs-eng-x.model"}); + // initialize the options + Options optsParser = new Options(new String[] { "-model", "models/prs-eng-x.model" }); // create a parser Parser parser = new Parser(optsParser); - + // parse the sentence (you get a copy of the input i) SentenceData09 parse = parser.apply(tagged); @@ -84,12 +82,9 @@ public class Pipeline { // create some trash on the hard drive :-) is2.io.CONLLWriter09 writer = new is2.io.CONLLWriter09("example-out.txt"); - + writer.write(i); writer.finishWriting(); } - - - } diff --git a/dependencyParser/mate-tools/src/extractors/Extractor.java b/dependencyParser/mate-tools/src/extractors/Extractor.java index 327895d..190ccc6 100644 --- a/dependencyParser/mate-tools/src/extractors/Extractor.java +++ b/dependencyParser/mate-tools/src/extractors/Extractor.java @@ -1,5 +1,5 @@ /** - * + * */ package extractors; @@ -11,19 +11,18 @@ import is2.data.Instances; /** * @author Dr. Bernd Bohnet, 29.04.2011 - * - * + * + * */ public interface Extractor { - /** - * Initializes the Extractor general parts + * Initializes the Extractor general parts */ public void initStat(); /** - * Initializes the Extractor specific parts + * Initializes the Extractor specific parts */ public void init(); @@ -31,17 +30,15 @@ public interface Extractor { public void firstm(Instances is, int i, int w1, int w2, int j, Cluster cluster, long[] svs); - public void siblingm(Instances is, int i, short[] pos, int[] forms, - int[] lemmas, short[][] feats, int w1, int w2, int g, int j, - Cluster cluster, long[] svs, int n); + public void siblingm(Instances is, int i, short[] pos, int[] forms, int[] lemmas, short[][] feats, int w1, int w2, + int g, int j, Cluster cluster, long[] svs, int n); public void gcm(Instances is, int i, int w1, int w2, int g, int j, Cluster cluster, long[] svs); - + public int getType(); - public FV encodeCat(Instances is, int n, short[] pos, int[] is2, - int[] is3, short[] heads, short[] labels, short[][] s, Cluster cl, - FV pred); + public FV encodeCat(Instances is, int n, short[] pos, int[] is2, int[] is3, short[] heads, short[] labels, + short[][] s, Cluster cl, FV pred); public void setMaxForm(int integer); @@ -50,10 +47,6 @@ public interface Extractor { */ public int getMaxForm(); - public float encode3(short[] pos, short[] heads, short[] labs, DataF x); - - - } diff --git a/dependencyParser/mate-tools/src/extractors/ExtractorClusterStacked.java b/dependencyParser/mate-tools/src/extractors/ExtractorClusterStacked.java index 79a44ca..436bd5c 100755 --- a/dependencyParser/mate-tools/src/extractors/ExtractorClusterStacked.java +++ b/dependencyParser/mate-tools/src/extractors/ExtractorClusterStacked.java @@ -1,6 +1,5 @@ package extractors; - import is2.data.Cluster; import is2.data.D4; import is2.data.DataF; @@ -10,24 +9,22 @@ import is2.data.IFV; import is2.data.Instances; import is2.data.Long2IntInterface; import is2.data.MFB; -import is2.util.DB; - - final public class ExtractorClusterStacked implements Extractor { - public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos; - + public static int s_rel, s_word, s_type, s_dir, s_dist, s_feat, s_child, s_spath, s_lpath, s_pos; - final D4 d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ; + final D4 d0, dl1, dl2, dwr, dr, dwwp, dw, dwp, dlf, d3lp, d2lp, d2pw, d2pp; public final Long2IntInterface li; public ExtractorClusterStacked(Long2IntInterface li) { - - this.initFeatures(); - this.li=li; - d0 = new D4(li);dl1 = new D4(li);dl2 = new D4(li); + + ExtractorClusterStacked.initFeatures(); + this.li = li; + d0 = new D4(li); + dl1 = new D4(li); + dl2 = new D4(li); dwr = new D4(li); dr = new D4(li); dwwp = new D4(li); @@ -36,657 +33,1622 @@ final public class ExtractorClusterStacked implements Extractor { dwp = new D4(li); dlf = new D4(li); - d3lp = new D4(li); d2lp = new D4(li); d2pw = new D4(li); d2pp = new D4(li); + d3lp = new D4(li); + d2lp = new D4(li); + d2pw = new D4(li); + d2pp = new D4(li); } - public void initStat() { - - + @Override + public void initStat() { + MFB mf = new MFB(); s_rel = mf.getFeatureCounter().get(REL).intValue(); - s_pos = mf.getFeatureCounter().get(POS).intValue(); + s_pos = mf.getFeatureCounter().get(POS).intValue(); s_word = mf.getFeatureCounter().get(WORD).intValue(); - s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits(); + s_type = mf.getFeatureCounter().get(TYPE).intValue();// mf.getFeatureBits(); s_dir = mf.getFeatureCounter().get(DIR); la = mf.getValue(DIR, LA); ra = mf.getValue(DIR, RA); - s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST); - s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT); - s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH); - s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH); + s_dist = mf.getFeatureCounter().get(DIST);// mf.getFeatureBits(DIST); + s_feat = mf.getFeatureCounter().get(FEAT);// mf.getFeatureBits(Pipe.FEAT); + s_spath = mf.getFeatureCounter().get(Cluster.SPATH) == null ? 0 : mf.getFeatureCounter().get(Cluster.SPATH);// mf.getFeatureBits(Cluster.SPATH); + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH) == null ? 0 : mf.getFeatureCounter().get(Cluster.LPATH);// mf.getFeatureBits(Cluster.LPATH); } - public void init(){ - // DB.println("init"); - d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos; - dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; - dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos; - dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word; - dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word; - dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos; - d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath; - d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; - d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; - d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; + @Override + public void init() { + // DB.println("init"); + d0.a0 = s_type; + d0.a1 = s_pos; + d0.a2 = s_pos; + d0.a3 = s_pos; + d0.a4 = s_pos; + d0.a5 = s_pos; + d0.a6 = s_pos; + d0.a7 = s_pos; + dl1.a0 = s_type; + dl1.a1 = s_rel; + dl1.a2 = s_pos; + dl1.a3 = s_pos; + dl1.a4 = s_pos; + dl1.a5 = s_pos; + dl1.a6 = s_pos; + dl1.a7 = s_pos; + dl2.a0 = s_type; + dl2.a1 = s_rel; + dl2.a2 = s_word; + dl2.a3 = s_pos; + dl2.a4 = s_pos; + dl2.a5 = s_pos; + dl2.a6 = s_pos; + dl2.a7 = s_pos; + dwp.a0 = s_type; + dwp.a1 = s_rel; + dwp.a2 = s_word; + dwp.a3 = s_pos; + dwp.a4 = s_pos; + dwp.a5 = s_word; + dwwp.a0 = s_type; + dwwp.a1 = s_rel; + dwwp.a2 = s_word; + dwwp.a3 = s_word; + dwwp.a4 = s_pos; + dwwp.a5 = s_word; + dlf.a0 = s_type; + dlf.a1 = s_rel; + dlf.a2 = s_pos; + dlf.a3 = s_pos; + dlf.a4 = s_feat; + dlf.a5 = s_feat; + dlf.a6 = s_pos; + dlf.a7 = s_pos; + d3lp.a0 = s_type; + d3lp.a1 = s_rel; + d3lp.a2 = s_lpath; + d3lp.a3 = s_lpath; + d3lp.a4 = s_lpath; + d3lp.a5 = s_word; + d3lp.a6 = s_spath; + d3lp.a7 = s_spath; + d2lp.a0 = s_type; + d2lp.a1 = s_rel; + d2lp.a2 = s_lpath; + d2lp.a3 = s_lpath; + d2lp.a4 = s_word; + d2lp.a5 = s_word; // d3lp.a6 = s_spath; d3lp.a7 = s_spath; + d2pw.a0 = s_type; + d2pw.a1 = s_rel; + d2pw.a2 = s_lpath; + d2pw.a3 = s_lpath; + d2pw.a4 = s_word; + d2pw.a5 = s_word; // d3lp.a6 = s_spath; d3lp.a7 = s_spath; + d2pp.a0 = s_type; + d2pp.a1 = s_rel; + d2pp.a2 = s_lpath; + d2pp.a3 = s_lpath; + d2pp.a4 = s_pos; + d2pp.a5 = s_pos; // d3lp.a6 = s_spath; d3lp.a7 = s_spath; } - - public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f) - { - - d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean(); - - d3lp.clean(); d2lp.clean();d2pw.clean(); d2pp.clean(); - - int n=1; - int dir= (p < d)? ra:la; - d0.v0= n++; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4; - int end= (p >= d ? p : d); + @Override + public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f) { + + d0.clean(); + dl1.clean(); + dl2.clean(); + dwp.clean(); + dwwp.clean(); + dlf.clean(); + d3lp.clean(); + + d3lp.clean(); + d2lp.clean(); + d2pw.clean(); + d2pp.clean(); + + int n = 1; + int dir = (p < d) ? ra : la; + d0.v0 = n++; + d0.v1 = pposs[p]; + d0.v2 = pposs[d]; // d0.stop=4; + int end = (p >= d ? p : d); int start = (p >= d ? d : p) + 1; - for(int i = start ; i <end ; i++) { - d0.v3=pposs[i]; + for (int i = start; i < end; i++) { + d0.v3 = pposs[i]; d0.cz4(); - d0.csa(s_dir,dir,f); + d0.csa(s_dir, dir, f); } return n; } + @Override + public void firstm(Instances is, int i, int prnt, int dpnt, int label, Cluster cluster, long[] f) { - public void firstm(Instances is, int i, - int prnt, int dpnt, int label, Cluster cluster, long[] f) - { - - - //short[] pposs, int[] form, int[] lemmas, short[][] feats - for(int k=0;k<f.length;k++) f[k]=0; + // short[] pposs, int[] form, int[] lemmas, short[][] feats + for (int k = 0; k < f.length; k++) + f[k] = 0; short[] pposs = is.pposs[i]; - int[] form =is.forms[i]; + int[] form = is.forms[i]; short[][] feats = is.feats[i]; - - - int pF = form[prnt],dF = form[dpnt]; - int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt]; - int pP = pposs[prnt],dP = pposs[dpnt]; - - int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF); - - final int dir= (prnt < dpnt)? ra:la; - - if (pF>maxForm) pF=-1; - if (pL>maxForm) pL=-1; - - if (dF>maxForm) dF=-1; - if (dL>maxForm) dL=-1; - - - int n=3,c=0; - - dl2.v1=label; - dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); - - - dwwp.v1=label; - dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir); - - dl1.v1=label; - dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.csa(s_dir,dir); - - int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str; - int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end; - - int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str; - int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end; - - int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd; - int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd; - - - - dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v5=dPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=pPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - - - dl1.v0= n++; dl1.v3=pPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=dPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=dPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=pPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - - dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v5=dPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=pPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - - dl1.v0= n++; dl1.v3=pPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=dPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=dPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=pPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - - - - dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.cz5(); f[n++]=dl2.getVal(); - - - dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.cz5(); f[n++]=dl2.getVal(); - - - dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir); - dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir); - dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[n++]=dwwp.csa(s_dir,dir); - - - - // lemmas - dl2.v1=label; - dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); + int pF = form[prnt], dF = form[dpnt]; + int pL = is.plemmas[i][prnt], dL = is.plemmas[i][dpnt]; + int pP = pposs[prnt], dP = pposs[dpnt]; + + int prntLS = pF == -1 ? -1 : cluster.getLP(pF), chldLS = dF == -1 ? -1 : cluster.getLP(dF); + + final int dir = (prnt < dpnt) ? ra : la; + + if (pF > maxForm) + pF = -1; + if (pL > maxForm) + pL = -1; + + if (dF > maxForm) + dF = -1; + if (dL > maxForm) + dL = -1; + + int n = 3, c = 0; + + dl2.v1 = label; + dl2.v0 = n++; + dl2.v2 = pF; + dl2.v3 = dP; + dl2.cz4(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.v2 = dF; + dl2.v3 = pP; + dl2.cz4(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.csa(s_dir, dir); + + dwwp.v1 = label; + dwwp.v0 = n++; + dwwp.v2 = pF; + dwwp.v3 = dF; + dwwp.cz4(); + f[c++] = dwwp.csa(s_dir, dir); + + dl1.v1 = label; + dl1.v0 = n++; + dl1.v2 = dP; + dl1.cz3(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = pP; + dl1.cz3(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = dP; + dl1.cz4(); + f[c++] = dl1.csa(s_dir, dir); + + int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str; + int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1] : s_end, + dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1] : s_end; + + int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str; + int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2] : s_end, + dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2] : s_end; + + int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd; + int pFp1 = prnt < form.length - 1 ? form[prnt + 1] : s_stwrd, + dFp1 = dpnt < form.length - 1 ? form[dpnt + 1] : s_stwrd; + + dl1.v0 = n++; + dl1.v2 = pP; + dl1.v3 = pPp1; + dl1.v4 = dP; + dl1.v5 = dPp1; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v5 = dPm1; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = pPm1; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v5 = dPp1; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + + dl1.v0 = n++; + dl1.v3 = pPm1; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = dPm1; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = dPp1; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = pPp1; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + + dl1.v0 = n++; + dl1.v2 = pP; + dl1.v3 = pPp2; + dl1.v4 = dP; + dl1.v5 = dPp2; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v5 = dPm2; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = pPm2; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v5 = dPp2; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + + dl1.v0 = n++; + dl1.v3 = pPm2; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = dPm2; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = dPp2; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = pPp2; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + + dl2.v0 = n++; + dl2.v3 = dFm1; + dl2.v3 = pPp1; + dl2.v4 = pP; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = dFp1; + dl2.v3 = pPm1; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFm1; + dl2.v3 = dPp1; + dl2.v4 = dP; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFp1; + dl2.v3 = dPm1; + dl2.cz5(); + f[n++] = dl2.getVal(); + + dl2.v0 = n++; + dl2.v3 = dFm1; + dl2.v3 = dPm2; + dl2.v4 = pP; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = dFp1; + dl2.v3 = dPp2; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFm1; + dl2.v3 = pPm2; + dl2.v4 = dP; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFp1; + dl2.v3 = pPp2; + dl2.cz5(); + f[n++] = dl2.getVal(); + + dwwp.v0 = n++; + dwwp.v2 = pF; + dwwp.v3 = dF; + dwwp.v4 = dP; + dwwp.cz5(); + f[n++] = dwwp.csa(s_dir, dir); + dwwp.v0 = n++; + dwwp.v2 = pF; + dwwp.v3 = dF; + dwwp.v4 = pP; + dwwp.cz5(); + f[n++] = dwwp.csa(s_dir, dir); + dwwp.v0 = n++; + dwwp.v2 = dF; + dwwp.v3 = pF; + dwwp.v4 = pP; + dwwp.v4 = dP; + dwwp.cz6(); + f[n++] = dwwp.csa(s_dir, dir); - - dwwp.v1=label; - dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir); - - dwp.v1= label; - dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; dwp.cz6(); f[c++]=dwp.csa(s_dir,dir); - dwp.v0=n++;dwp.cz5(); f[c++]=dwp.csa(s_dir,dir); + // lemmas - dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.csa(s_dir,dir); - dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir); - dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir); + dl2.v1 = label; + dl2.v0 = n++; + dl2.v2 = pL; + dl2.v3 = dP; + dl2.cz4(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.v2 = dL; + dl2.v3 = pP; + dl2.cz4(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.csa(s_dir, dir); + + dwwp.v1 = label; + dwwp.v0 = n++; + dwwp.v2 = pL; + dwwp.v3 = dL; + dwwp.cz4(); + f[c++] = dwwp.csa(s_dir, dir); + + dwp.v1 = label; + dwp.v0 = n++; + dwp.v2 = dL; + dwp.v3 = pP; + dwp.v4 = dP; + dwp.v5 = pL; + dwp.cz6(); + f[c++] = dwp.csa(s_dir, dir); + dwp.v0 = n++; + dwp.cz5(); + f[c++] = dwp.csa(s_dir, dir); + + dwp.v0 = n++; + dwp.v2 = pL; + dwp.cz5(); + f[c++] = dwp.csa(s_dir, dir); + dwwp.v0 = n++; + dwwp.v2 = pL; + dwwp.v3 = dL; + dwwp.v4 = dP; + dwwp.cz5(); + f[c++] = dwwp.csa(s_dir, dir); + dwwp.v0 = n++; + dwwp.v4 = pP; + dwwp.cz5(); + f[c++] = dwwp.csa(s_dir, dir); - // cluster - d2pw.v1=label; - d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.csa(s_dir,dir); - d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir); - d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir); - d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.csa(s_dir,dir); - - - d2pp.v1=label; - d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.csa(s_dir,dir); - d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); - d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); - d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.csa(s_dir,dir); - - - short[] prel = is.plabels[i]; - short[] phead = is.pheads[i]; + d2pw.v1 = label; + d2pw.v0 = n++; + d2pw.v2 = prntLS; + d2pw.v3 = chldLS; + d2pw.cz4(); + f[c++] = d2pw.csa(s_dir, dir); + d2pw.v0 = n++; + d2pw.v4 = pF; + d2pw.cz5(); + f[c++] = d2pw.csa(s_dir, dir); + d2pw.v0 = n++; + d2pw.v4 = dF; + d2pw.cz5(); + f[c++] = d2pw.csa(s_dir, dir); + d2pw.v0 = n++; + d2pw.v5 = pF; + d2pw.cz6(); + f[c++] = d2pw.csa(s_dir, dir); + + d2pp.v1 = label; + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = chldLS; + d2pp.cz4(); + f[c++] = d2pp.csa(s_dir, dir); + d2pp.v0 = n++; + d2pp.v4 = pP; + d2pp.cz5(); + f[c++] = d2pp.csa(s_dir, dir); + d2pp.v0 = n++; + d2pp.v4 = dP; + d2pp.cz5(); + f[c++] = d2pp.csa(s_dir, dir); + d2pp.v0 = n++; + d2pp.v5 = pP; + d2pp.cz6(); + f[c++] = d2pp.csa(s_dir, dir); - - //take those in for stacking - // dl2.v1=label; - // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir); - // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.csa(s_dir,dir); - - - - if (feats==null) return; - - short[] featsP =feats[prnt], featsD =feats[dpnt]; - dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP; + // take those in for stacking + // dl2.v1=label; + // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; + // dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir); + // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; + // dl2.cz5(); f[c++]=dl2.csa(s_dir,dir); + + if (feats == null) + return; + + short[] featsP = feats[prnt], featsD = feats[dpnt]; + dlf.v0 = n++; + dlf.v1 = label; + dlf.v2 = pP; + dlf.v3 = dP; extractFeat(f, c, dir, featsP, featsD); return; } + @Override + public void gcm(Instances is, int i, int p, int d, int gc, int label, Cluster cluster, long[] f) { + for (int k = 0; k < f.length; k++) + f[k] = 0; - public void gcm(Instances is , int i, int p, int d, int gc, int label,Cluster cluster, long[] f) { - - for(int k=0;k<f.length;k++) f[k]=0; + short[] pos = is.pposs[i]; + int[] forms = is.forms[i]; + int[] lemmas = is.plemmas[i]; + short[][] feats = is.feats[i]; - short[] pos= is.pposs[i]; - int[] forms=is.forms[i]; - int[] lemmas=is.plemmas[i]; - short[][] feats=is.feats[i]; - int pP = pos[p], dP = pos[d]; int prntF = forms[p], chldF = forms[d]; int prntL = lemmas[p], chldL = lemmas[d]; - int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); + int prntLS = prntF == -1 ? -1 : cluster.getLP(prntF), chldLS = chldF == -1 ? -1 : cluster.getLP(chldF); - int gP = gc != -1 ? pos[gc] : s_str; - int gcF = gc != -1 ? forms[gc] : s_stwrd; + int gP = gc != -1 ? pos[gc] : s_str; + int gcF = gc != -1 ? forms[gc] : s_stwrd; int gcL = gc != -1 ? lemmas[gc] : s_stwrd; - int gcLS = (gc != -1) && (gcF!=-1) ? cluster.getLP(gcF) : s_stwrd; - - if (prntF>maxForm) prntF=-1; - if (prntL>maxForm) prntL=-1; - - if (chldF>maxForm) chldF=-1; - if (chldL>maxForm) chldL=-1; - - if (gcF>maxForm) gcF=-1; - if (gcL>maxForm) gcL=-1; - - - int dir= (p < d)? ra:la, dir_gra =(d < gc)? ra:la; - - int n=84,c=0; - - //dl1.v023(); - dl1.v1=label; - dl1.v0= n++; dl1.v2=pP; dl1.v3=dP;dl1.v4=gP; dl1.cz5(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); - dl1.v0= n++; dl1.v2=pP; dl1.v3=gP; dl1.cz4();dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); - dl1.v0= n++; dl1.v2=dP; dl1.cz4(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); - - dwwp.v1=label; - dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=gcF; - dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); - - dwwp.v0= n++; dwwp.v2=chldF; dwwp.v3=gcF; - dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); - - dwp.v1=label; - dwp.v0= n++; dwp.v2=gcF; dwp.v3=pP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=gcF; dwp.v3=dP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=prntF; dwp.v3=gP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=chldF; dwp.v3=gP; - dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra); - + int gcLS = (gc != -1) && (gcF != -1) ? cluster.getLP(gcF) : s_stwrd; + + if (prntF > maxForm) + prntF = -1; + if (prntL > maxForm) + prntL = -1; + + if (chldF > maxForm) + chldF = -1; + if (chldL > maxForm) + chldL = -1; + + if (gcF > maxForm) + gcF = -1; + if (gcL > maxForm) + gcL = -1; + + int dir = (p < d) ? ra : la, dir_gra = (d < gc) ? ra : la; + + int n = 84, c = 0; + + // dl1.v023(); + dl1.v1 = label; + dl1.v0 = n++; + dl1.v2 = pP; + dl1.v3 = dP; + dl1.v4 = gP; + dl1.cz5(); + dl1.cs(s_dir, dir); + f[c++] = dl1.csa(s_dir, dir_gra); + dl1.v0 = n++; + dl1.v2 = pP; + dl1.v3 = gP; + dl1.cz4(); + dl1.cs(s_dir, dir); + f[c++] = dl1.csa(s_dir, dir_gra); + dl1.v0 = n++; + dl1.v2 = dP; + dl1.cz4(); + dl1.cs(s_dir, dir); + f[c++] = dl1.csa(s_dir, dir_gra); + + dwwp.v1 = label; + dwwp.v0 = n++; + dwwp.v2 = prntF; + dwwp.v3 = gcF; + dwwp.cz4(); + dwwp.cs(s_dir, dir); + f[c++] = dwwp.csa(s_dir, dir_gra); + + dwwp.v0 = n++; + dwwp.v2 = chldF; + dwwp.v3 = gcF; + dwwp.cz4(); + dwwp.cs(s_dir, dir); + f[c++] = dwwp.csa(s_dir, dir_gra); + + dwp.v1 = label; + dwp.v0 = n++; + dwp.v2 = gcF; + dwp.v3 = pP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = gcF; + dwp.v3 = dP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = prntF; + dwp.v3 = gP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = chldF; + dwp.v3 = gP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); // lemma - dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=gcL; - dwwp.cz4();dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); - - dwwp.v0= n++; dwwp.v2=chldL; dwwp.v3=gcL; - dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=gcL; dwp.v3=pP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=gcL; dwp.v3=dP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=prntL; dwp.v3=gP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=chldL; dwp.v3=gP; - dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra); - - - // clusters - - d2lp.v1= label; - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);// f.add(li.l2i(l)); - d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra); - d3lp.v0= n++; d3lp.v1= label; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=gcLS; d3lp.cz5(); d3lp.cs(s_dir,dir);f[c++]=d3lp.csa(s_dir,dir_gra); - - //_f83; - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=gcF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.v4=chldF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); - d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.v4=prntF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); - - d2pp.v1= label; - d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=gP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); - d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=gcLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); - d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=gcLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); + dwwp.v0 = n++; + dwwp.v2 = prntL; + dwwp.v3 = gcL; + dwwp.cz4(); + dwwp.cs(s_dir, dir); + f[c++] = dwwp.csa(s_dir, dir_gra); + + dwwp.v0 = n++; + dwwp.v2 = chldL; + dwwp.v3 = gcL; + dwwp.cz4(); + dwwp.cs(s_dir, dir); + f[c++] = dwwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = gcL; + dwp.v3 = pP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = gcL; + dwp.v3 = dP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = prntL; + dwp.v3 = gP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = chldL; + dwp.v3 = gP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + // clusters + d2lp.v1 = label; + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = gcLS; + d2lp.cz4(); + d2lp.cs(s_dir, dir); + f[c++] = d2lp.csa(s_dir, dir_gra);// f.add(li.l2i(l)); + d2lp.v0 = n++; + d2lp.v2 = chldLS; + d2lp.v3 = gcLS; + d2lp.cz4(); + d2lp.cs(s_dir, dir); + f[c++] = d2lp.csa(s_dir, dir_gra); + d3lp.v0 = n++; + d3lp.v1 = label; + d3lp.v2 = prntLS; + d3lp.v3 = chldLS; + d3lp.v4 = gcLS; + d3lp.cz5(); + d3lp.cs(s_dir, dir); + f[c++] = d3lp.csa(s_dir, dir_gra); + + // _f83; + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = chldLS; + d2lp.v4 = gcF; + d2lp.cz5(); + f[c++] = d2lp.csa(s_dir, dir); + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = gcLS; + d2lp.v4 = chldF; + d2lp.cz5(); + f[c++] = d2lp.csa(s_dir, dir); + d2lp.v0 = n++; + d2lp.v2 = chldLS; + d2lp.v3 = gcLS; + d2lp.v4 = prntF; + d2lp.cz5(); + f[c++] = d2lp.csa(s_dir, dir); + + d2pp.v1 = label; + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = chldLS; + d2pp.v4 = gP; + d2pp.cz5(); + f[c++] = d2pp.csa(s_dir, dir); + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = gcLS; + d2pp.v4 = dP; + d2pp.cz5(); + f[c++] = d2pp.csa(s_dir, dir); + d2pp.v0 = n++; + d2pp.v2 = chldLS; + d2pp.v3 = gcLS; + d2pp.v4 = pP; + d2pp.cz5(); + f[c++] = d2pp.csa(s_dir, dir); // linear features int prntPm1 = p != 0 ? pos[p - 1] : s_str; // parent-pos-minus1 - int chldPm1 = d - 1 >=0 ? pos[d - 1] : s_str; // child-pos-minus1 + int chldPm1 = d - 1 >= 0 ? pos[d - 1] : s_str; // child-pos-minus1 int prntPp1 = p != pos.length - 1 ? pos[p + 1] : s_end; int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end; - int gcPm1 = gc > 0 ? pos[gc - 1] : s_str; - int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end; - - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP;dl1.v5=chldPp1; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=prntPp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - - - int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend; - int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; - int gcLSp1 = gc < pos.length -1 ? forms[gc + 1] ==-1?-1:cluster.getLP(forms[gc + 1]) : s_end; - - int pLSm1 = p != 0 ? lemmas[p - 1]==-1?-1:cluster.getLP(lemmas[p - 1]): _cstr; - int cLSm1 = d - 1 >=0 ? lemmas[d - 1] ==-1?-1:cluster.getLP(lemmas[d - 1]):_cstr; - int gcLSm1 = gc > 0 ? lemmas[gc - 1] ==-1?-1:cluster.getLP(lemmas[gc - 1]) : _cstr; - - - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=dP; dl1.cz5();f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=cLSm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=pLSp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - - - - short[] prel = is.plabels[i],phead=is.pheads[i]; - - int g = p==phead[d]?1:2 ; - if (gc>=0) g += d==phead[gc]?4:8; - - int gr = gc==-1?s_relend:prel[gc]; + int gcPm1 = gc > 0 ? pos[gc - 1] : s_str; + int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end; + + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPm1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = dP; + dl1.v4 = chldPp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = dP; + dl1.v4 = chldPm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = chldPm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcPm1; + dl1.v3 = gP; + dl1.v4 = chldPm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = dP; + dl1.v5 = chldPp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcPm1; + dl1.v3 = gP; + dl1.v4 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPm1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = pP; + dl1.v4 = prntPp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = pP; + dl1.v4 = prntPm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = prntPm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcPm1; + dl1.v3 = gP; + dl1.v4 = prntPm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = pP; + dl1.v5 = prntPp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcPm1; + dl1.v3 = gP; + dl1.v4 = pP; + dl1.v5 = prntPp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + + int pLSp1 = p != pos.length - 1 ? forms[p + 1] == -1 ? -1 : cluster.getLP(forms[p + 1]) : _cend; + int cLSp1 = d != pos.length - 1 ? forms[d + 1] == -1 ? -1 : cluster.getLP(forms[d + 1]) : _cend; + int gcLSp1 = gc < pos.length - 1 ? forms[gc + 1] == -1 ? -1 : cluster.getLP(forms[gc + 1]) : s_end; + + int pLSm1 = p != 0 ? lemmas[p - 1] == -1 ? -1 : cluster.getLP(lemmas[p - 1]) : _cstr; + int cLSm1 = d - 1 >= 0 ? lemmas[d - 1] == -1 ? -1 : cluster.getLP(lemmas[d - 1]) : _cstr; + int gcLSm1 = gc > 0 ? lemmas[gc - 1] == -1 ? -1 : cluster.getLP(lemmas[gc - 1]) : _cstr; + + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSm1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = dP; + dl1.v4 = cLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = dP; + dl1.v4 = cLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcLSm1; + dl1.v3 = gP; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = cLSm1; + dl1.v3 = gP; + dl1.v4 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSm1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = pP; + dl1.v4 = pLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = pP; + dl1.v4 = pLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcLSm1; + dl1.v3 = gP; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcLSm1; + dl1.v3 = gP; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + + if (gc >= 0) { + } // take those in for stacking - /* - dl2.v1=label; - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); - - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); - -*/ - if (feats==null) return; - - short[] featsP =feats[d]; - short[] featsD =gc!=-1?feats[gc]:null; - - dlf.v0= n++; dlf.v1=label; dlf.v2=gP; dlf.v3=dP; + /* + * dl2.v1=label; dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2. + * csa(s_dir,dir); dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2. + * csa(s_dir,dir); dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f + * [c++]=dl2.csa(s_dir,dir); + * + * dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa( + * s_dir,dir); dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa( + * s_dir,dir); dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++] + * =dl2.csa(s_dir,dir); + * + */ + if (feats == null) + return; + + short[] featsP = feats[d]; + short[] featsD = gc != -1 ? feats[gc] : null; + + dlf.v0 = n++; + dlf.v1 = label; + dlf.v2 = gP; + dlf.v3 = dP; extractFeat(f, c, dir, featsP, featsD); return; } + @Override + public void siblingm(Instances is, int i, short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, + int sblng, int label, Cluster cluster, long[] f, int v) { - public void siblingm(Instances is , int i,short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, int sblng, int label, Cluster cluster, long[] f, int v) - { - - for(int k=0;k<f.length;k++) f[k]=0; + for (int k = 0; k < f.length; k++) + f[k] = 0; int pP = pos[prnt], dP = pos[d]; - int prntF = forms[prnt],chldF = forms[d]; + int prntF = forms[prnt], chldF = forms[d]; int prntL = lemmas[prnt], chldL = lemmas[d]; - int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); - - int sP = sblng!=-1 ? pos[sblng] : s_str, sblF = sblng!=-1 ? forms[sblng] : s_stwrd, sblL = sblng!=-1 ? lemmas[sblng] : s_stwrd; + int prntLS = prntF == -1 ? -1 : cluster.getLP(prntF), chldLS = chldF == -1 ? -1 : cluster.getLP(chldF); - int sblLS = (sblng != -1)&&(sblF!=-1) ? cluster.getLP(sblF) : s_stwrd; + int sP = sblng != -1 ? pos[sblng] : s_str, sblF = sblng != -1 ? forms[sblng] : s_stwrd, + sblL = sblng != -1 ? lemmas[sblng] : s_stwrd; + int sblLS = (sblng != -1) && (sblF != -1) ? cluster.getLP(sblF) : s_stwrd; - int dir= (prnt < d)? ra:la; + int dir = (prnt < d) ? ra : la; - int abs = Math.abs(prnt-d); + int abs = Math.abs(prnt - d); final int dist; - if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2; - else if (abs==2)dist=d1; else dist=di0; - - int n=147; - - if (prntF>maxForm) prntF=-1; - if (prntL>maxForm) prntL=-1; - - if (chldF>maxForm) chldF=-1; - if (chldL>maxForm) chldL=-1; - - if (sblF>maxForm) sblF=-1; - if (sblL>maxForm) sblL=-1; - - - dl1.v0= n++; dl1.v1=label;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[0]=dl1.csa(s_dir,dir);f[1]=dl1.csa(s_dist,dist); - dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[2]=dl1.csa(s_dir,dir); f[3]=dl1.csa(s_dist,dist); - dl1.v0= n++; dl1.v2=dP;dl1.cz4(); f[4]=dl1.csa(s_dir,dir); f[5]=dl1.csa(s_dist,dist); + if (abs > 10) + dist = d10; + else if (abs > 5) + dist = d5; + else if (abs == 5) + dist = d4; + else if (abs == 4) + dist = d3; + else if (abs == 3) + dist = d2; + else if (abs == 2) + dist = d1; + else + dist = di0; + + int n = 147; + + if (prntF > maxForm) + prntF = -1; + if (prntL > maxForm) + prntL = -1; + + if (chldF > maxForm) + chldF = -1; + if (chldL > maxForm) + chldL = -1; + + if (sblF > maxForm) + sblF = -1; + if (sblL > maxForm) + sblL = -1; + + dl1.v0 = n++; + dl1.v1 = label; + dl1.v2 = pP; + dl1.v3 = dP; + dl1.v4 = sP; + dl1.cz5(); + f[0] = dl1.csa(s_dir, dir); + f[1] = dl1.csa(s_dist, dist); + dl1.v0 = n++; + dl1.v3 = sP; + dl1.cz4(); + f[2] = dl1.csa(s_dir, dir); + f[3] = dl1.csa(s_dist, dist); + dl1.v0 = n++; + dl1.v2 = dP; + dl1.cz4(); + f[4] = dl1.csa(s_dir, dir); + f[5] = dl1.csa(s_dist, dist); // sibling only could be tried - dwwp.v1=label; - dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=sblF; dwwp.cz4(); f[6]=dwwp.csa(s_dir,dir); f[7]=dwwp.csa(s_dist,dist); - dwwp.v0= n++; dwwp.v2=chldF; dwwp.cz4(); f[8]=dwwp.csa(s_dir,dir); f[9]=dwwp.csa(s_dist,dist); - dwp.v0= n++; dwp.v1=label; dwp.v2=sblF; dwp.v3=pP; dwp.cz4(); f[10]=dwp.csa(s_dir,dir); f[11]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[12]=dwp.csa(s_dir,dir); f[13]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntF; dwp.v3=sP; dwp.cz4(); f[14]=dwp.csa(s_dir,dir); f[15]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldF; dwp.cz4(); f[16]=dwp.csa(s_dir,dir); f[17]=dwp.csa(s_dist,dist); - - //lemmas - dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=sblL; dwwp.cz4(); f[18]=dwwp.csa(s_dir,dir); - dwwp.v0= n++; dwwp.v2=chldL; dwwp.cz4(); f[19]=dwwp.csa(s_dir,dir); f[20]=dwwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=sblL; dwp.v3=pP; dwp.cz4(); f[21]=dwp.csa(s_dir,dir); f[22]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label; */ dwp.v3=dP; dwp.cz4(); f[23]=dwp.csa(s_dir,dir);f[24]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntL; dwp.v3=sP; dwp.cz4(); f[25]=dwp.csa(s_dir,dir); f[26]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldL; dwp.cz4(); f[27]=dwp.csa(s_dir,dir);f[28]=dwp.csa(s_dist,dist); + dwwp.v1 = label; + dwwp.v0 = n++; + dwwp.v2 = prntF; + dwwp.v3 = sblF; + dwwp.cz4(); + f[6] = dwwp.csa(s_dir, dir); + f[7] = dwwp.csa(s_dist, dist); + dwwp.v0 = n++; + dwwp.v2 = chldF; + dwwp.cz4(); + f[8] = dwwp.csa(s_dir, dir); + f[9] = dwwp.csa(s_dist, dist); + dwp.v0 = n++; + dwp.v1 = label; + dwp.v2 = sblF; + dwp.v3 = pP; + dwp.cz4(); + f[10] = dwp.csa(s_dir, dir); + f[11] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */dwp.v3 = dP; + dwp.cz4(); + f[12] = dwp.csa(s_dir, dir); + f[13] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = prntF; + dwp.v3 = sP; + dwp.cz4(); + f[14] = dwp.csa(s_dir, dir); + f[15] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = chldF; + dwp.cz4(); + f[16] = dwp.csa(s_dir, dir); + f[17] = dwp.csa(s_dist, dist); + // lemmas + dwwp.v0 = n++; + dwwp.v2 = prntL; + dwwp.v3 = sblL; + dwwp.cz4(); + f[18] = dwwp.csa(s_dir, dir); + dwwp.v0 = n++; + dwwp.v2 = chldL; + dwwp.cz4(); + f[19] = dwwp.csa(s_dir, dir); + f[20] = dwwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = sblL; + dwp.v3 = pP; + dwp.cz4(); + f[21] = dwp.csa(s_dir, dir); + f[22] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v3 = dP; + dwp.cz4(); + f[23] = dwp.csa(s_dir, dir); + f[24] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = prntL; + dwp.v3 = sP; + dwp.cz4(); + f[25] = dwp.csa(s_dir, dir); + f[26] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = chldL; + dwp.cz4(); + f[27] = dwp.csa(s_dir, dir); + f[28] = dwp.csa(s_dist, dist); // clusters - d2lp.v1=label; - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[29]=d2lp.csa(s_dir,dir); - d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[30]=d2lp.csa(s_dir,dir); f[31]=d2lp.csa(s_dist,dist); - - d3lp.v1= label; - d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[32]=d3lp.csa(s_dir,dir); - - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sblF; d2lp.cz5(); f[33]=d2lp.csa(s_dir,dir); f[34]=d2lp.csa(s_dist,dist); - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=chldF; d2lp.cz5(); f[35]=d2lp.csa(s_dir,dir); f[36]=d2lp.csa(s_dist,dist); - d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=prntF; d2lp.cz5(); f[37]=d2lp.csa(s_dir,dir); f[38]=d2lp.csa(s_dist,dist); - - d2pp.v1=label; - d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[39]=d2pp.csa(s_dir,dir); f[40]=d2pp.csa(s_dist,dist); - d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[41]=d2pp.csa(s_dir,dir); f[42]=d2pp.csa(s_dist,dist); - d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[43]=d2pp.csa(s_dir,dir); f[44]=d2pp.csa(s_dist,dist); - - - int prntPm1 = prnt!=0 ? pos[prnt-1] : s_str; - int chldPm1 = d-1>=0 ? pos[d-1] : s_str; - int prntPp1 = prnt!=pos.length-1 ? pos[prnt+1] : s_end; - int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end; + d2lp.v1 = label; + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = sblLS; + d2lp.cz4(); + f[29] = d2lp.csa(s_dir, dir); + d2lp.v0 = n++; + d2lp.v2 = chldLS; + d2lp.v3 = sblLS; + d2lp.cz4(); + f[30] = d2lp.csa(s_dir, dir); + f[31] = d2lp.csa(s_dist, dist); + + d3lp.v1 = label; + d3lp.v0 = n++; + d3lp.v2 = prntLS; + d3lp.v3 = chldLS; + d3lp.v4 = sblLS; + d3lp.cz5(); + f[32] = d3lp.csa(s_dir, dir); + + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = chldLS; + d2lp.v4 = sblF; + d2lp.cz5(); + f[33] = d2lp.csa(s_dir, dir); + f[34] = d2lp.csa(s_dist, dist); + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = sblLS; + d2lp.v4 = chldF; + d2lp.cz5(); + f[35] = d2lp.csa(s_dir, dir); + f[36] = d2lp.csa(s_dist, dist); + d2lp.v0 = n++; + d2lp.v2 = chldLS; + d2lp.v3 = sblLS; + d2lp.v4 = prntF; + d2lp.cz5(); + f[37] = d2lp.csa(s_dir, dir); + f[38] = d2lp.csa(s_dist, dist); + + d2pp.v1 = label; + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = chldLS; + d2pp.v4 = sP; + d2pp.cz5(); + f[39] = d2pp.csa(s_dir, dir); + f[40] = d2pp.csa(s_dist, dist); + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = sblLS; + d2pp.v4 = dP; + d2pp.cz5(); + f[41] = d2pp.csa(s_dir, dir); + f[42] = d2pp.csa(s_dist, dist); + d2pp.v0 = n++; + d2pp.v2 = chldLS; + d2pp.v3 = sblLS; + d2pp.v4 = pP; + d2pp.cz5(); + f[43] = d2pp.csa(s_dir, dir); + f[44] = d2pp.csa(s_dist, dist); + + int prntPm1 = prnt != 0 ? pos[prnt - 1] : s_str; + int chldPm1 = d - 1 >= 0 ? pos[d - 1] : s_str; + int prntPp1 = prnt != pos.length - 1 ? pos[prnt + 1] : s_end; + int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end; // sibling part of speech minus and plus 1 - int sblPm1 = sblng>0 ? pos[sblng-1]:s_str; - int sblPp1 = sblng<pos.length-1 ? pos[sblng + 1]:s_end; - - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP; dl1.cz5(); f[45]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=pP; dl1.cz5(); f[46]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.cz5(); f[47]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[48]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[49]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=prntPm1;dl1.v5=pP;dl1.cz6(); f[50]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[51]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[52]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP; dl1.cz5(); f[53]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=dP; dl1.cz5(); f[54]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.cz5(); f[55]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[56]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6(); f[57]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[58]=dl1.csa(s_dir,dir); - dl1.v0= n++;dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6();f[59]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0= n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6(); f[60]=dl1.csa(s_dir,dir); - - int c=61; - - int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1]==-1?-1:cluster.getLP(forms[prnt + 1]): _cend; - int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; - int sLSp1 = sblng < pos.length -1 ? forms[sblng + 1] ==-1?-1:cluster.getLP(forms[sblng + 1]) : _cend; - - int pLSm1 = prnt!=0 ? forms[prnt - 1]==-1?-1:cluster.getLP(forms[prnt - 1]): _cstr; - int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr; - int sLSm1 = sblng>0 ? forms[sblng - 1] ==-1?-1:cluster.getLP(forms[sblng - 1]):_cstr; - - //int c=61; - - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - - - - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); + int sblPm1 = sblng > 0 ? pos[sblng - 1] : s_str; + int sblPp1 = sblng < pos.length - 1 ? pos[sblng + 1] : s_end; + + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = pP; + dl1.cz5(); + f[45] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPm1; + dl1.v4 = pP; + dl1.cz5(); + f[46] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = prntPp1; + dl1.cz5(); + f[47] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = prntPm1; + dl1.cz5(); + f[48] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = prntPm1; + dl1.v5 = pP; + dl1.cz6(); + f[49] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sblPm1; + dl1.v3 = sP; + dl1.v4 = prntPm1; + dl1.v5 = pP; + dl1.cz6(); + f[50] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = pP; + dl1.v5 = prntPp1; + dl1.cz6(); + f[51] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sblPm1; + dl1.v3 = sP; + dl1.v4 = pP; + dl1.v5 = prntPp1; + dl1.cz6(); + f[52] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = dP; + dl1.cz5(); + f[53] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPm1; + dl1.v4 = dP; + dl1.cz5(); + f[54] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = chldPp1; + dl1.cz5(); + f[55] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = chldPm1; + dl1.cz5(); + f[56] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = chldPm1; + dl1.v5 = dP; + dl1.cz6(); + f[57] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sblPm1; + dl1.v3 = sP; + dl1.v4 = chldPm1; + dl1.v5 = dP; + dl1.cz6(); + f[58] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = dP; + dl1.v5 = chldPp1; + dl1.cz6(); + f[59] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sblPm1; + dl1.v3 = sP; + dl1.v4 = dP; + dl1.v5 = chldPp1; + dl1.cz6(); + f[60] = dl1.csa(s_dir, dir); + + int c = 61; + + int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1] == -1 ? -1 : cluster.getLP(forms[prnt + 1]) : _cend; + int cLSp1 = d != pos.length - 1 ? forms[d + 1] == -1 ? -1 : cluster.getLP(forms[d + 1]) : _cend; + int sLSp1 = sblng < pos.length - 1 ? forms[sblng + 1] == -1 ? -1 : cluster.getLP(forms[sblng + 1]) : _cend; + + int pLSm1 = prnt != 0 ? forms[prnt - 1] == -1 ? -1 : cluster.getLP(forms[prnt - 1]) : _cstr; + int cLSm1 = d - 1 >= 0 ? forms[d - 1] == -1 ? -1 : cluster.getLP(forms[d - 1]) : _cstr; + int sLSm1 = sblng > 0 ? forms[sblng - 1] == -1 ? -1 : cluster.getLP(forms[sblng - 1]) : _cstr; + + // int c=61; + + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); // take those in for stacking - - /* - short[] prel = is.plabels[i],phead=is.pheads[i]; - - int g = prnt==phead[d]?1:2 ; - if (sblng>=0) g += prnt==phead[sblng]?4:8; - - int gr = sblng==-1?s_relend:prel[sblng]; - - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); - - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); -*/ - - if (feats==null) return; - - int cnt=c; - - short[] featsP =feats[d]; - short[] featsSbl =sblng!=-1?feats[sblng]:null; - - dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=dP; - - - cnt = extractFeat(f, cnt ,dir, featsP, featsSbl); - - featsP =feats[prnt]; - featsSbl =sblng!=-1?feats[sblng]:null; - - dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=sP; - if (featsP!=null && featsSbl!=null) { - for(short i1=0;i1<featsP.length;i1++) { - for(short i2=0;i2<featsSbl.length;i2++) { - dlf.v4=featsP[i1]; dlf.v5=featsSbl[i2]; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,prnt<sblng?1:2); + /* + * short[] prel = is.plabels[i],phead=is.pheads[i]; + * + * int g = prnt==phead[d]?1:2 ; if (sblng>=0) g += + * prnt==phead[sblng]?4:8; + * + * int gr = sblng==-1?s_relend:prel[sblng]; + * + * + * dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2. + * csa(s_dir,dir); dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2. + * csa(s_dir,dir); dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f + * [c++]=dl2.csa(s_dir,dir); + * + * dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa( + * s_dir,dir); dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa( + * s_dir,dir); dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++] + * =dl2.csa(s_dir,dir); + */ + + if (feats == null) + return; + + int cnt = c; + + short[] featsP = feats[d]; + short[] featsSbl = sblng != -1 ? feats[sblng] : null; + + dlf.v0 = n++; + dlf.v1 = label; + dlf.v2 = sP; + dlf.v3 = dP; + + cnt = extractFeat(f, cnt, dir, featsP, featsSbl); + + featsP = feats[prnt]; + featsSbl = sblng != -1 ? feats[sblng] : null; + + dlf.v0 = n++; + dlf.v1 = label; + dlf.v2 = pP; + dlf.v3 = sP; + if (featsP != null && featsSbl != null) { + for (short i1 = 0; i1 < featsP.length; i1++) { + for (short i2 = 0; i2 < featsSbl.length; i2++) { + dlf.v4 = featsP[i1]; + dlf.v5 = featsSbl[i2]; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, prnt < sblng ? 1 : 2); } - } - } else if (featsP==null && featsSbl!=null) { + } + } else if (featsP == null && featsSbl != null) { - for(short i2=0;i2<featsSbl.length;i2++) { - dlf.v4=nofeat; dlf.v5=featsSbl[i2]; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); + for (short i2 = 0; i2 < featsSbl.length; i2++) { + dlf.v4 = nofeat; + dlf.v5 = featsSbl[i2]; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, dir); } - } else if (featsP!=null && featsSbl==null) { + } else if (featsP != null && featsSbl == null) { - for(short i1=0;i1<featsP.length;i1++) { - dlf.v4=featsP[i1]; dlf.v5=nofeat; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); - } + for (short i1 = 0; i1 < featsP.length; i1++) { + dlf.v4 = featsP[i1]; + dlf.v5 = nofeat; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, dir); + } } - + return; } private int extractFeat(long[] f, int cnt, int dir, short[] featsP, short[] featsD) { - if (featsP!=null && featsD!=null) { - for(short i1=0;i1<featsP.length;i1++) { - for(short i2=0;i2<featsD.length;i2++) { - dlf.v4=featsP[i1]; dlf.v5=featsD[i2]; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); + if (featsP != null && featsD != null) { + for (short i1 = 0; i1 < featsP.length; i1++) { + for (short i2 = 0; i2 < featsD.length; i2++) { + dlf.v4 = featsP[i1]; + dlf.v5 = featsD[i2]; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, dir); } - } - } else if (featsP==null && featsD!=null) { + } + } else if (featsP == null && featsD != null) { - for(short i2=0;i2<featsD.length;i2++) { - dlf.v4=nofeat; dlf.v5=featsD[i2]; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); + for (short i2 = 0; i2 < featsD.length; i2++) { + dlf.v4 = nofeat; + dlf.v5 = featsD[i2]; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, dir); - } - } else if (featsP!=null && featsD==null) { + } + } else if (featsP != null && featsD == null) { - for(short i1=0;i1<featsP.length;i1++) { - dlf.v4=featsP[i1]; dlf.v5=nofeat; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); + for (short i1 = 0; i1 < featsP.length; i1++) { + dlf.v4 = featsP[i1]; + dlf.v5 = nofeat; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, dir); - } + } } return cnt; } - public IFV encodeCat2(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], - Cluster cluster, IFV f, Long2IntInterface li) { + public IFV encodeCat2(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, + short feats[][], Cluster cluster, IFV f, Long2IntInterface li) { - - long[] svs = new long[250]; + long[] svs = new long[250]; for (int i = 1; i < heads.length; i++) { + int n = basic(pposs, forms, heads[i], i, cluster, f); + firstm(is, ic, heads[i], i, types[i], cluster, svs); + for (long sv : svs) + f.add(li.l2i(sv)); - int n =basic(pposs, forms, heads[i], i, cluster, f); - firstm(is, ic, heads[i], i, types[i], cluster,svs); - for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k])); - - int ch,cmi,cmo; + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); @@ -698,33 +1660,36 @@ final public class ExtractorClusterStacked implements Extractor { cmo = leftmostLeft(heads, i, 0); } - siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n); - for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k])); + siblingm(is, ic, pposs, forms, lemmas, feats, heads[i], i, ch, types[i], cluster, svs, n); + for (long sv : svs) + f.add(li.l2i(sv)); + gcm(is, ic, heads[i], i, cmi, types[i], cluster, svs); + for (long sv : svs) + f.add(li.l2i(sv)); - gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs); - for(int k=0;k<svs.length;k++) f.add(li.l2i(svs[k])); - - gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs); - for(int k=0;k<svs.length;k++)f.add(li.l2i(svs[k])); + gcm(is, ic, heads[i], i, cmo, types[i], cluster, svs); + for (long sv : svs) + f.add(li.l2i(sv)); } return f; } - public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) { - + @Override + public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, + short feats[][], Cluster cluster, FV f) { - long[] svs = new long[250]; + long[] svs = new long[250]; for (int i = 1; i < heads.length; i++) { + int n = basic(pposs, forms, heads[i], i, cluster, f); + firstm(is, ic, heads[i], i, types[i], cluster, svs); + for (long sv : svs) + dl1.map(f, sv); - int n =basic(pposs, forms, heads[i], i, cluster, f); - firstm(is, ic, heads[i], i, types[i], cluster,svs); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); - - int ch,cmi,cmo; + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); @@ -736,64 +1701,77 @@ final public class ExtractorClusterStacked implements Extractor { cmo = leftmostLeft(heads, i, 0); } - siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); - + siblingm(is, ic, pposs, forms, lemmas, feats, heads[i], i, ch, types[i], cluster, svs, n); + for (long sv : svs) + dl1.map(f, sv); - gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); + gcm(is, ic, heads[i], i, cmi, types[i], cluster, svs); + for (long sv : svs) + dl1.map(f, sv); - gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); + gcm(is, ic, heads[i], i, cmo, types[i], cluster, svs); + for (long sv : svs) + dl1.map(f, sv); } return f; } - - public float encode3(short[] pos, short heads[] , short[] types, DataF d2) { + @Override + public float encode3(short[] pos, short heads[], short[] types, DataF d2) { double v = 0; for (int i = 1; i < heads.length; i++) { - int dir= (heads[i] < i)? 0:1; + int dir = (heads[i] < i) ? 0 : 1; v += d2.pl[heads[i]][i]; v += d2.lab[heads[i]][i][types[i]][dir]; - boolean left = i<heads[i]; + boolean left = i < heads[i]; short[] labels = Edges.get(pos[heads[i]], pos[i], left); - int lid=-1; - for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} + int lid = -1; + for (int k = 0; k < labels.length; k++) + if (types[i] == labels[k]) { + lid = k; + break; + } - int ch,cmi,cmo; + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); cmo = rightmostRight(heads, i, heads.length); - if (ch==-1) ch=heads[i]; - if (cmi==-1) cmi=heads[i]; - if (cmo==-1) cmo=heads[i]; + if (ch == -1) + ch = heads[i]; + if (cmi == -1) + cmi = heads[i]; + if (cmo == -1) + cmo = heads[i]; } else { ch = leftmostLeft(heads, heads[i], i); cmi = rightmostRight(heads, i, heads[i]); cmo = leftmostLeft(heads, i, 0); - if (ch==-1) ch=i; - if (cmi==-1) cmi=i; - if (cmo==-1) cmo=i; + if (ch == -1) + ch = i; + if (cmi == -1) + cmi = i; + if (cmo == -1) + cmo = i; } v += d2.sib[heads[i]][i][ch][dir][lid]; v += d2.gra[heads[i]][i][cmi][dir][lid]; v += d2.gra[heads[i]][i][cmo][dir][lid]; } - return (float)v; + return (float) v; } /** * Provide the scores of the edges + * * @param pos * @param heads * @param types @@ -801,151 +1779,163 @@ final public class ExtractorClusterStacked implements Extractor { * @param d2 * @return */ - public static float encode3(short[] pos, short heads[] , short[] types, float[] edgesScores, DataF d2) { + public static float encode3(short[] pos, short heads[], short[] types, float[] edgesScores, DataF d2) { double v = 0; for (int i = 1; i < heads.length; i++) { - int dir= (heads[i] < i)? 0:1; + int dir = (heads[i] < i) ? 0 : 1; edgesScores[i] = d2.pl[heads[i]][i]; edgesScores[i] += d2.lab[heads[i]][i][types[i]][dir]; - boolean left = i<heads[i]; + boolean left = i < heads[i]; short[] labels = Edges.get(pos[heads[i]], pos[i], left); - int lid=-1; - for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} + int lid = -1; + for (int k = 0; k < labels.length; k++) + if (types[i] == labels[k]) { + lid = k; + break; + } - int ch,cmi,cmo; + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); cmo = rightmostRight(heads, i, heads.length); - if (ch==-1) ch=heads[i]; - if (cmi==-1) cmi=heads[i]; - if (cmo==-1) cmo=heads[i]; + if (ch == -1) + ch = heads[i]; + if (cmi == -1) + cmi = heads[i]; + if (cmo == -1) + cmo = heads[i]; } else { ch = leftmostLeft(heads, heads[i], i); cmi = rightmostRight(heads, i, heads[i]); cmo = leftmostLeft(heads, i, 0); - if (ch==-1) ch=i; - if (cmi==-1) cmi=i; - if (cmo==-1) cmo=i; + if (ch == -1) + ch = i; + if (cmi == -1) + cmi = i; + if (cmo == -1) + cmo = i; } edgesScores[i] += d2.sib[heads[i]][i][ch][dir][lid]; edgesScores[i] += d2.gra[heads[i]][i][cmi][dir][lid]; edgesScores[i] += d2.gra[heads[i]][i][cmo][dir][lid]; - v+=edgesScores[i]; + v += edgesScores[i]; } - return (float)v; + return (float) v; } - private static int rightmostRight(short[] heads, int head, int max) { int rightmost = -1; - for (int i = head + 1; i < max; i++) if (heads[i] == head) rightmost = i; + for (int i = head + 1; i < max; i++) + if (heads[i] == head) + rightmost = i; return rightmost; } private static int leftmostLeft(short[] heads, int head, int min) { int leftmost = -1; - for (int i = head - 1; i > min; i--) if (heads[i] == head) leftmost = i; + for (int i = head - 1; i > min; i--) + if (heads[i] == head) + leftmost = i; return leftmost; } - public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA"; + public static final String REL = "REL", END = "END", STR = "STR", LA = "LA", RA = "RA"; - private static int ra,la; + private static int ra, la; private static int s_str; - private static int s_end, _cend,_cstr, s_stwrd,s_relend; + private static int s_end, _cend, _cstr, s_stwrd; - protected static final String TYPE = "TYPE",DIR = "D"; + protected static final String TYPE = "TYPE", DIR = "D"; public static final String POS = "POS"; - protected static final String DIST = "DIST",MID = "MID", FEAT="F"; - - private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10"; - - private static int di0, d4,d3,d2,d1,d5,d10; - + protected static final String DIST = "DIST", MID = "MID", FEAT = "F"; - private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS"; + private static final String _0 = "0", _4 = "4", _3 = "3", _2 = "2", _1 = "1", _5 = "5", _10 = "10"; + private static int di0, d4, d3, d2, d1, d5, d10; + private static final String WORD = "WORD", STWRD = "STWRD", STPOS = "STPOS"; private static int nofeat; - public static int maxForm; - /** * Initialize the features. + * * @param maxFeatures */ static public void initFeatures() { - MFB mf = new MFB(); mf.register(POS, MID); s_str = mf.register(POS, STR); s_end = mf.register(POS, END); - s_relend = mf.register(REL, END); - - _cstr= mf.register(Cluster.SPATH,STR); - _cend=mf.register(Cluster.SPATH,END); + mf.register(REL, END); + _cstr = mf.register(Cluster.SPATH, STR); + _cend = mf.register(Cluster.SPATH, END); mf.register(TYPE, POS); - s_stwrd=mf.register(WORD,STWRD); - mf.register(POS,STPOS); + s_stwrd = mf.register(WORD, STWRD); + mf.register(POS, STPOS); la = mf.register(DIR, LA); ra = mf.register(DIR, RA); - // mf.register(TYPE, CHAR); + // mf.register(TYPE, CHAR); mf.register(TYPE, FEAT); - nofeat=mf.register(FEAT, "NOFEAT"); - - for(int k=0;k<215;k++) mf.register(TYPE, "F"+k); - + nofeat = mf.register(FEAT, "NOFEAT"); - di0=mf.register(DIST, _0); - d1=mf.register(DIST, _1); - d2=mf.register(DIST, _2); - d3=mf.register(DIST, _3); - d4=mf.register(DIST, _4); - d5=mf.register(DIST, _5); - // d5l=mf.register(DIST, _5l); - d10=mf.register(DIST, _10); + for (int k = 0; k < 215; k++) + mf.register(TYPE, "F" + k); + di0 = mf.register(DIST, _0); + d1 = mf.register(DIST, _1); + d2 = mf.register(DIST, _2); + d3 = mf.register(DIST, _3); + d4 = mf.register(DIST, _4); + d5 = mf.register(DIST, _5); + // d5l=mf.register(DIST, _5l); + d10 = mf.register(DIST, _10); } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see extractors.Extractor#getType() */ @Override public int getType() { - + return s_type; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see extractors.Extractor#setMaxForm(int) */ @Override public void setMaxForm(int max) { - maxForm = max; + maxForm = max; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see extractors.Extractor#getMaxForm() */ @Override @@ -953,6 +1943,4 @@ final public class ExtractorClusterStacked implements Extractor { return maxForm; } - - } diff --git a/dependencyParser/mate-tools/src/extractors/ExtractorClusterStackedR2.java b/dependencyParser/mate-tools/src/extractors/ExtractorClusterStackedR2.java index de82f42..d1776b6 100644 --- a/dependencyParser/mate-tools/src/extractors/ExtractorClusterStackedR2.java +++ b/dependencyParser/mate-tools/src/extractors/ExtractorClusterStackedR2.java @@ -1,6 +1,5 @@ package extractors; - import java.util.Arrays; import is2.data.Cluster; @@ -12,25 +11,24 @@ import is2.data.IFV; import is2.data.Instances; import is2.data.Long2IntInterface; import is2.data.MFB; -import is2.util.DB; - - final public class ExtractorClusterStackedR2 implements Extractor { - public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos; + public static int s_rel, s_word, s_type, s_dir, s_dist, s_feat, s_child, s_spath, s_lpath, s_pos; MFB mf; - final D4 d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ; + final D4 d0, dl1, dl2, dwr, dr, dwwp, dw, dwp, dlf, d3lp, d2lp, d2pw, d2pp; public final Long2IntInterface li; public ExtractorClusterStackedR2(Long2IntInterface li) { - + initFeatures(); - this.li=li; - d0 = new D4(li);dl1 = new D4(li);dl2 = new D4(li); + this.li = li; + d0 = new D4(li); + dl1 = new D4(li); + dl2 = new D4(li); dwr = new D4(li); dr = new D4(li); dwwp = new D4(li); @@ -39,672 +37,1643 @@ final public class ExtractorClusterStackedR2 implements Extractor { dwp = new D4(li); dlf = new D4(li); - d3lp = new D4(li); d2lp = new D4(li); d2pw = new D4(li); d2pp = new D4(li); + d3lp = new D4(li); + d2lp = new D4(li); + d2pw = new D4(li); + d2pp = new D4(li); } + @Override public void initStat() { - - + mf = new MFB(); s_rel = mf.getFeatureCounter().get(REL).intValue(); - s_pos = mf.getFeatureCounter().get(POS).intValue(); + s_pos = mf.getFeatureCounter().get(POS).intValue(); s_word = mf.getFeatureCounter().get(WORD).intValue(); - s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits(); + s_type = mf.getFeatureCounter().get(TYPE).intValue();// mf.getFeatureBits(); s_dir = mf.getFeatureCounter().get(DIR); la = mf.getValue(DIR, LA); ra = mf.getValue(DIR, RA); - s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST); - s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT); - s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH); - s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH); + s_dist = mf.getFeatureCounter().get(DIST);// mf.getFeatureBits(DIST); + s_feat = mf.getFeatureCounter().get(FEAT);// mf.getFeatureBits(Pipe.FEAT); + s_spath = mf.getFeatureCounter().get(Cluster.SPATH) == null ? 0 : mf.getFeatureCounter().get(Cluster.SPATH);// mf.getFeatureBits(Cluster.SPATH); + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH) == null ? 0 : mf.getFeatureCounter().get(Cluster.LPATH);// mf.getFeatureBits(Cluster.LPATH); } - public void init(){ - // DB.println("init"); - d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos; - dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; - dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos; - dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word; - dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word; - dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos; - d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath; - d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; - d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; - d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; //d3lp.a6 = s_spath; d3lp.a7 = s_spath; + @Override + public void init() { + // DB.println("init"); + d0.a0 = s_type; + d0.a1 = s_pos; + d0.a2 = s_pos; + d0.a3 = s_pos; + d0.a4 = s_pos; + d0.a5 = s_pos; + d0.a6 = s_pos; + d0.a7 = s_pos; + dl1.a0 = s_type; + dl1.a1 = s_rel; + dl1.a2 = s_pos; + dl1.a3 = s_pos; + dl1.a4 = s_pos; + dl1.a5 = s_pos; + dl1.a6 = s_pos; + dl1.a7 = s_pos; + dl2.a0 = s_type; + dl2.a1 = s_rel; + dl2.a2 = s_word; + dl2.a3 = s_pos; + dl2.a4 = s_pos; + dl2.a5 = s_pos; + dl2.a6 = s_pos; + dl2.a7 = s_pos; + dwp.a0 = s_type; + dwp.a1 = s_rel; + dwp.a2 = s_word; + dwp.a3 = s_pos; + dwp.a4 = s_pos; + dwp.a5 = s_word; + dwwp.a0 = s_type; + dwwp.a1 = s_rel; + dwwp.a2 = s_word; + dwwp.a3 = s_word; + dwwp.a4 = s_pos; + dwwp.a5 = s_word; + dlf.a0 = s_type; + dlf.a1 = s_rel; + dlf.a2 = s_pos; + dlf.a3 = s_pos; + dlf.a4 = s_feat; + dlf.a5 = s_feat; + dlf.a6 = s_pos; + dlf.a7 = s_pos; + d3lp.a0 = s_type; + d3lp.a1 = s_rel; + d3lp.a2 = s_lpath; + d3lp.a3 = s_lpath; + d3lp.a4 = s_lpath; + d3lp.a5 = s_word; + d3lp.a6 = s_spath; + d3lp.a7 = s_spath; + d2lp.a0 = s_type; + d2lp.a1 = s_rel; + d2lp.a2 = s_lpath; + d2lp.a3 = s_lpath; + d2lp.a4 = s_word; + d2lp.a5 = s_word; // d3lp.a6 = s_spath; d3lp.a7 = s_spath; + d2pw.a0 = s_type; + d2pw.a1 = s_rel; + d2pw.a2 = s_lpath; + d2pw.a3 = s_lpath; + d2pw.a4 = s_word; + d2pw.a5 = s_word; // d3lp.a6 = s_spath; d3lp.a7 = s_spath; + d2pp.a0 = s_type; + d2pp.a1 = s_rel; + d2pp.a2 = s_lpath; + d2pp.a3 = s_lpath; + d2pp.a4 = s_pos; + d2pp.a5 = s_pos; // d3lp.a6 = s_spath; d3lp.a7 = s_spath; } - - public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f) - { - - d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean(); - - d3lp.clean(); d2lp.clean();d2pw.clean(); d2pp.clean(); - - int n=1; - int dir= (p < d)? ra:la; - // d0.v0= n; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4; - int end= (p >= d ? p : d); + @Override + public int basic(short[] pposs, int[] form, int p, int d, Cluster cluster, IFV f) { + + d0.clean(); + dl1.clean(); + dl2.clean(); + dwp.clean(); + dwwp.clean(); + dlf.clean(); + d3lp.clean(); + + d3lp.clean(); + d2lp.clean(); + d2pw.clean(); + d2pp.clean(); + + int n = 1; + int dir = (p < d) ? ra : la; + // d0.v0= n; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4; + int end = (p >= d ? p : d); int start = (p >= d ? d : p) + 1; - StringBuilder s = new StringBuilder(end-start); - int[] x = new int[end-start]; - int c=0; - for(int i = start ; i <end ; i++) { - //d0.v3=pposs[i]; - //d0.cz4(); - //d0.csa(s_dir,dir,f); -// s.append((char)pposs[i]); - x[c++] =pposs[i]; + StringBuilder s = new StringBuilder(end - start); + int[] x = new int[end - start]; + int c = 0; + for (int i = start; i < end; i++) { + // d0.v3=pposs[i]; + // d0.cz4(); + // d0.csa(s_dir,dir,f); + // s.append((char)pposs[i]); + x[c++] = pposs[i]; } - + Arrays.sort(x); - for(int i = 0;i<x.length ; i++) { - if (i==0 || x[i]!=x[i-1] ) s.append(x[i]); - } + for (int i = 0; i < x.length; i++) { + if (i == 0 || x[i] != x[i - 1]) + s.append(x[i]); + } int v = mf.register("px", s.toString()); - - dwp.v0 = n++; dwp.v1 = 1;dwp.v2 = v; dwp.v3 = pposs[p]; dwp.v4 = pposs[d]; dwp.cz5(); dwp.csa(s_dir,dir,f); - + + dwp.v0 = n++; + dwp.v1 = 1; + dwp.v2 = v; + dwp.v3 = pposs[p]; + dwp.v4 = pposs[d]; + dwp.cz5(); + dwp.csa(s_dir, dir, f); + return n; } + @Override + public void firstm(Instances is, int i, int prnt, int dpnt, int label, Cluster cluster, long[] f) { - public void firstm(Instances is, int i, - int prnt, int dpnt, int label, Cluster cluster, long[] f) - { - - - //short[] pposs, int[] form, int[] lemmas, short[][] feats - for(int k=0;k<f.length;k++) f[k]=0; + // short[] pposs, int[] form, int[] lemmas, short[][] feats + for (int k = 0; k < f.length; k++) + f[k] = 0; short[] pposs = is.pposs[i]; - int[] form =is.forms[i]; + int[] form = is.forms[i]; short[][] feats = is.feats[i]; - - - int pF = form[prnt],dF = form[dpnt]; - int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt]; - int pP = pposs[prnt],dP = pposs[dpnt]; - - int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF); - - final int dir= (prnt < dpnt)? ra:la; - - if (pF>maxForm) pF=-1; - if (pL>maxForm) pL=-1; - - if (dF>maxForm) dF=-1; - if (dL>maxForm) dL=-1; - - - int n=3,c=0; - - dl2.v1=label; - dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); - - - dwwp.v1=label; - dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir); - - dl1.v1=label; - dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.csa(s_dir,dir); - - int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str; - int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end; - - int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str; - int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end; - - int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd; - int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd; - - - - dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v5=dPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=pPm1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v5=dPp1; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - - - dl1.v0= n++; dl1.v3=pPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=dPm1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=dPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=pPp1; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - - dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v5=dPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=pPm2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v5=dPp2; dl1.cz6(); f[n++]=dl1.csa(s_dir,dir); - - dl1.v0= n++; dl1.v3=pPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=dPm2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=dPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v3=pPp2; dl1.cz5(); f[n++]=dl1.csa(s_dir,dir); - - - - dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.cz5(); f[n++]=dl2.getVal(); - - - dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.cz5(); f[n++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.cz5(); f[n++]=dl2.getVal(); - - - dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir); - dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[n++]=dwwp.csa(s_dir,dir); - dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[n++]=dwwp.csa(s_dir,dir); - - - - // lemmas - - dl2.v1=label; - dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.csa(s_dir,dir); - - dwwp.v1=label; - dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.csa(s_dir,dir); + int pF = form[prnt], dF = form[dpnt]; + int pL = is.plemmas[i][prnt], dL = is.plemmas[i][dpnt]; + int pP = pposs[prnt], dP = pposs[dpnt]; + + int prntLS = pF == -1 ? -1 : cluster.getLP(pF), chldLS = dF == -1 ? -1 : cluster.getLP(dF); + + final int dir = (prnt < dpnt) ? ra : la; + + if (pF > maxForm) + pF = -1; + if (pL > maxForm) + pL = -1; + + if (dF > maxForm) + dF = -1; + if (dL > maxForm) + dL = -1; + + int n = 3, c = 0; + + dl2.v1 = label; + dl2.v0 = n++; + dl2.v2 = pF; + dl2.v3 = dP; + dl2.cz4(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.v2 = dF; + dl2.v3 = pP; + dl2.cz4(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.csa(s_dir, dir); + + dwwp.v1 = label; + dwwp.v0 = n++; + dwwp.v2 = pF; + dwwp.v3 = dF; + dwwp.cz4(); + f[c++] = dwwp.csa(s_dir, dir); + + dl1.v1 = label; + dl1.v0 = n++; + dl1.v2 = dP; + dl1.cz3(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = pP; + dl1.cz3(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = dP; + dl1.cz4(); + f[c++] = dl1.csa(s_dir, dir); + + int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str; + int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1] : s_end, + dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1] : s_end; + + int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str; + int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2] : s_end, + dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2] : s_end; + + int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd; + int pFp1 = prnt < form.length - 1 ? form[prnt + 1] : s_stwrd, + dFp1 = dpnt < form.length - 1 ? form[dpnt + 1] : s_stwrd; + + dl1.v0 = n++; + dl1.v2 = pP; + dl1.v3 = pPp1; + dl1.v4 = dP; + dl1.v5 = dPp1; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v5 = dPm1; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = pPm1; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v5 = dPp1; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + + dl1.v0 = n++; + dl1.v3 = pPm1; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = dPm1; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = dPp1; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = pPp1; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + + dl1.v0 = n++; + dl1.v2 = pP; + dl1.v3 = pPp2; + dl1.v4 = dP; + dl1.v5 = dPp2; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v5 = dPm2; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = pPm2; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v5 = dPp2; + dl1.cz6(); + f[n++] = dl1.csa(s_dir, dir); + + dl1.v0 = n++; + dl1.v3 = pPm2; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = dPm2; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = dPp2; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v3 = pPp2; + dl1.cz5(); + f[n++] = dl1.csa(s_dir, dir); + + dl2.v0 = n++; + dl2.v3 = dFm1; + dl2.v3 = pPp1; + dl2.v4 = pP; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = dFp1; + dl2.v3 = pPm1; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFm1; + dl2.v3 = dPp1; + dl2.v4 = dP; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFp1; + dl2.v3 = dPm1; + dl2.cz5(); + f[n++] = dl2.getVal(); + + dl2.v0 = n++; + dl2.v3 = dFm1; + dl2.v3 = dPm2; + dl2.v4 = pP; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = dFp1; + dl2.v3 = dPp2; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFm1; + dl2.v3 = pPm2; + dl2.v4 = dP; + dl2.cz5(); + f[n++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFp1; + dl2.v3 = pPp2; + dl2.cz5(); + f[n++] = dl2.getVal(); + + dwwp.v0 = n++; + dwwp.v2 = pF; + dwwp.v3 = dF; + dwwp.v4 = dP; + dwwp.cz5(); + f[n++] = dwwp.csa(s_dir, dir); + dwwp.v0 = n++; + dwwp.v2 = pF; + dwwp.v3 = dF; + dwwp.v4 = pP; + dwwp.cz5(); + f[n++] = dwwp.csa(s_dir, dir); + dwwp.v0 = n++; + dwwp.v2 = dF; + dwwp.v3 = pF; + dwwp.v4 = pP; + dwwp.v4 = dP; + dwwp.cz6(); + f[n++] = dwwp.csa(s_dir, dir); - dwp.v1= label; - dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; dwp.cz6(); f[c++]=dwp.csa(s_dir,dir); - dwp.v0=n++;dwp.cz5(); f[c++]=dwp.csa(s_dir,dir); + // lemmas - dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.csa(s_dir,dir); - dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir); - dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.csa(s_dir,dir); + dl2.v1 = label; + dl2.v0 = n++; + dl2.v2 = pL; + dl2.v3 = dP; + dl2.cz4(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.v2 = dL; + dl2.v3 = pP; + dl2.cz4(); + f[c++] = dl2.csa(s_dir, dir); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.csa(s_dir, dir); + + dwwp.v1 = label; + dwwp.v0 = n++; + dwwp.v2 = pL; + dwwp.v3 = dL; + dwwp.cz4(); + f[c++] = dwwp.csa(s_dir, dir); + + dwp.v1 = label; + dwp.v0 = n++; + dwp.v2 = dL; + dwp.v3 = pP; + dwp.v4 = dP; + dwp.v5 = pL; + dwp.cz6(); + f[c++] = dwp.csa(s_dir, dir); + dwp.v0 = n++; + dwp.cz5(); + f[c++] = dwp.csa(s_dir, dir); + + dwp.v0 = n++; + dwp.v2 = pL; + dwp.cz5(); + f[c++] = dwp.csa(s_dir, dir); + dwwp.v0 = n++; + dwwp.v2 = pL; + dwwp.v3 = dL; + dwwp.v4 = dP; + dwwp.cz5(); + f[c++] = dwwp.csa(s_dir, dir); + dwwp.v0 = n++; + dwwp.v4 = pP; + dwwp.cz5(); + f[c++] = dwwp.csa(s_dir, dir); - // cluster - d2pw.v1=label; - d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.csa(s_dir,dir); - d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir); - d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.csa(s_dir,dir); - d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.csa(s_dir,dir); - - - d2pp.v1=label; - d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.csa(s_dir,dir); - d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); - d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); - d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.csa(s_dir,dir); - - - short[] prel = is.plabels[i]; - short[] phead = is.pheads[i]; - - - //take those in for stacking - // dl2.v1=label; - // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir); - // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.csa(s_dir,dir); - - - - if (feats==null) return; + d2pw.v1 = label; + d2pw.v0 = n++; + d2pw.v2 = prntLS; + d2pw.v3 = chldLS; + d2pw.cz4(); + f[c++] = d2pw.csa(s_dir, dir); + d2pw.v0 = n++; + d2pw.v4 = pF; + d2pw.cz5(); + f[c++] = d2pw.csa(s_dir, dir); + d2pw.v0 = n++; + d2pw.v4 = dF; + d2pw.cz5(); + f[c++] = d2pw.csa(s_dir, dir); + d2pw.v0 = n++; + d2pw.v5 = pF; + d2pw.cz6(); + f[c++] = d2pw.csa(s_dir, dir); + + d2pp.v1 = label; + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = chldLS; + d2pp.cz4(); + f[c++] = d2pp.csa(s_dir, dir); + d2pp.v0 = n++; + d2pp.v4 = pP; + d2pp.cz5(); + f[c++] = d2pp.csa(s_dir, dir); + d2pp.v0 = n++; + d2pp.v4 = dP; + d2pp.cz5(); + f[c++] = d2pp.csa(s_dir, dir); + d2pp.v0 = n++; + d2pp.v5 = pP; + d2pp.cz6(); + f[c++] = d2pp.csa(s_dir, dir); - short[] featsP =feats[prnt], featsD =feats[dpnt]; - dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP; + // take those in for stacking + // dl2.v1=label; + // dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; + // dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.csa(s_dir,dir); + // dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; + // dl2.cz5(); f[c++]=dl2.csa(s_dir,dir); + + if (feats == null) + return; + + short[] featsP = feats[prnt], featsD = feats[dpnt]; + dlf.v0 = n++; + dlf.v1 = label; + dlf.v2 = pP; + dlf.v3 = dP; extractFeat(f, c, dir, featsP, featsD); return; } + @Override + public void gcm(Instances is, int i, int p, int d, int gc, int label, Cluster cluster, long[] f) { + for (int k = 0; k < f.length; k++) + f[k] = 0; - public void gcm(Instances is , int i, int p, int d, int gc, int label,Cluster cluster, long[] f) { - - for(int k=0;k<f.length;k++) f[k]=0; + short[] pos = is.pposs[i]; + int[] forms = is.forms[i]; + int[] lemmas = is.plemmas[i]; + short[][] feats = is.feats[i]; - short[] pos= is.pposs[i]; - int[] forms=is.forms[i]; - int[] lemmas=is.plemmas[i]; - short[][] feats=is.feats[i]; - int pP = pos[p], dP = pos[d]; int prntF = forms[p], chldF = forms[d]; int prntL = lemmas[p], chldL = lemmas[d]; - int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); + int prntLS = prntF == -1 ? -1 : cluster.getLP(prntF), chldLS = chldF == -1 ? -1 : cluster.getLP(chldF); - int gP = gc != -1 ? pos[gc] : s_str; - int gcF = gc != -1 ? forms[gc] : s_stwrd; + int gP = gc != -1 ? pos[gc] : s_str; + int gcF = gc != -1 ? forms[gc] : s_stwrd; int gcL = gc != -1 ? lemmas[gc] : s_stwrd; - int gcLS = (gc != -1) && (gcF!=-1) ? cluster.getLP(gcF) : s_stwrd; - - if (prntF>maxForm) prntF=-1; - if (prntL>maxForm) prntL=-1; - - if (chldF>maxForm) chldF=-1; - if (chldL>maxForm) chldL=-1; - - if (gcF>maxForm) gcF=-1; - if (gcL>maxForm) gcL=-1; - - - int dir= (p < d)? ra:la, dir_gra =(d < gc)? ra:la; - - int n=84,c=0; - - //dl1.v023(); - dl1.v1=label; - dl1.v0= n++; dl1.v2=pP; dl1.v3=dP;dl1.v4=gP; dl1.cz5(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); - dl1.v0= n++; dl1.v2=pP; dl1.v3=gP; dl1.cz4();dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); - dl1.v0= n++; dl1.v2=dP; dl1.cz4(); dl1.cs(s_dir,dir);f[c++]=dl1.csa(s_dir,dir_gra); - - dwwp.v1=label; - dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=gcF; - dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); - - dwwp.v0= n++; dwwp.v2=chldF; dwwp.v3=gcF; - dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); - - dwp.v1=label; - dwp.v0= n++; dwp.v2=gcF; dwp.v3=pP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=gcF; dwp.v3=dP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=prntF; dwp.v3=gP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=chldF; dwp.v3=gP; - dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra); - + int gcLS = (gc != -1) && (gcF != -1) ? cluster.getLP(gcF) : s_stwrd; + + if (prntF > maxForm) + prntF = -1; + if (prntL > maxForm) + prntL = -1; + + if (chldF > maxForm) + chldF = -1; + if (chldL > maxForm) + chldL = -1; + + if (gcF > maxForm) + gcF = -1; + if (gcL > maxForm) + gcL = -1; + + int dir = (p < d) ? ra : la, dir_gra = (d < gc) ? ra : la; + + int n = 84, c = 0; + + // dl1.v023(); + dl1.v1 = label; + dl1.v0 = n++; + dl1.v2 = pP; + dl1.v3 = dP; + dl1.v4 = gP; + dl1.cz5(); + dl1.cs(s_dir, dir); + f[c++] = dl1.csa(s_dir, dir_gra); + dl1.v0 = n++; + dl1.v2 = pP; + dl1.v3 = gP; + dl1.cz4(); + dl1.cs(s_dir, dir); + f[c++] = dl1.csa(s_dir, dir_gra); + dl1.v0 = n++; + dl1.v2 = dP; + dl1.cz4(); + dl1.cs(s_dir, dir); + f[c++] = dl1.csa(s_dir, dir_gra); + + dwwp.v1 = label; + dwwp.v0 = n++; + dwwp.v2 = prntF; + dwwp.v3 = gcF; + dwwp.cz4(); + dwwp.cs(s_dir, dir); + f[c++] = dwwp.csa(s_dir, dir_gra); + + dwwp.v0 = n++; + dwwp.v2 = chldF; + dwwp.v3 = gcF; + dwwp.cz4(); + dwwp.cs(s_dir, dir); + f[c++] = dwwp.csa(s_dir, dir_gra); + + dwp.v1 = label; + dwp.v0 = n++; + dwp.v2 = gcF; + dwp.v3 = pP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = gcF; + dwp.v3 = dP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = prntF; + dwp.v3 = gP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = chldF; + dwp.v3 = gP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); // lemma - dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=gcL; - dwwp.cz4();dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); - - dwwp.v0= n++; dwwp.v2=chldL; dwwp.v3=gcL; - dwwp.cz4(); dwwp.cs(s_dir,dir);f[c++]=dwwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=gcL; dwp.v3=pP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=gcL; dwp.v3=dP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=prntL; dwp.v3=gP; - dwp.cz4(); dwp.cs(s_dir,dir);f[c++]=dwp.csa(s_dir,dir_gra); - - dwp.v0= n++; dwp.v2=chldL; dwp.v3=gP; - dwp.cz4(); dwp.cs(s_dir,dir); f[c++]=dwp.csa(s_dir,dir_gra); - - - // clusters - - d2lp.v1= label; - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra);// f.add(li.l2i(l)); - d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.cz4(); d2lp.cs(s_dir,dir);f[c++]=d2lp.csa(s_dir,dir_gra); - d3lp.v0= n++; d3lp.v1= label; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=gcLS; d3lp.cz5(); d3lp.cs(s_dir,dir);f[c++]=d3lp.csa(s_dir,dir_gra); - - //_f83; - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=gcF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=gcLS; d2lp.v4=chldF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); - d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=gcLS; d2lp.v4=prntF; d2lp.cz5(); f[c++]=d2lp.csa(s_dir,dir); - - d2pp.v1= label; - d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=gP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); - d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=gcLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); - d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=gcLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.csa(s_dir,dir); + dwwp.v0 = n++; + dwwp.v2 = prntL; + dwwp.v3 = gcL; + dwwp.cz4(); + dwwp.cs(s_dir, dir); + f[c++] = dwwp.csa(s_dir, dir_gra); + + dwwp.v0 = n++; + dwwp.v2 = chldL; + dwwp.v3 = gcL; + dwwp.cz4(); + dwwp.cs(s_dir, dir); + f[c++] = dwwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = gcL; + dwp.v3 = pP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = gcL; + dwp.v3 = dP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = prntL; + dwp.v3 = gP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + + dwp.v0 = n++; + dwp.v2 = chldL; + dwp.v3 = gP; + dwp.cz4(); + dwp.cs(s_dir, dir); + f[c++] = dwp.csa(s_dir, dir_gra); + // clusters + d2lp.v1 = label; + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = gcLS; + d2lp.cz4(); + d2lp.cs(s_dir, dir); + f[c++] = d2lp.csa(s_dir, dir_gra);// f.add(li.l2i(l)); + d2lp.v0 = n++; + d2lp.v2 = chldLS; + d2lp.v3 = gcLS; + d2lp.cz4(); + d2lp.cs(s_dir, dir); + f[c++] = d2lp.csa(s_dir, dir_gra); + d3lp.v0 = n++; + d3lp.v1 = label; + d3lp.v2 = prntLS; + d3lp.v3 = chldLS; + d3lp.v4 = gcLS; + d3lp.cz5(); + d3lp.cs(s_dir, dir); + f[c++] = d3lp.csa(s_dir, dir_gra); + + // _f83; + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = chldLS; + d2lp.v4 = gcF; + d2lp.cz5(); + f[c++] = d2lp.csa(s_dir, dir); + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = gcLS; + d2lp.v4 = chldF; + d2lp.cz5(); + f[c++] = d2lp.csa(s_dir, dir); + d2lp.v0 = n++; + d2lp.v2 = chldLS; + d2lp.v3 = gcLS; + d2lp.v4 = prntF; + d2lp.cz5(); + f[c++] = d2lp.csa(s_dir, dir); + + d2pp.v1 = label; + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = chldLS; + d2pp.v4 = gP; + d2pp.cz5(); + f[c++] = d2pp.csa(s_dir, dir); + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = gcLS; + d2pp.v4 = dP; + d2pp.cz5(); + f[c++] = d2pp.csa(s_dir, dir); + d2pp.v0 = n++; + d2pp.v2 = chldLS; + d2pp.v3 = gcLS; + d2pp.v4 = pP; + d2pp.cz5(); + f[c++] = d2pp.csa(s_dir, dir); // linear features int prntPm1 = p != 0 ? pos[p - 1] : s_str; // parent-pos-minus1 - int chldPm1 = d - 1 >=0 ? pos[d - 1] : s_str; // child-pos-minus1 + int chldPm1 = d - 1 >= 0 ? pos[d - 1] : s_str; // child-pos-minus1 int prntPp1 = p != pos.length - 1 ? pos[p + 1] : s_end; int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end; - int gcPm1 = gc > 0 ? pos[gc - 1] : s_str; - int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end; - - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=dP;dl1.v5=chldPp1; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcPm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=prntPp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - - - int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend; - int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; - int gcLSp1 = gc < pos.length -1 ? forms[gc + 1] ==-1?-1:cluster.getLP(forms[gc + 1]) : s_end; - - int pLSm1 = p != 0 ? lemmas[p - 1]==-1?-1:cluster.getLP(lemmas[p - 1]): _cstr; - int cLSm1 = d - 1 >=0 ? lemmas[d - 1] ==-1?-1:cluster.getLP(lemmas[d - 1]):_cstr; - int gcLSm1 = gc > 0 ? lemmas[gc - 1] ==-1?-1:cluster.getLP(lemmas[gc - 1]) : _cstr; - - - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=dP; dl1.cz5();f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6();f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=cLSm1; dl1.v3=gP;dl1.v4=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gP; dl1.v3=gcLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=gcLSm1; dl1.v3=gP; dl1.v4=pP; dl1.v5=pLSp1;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - - - - short[] prel = is.plabels[i],phead=is.pheads[i]; - - int g = p==phead[d]?1:2 ; - if (gc>=0) g += d==phead[gc]?4:8; - - int gr = gc==-1?s_relend:prel[gc]; + int gcPm1 = gc > 0 ? pos[gc - 1] : s_str; + int gcPp1 = gc < pos.length - 1 ? pos[gc + 1] : s_end; + + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPm1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = dP; + dl1.v4 = chldPp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = dP; + dl1.v4 = chldPm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = chldPm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcPm1; + dl1.v3 = gP; + dl1.v4 = chldPm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = dP; + dl1.v5 = chldPp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcPm1; + dl1.v3 = gP; + dl1.v4 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPm1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = pP; + dl1.v4 = prntPp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = pP; + dl1.v4 = prntPm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = prntPm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcPm1; + dl1.v3 = gP; + dl1.v4 = prntPm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcPp1; + dl1.v4 = pP; + dl1.v5 = prntPp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcPm1; + dl1.v3 = gP; + dl1.v4 = pP; + dl1.v5 = prntPp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + + int pLSp1 = p != pos.length - 1 ? forms[p + 1] == -1 ? -1 : cluster.getLP(forms[p + 1]) : _cend; + int cLSp1 = d != pos.length - 1 ? forms[d + 1] == -1 ? -1 : cluster.getLP(forms[d + 1]) : _cend; + int gcLSp1 = gc < pos.length - 1 ? forms[gc + 1] == -1 ? -1 : cluster.getLP(forms[gc + 1]) : s_end; + + int pLSm1 = p != 0 ? lemmas[p - 1] == -1 ? -1 : cluster.getLP(lemmas[p - 1]) : _cstr; + int cLSm1 = d - 1 >= 0 ? lemmas[d - 1] == -1 ? -1 : cluster.getLP(lemmas[d - 1]) : _cstr; + int gcLSm1 = gc > 0 ? lemmas[gc - 1] == -1 ? -1 : cluster.getLP(lemmas[gc - 1]) : _cstr; + + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSm1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = dP; + dl1.v4 = cLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = dP; + dl1.v4 = cLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcLSm1; + dl1.v3 = gP; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = cLSm1; + dl1.v3 = gP; + dl1.v4 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSm1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = pP; + dl1.v4 = pLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = pP; + dl1.v4 = pLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcLSm1; + dl1.v3 = gP; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gP; + dl1.v3 = gcLSp1; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = gcLSm1; + dl1.v3 = gP; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + + if (gc >= 0) { + } // take those in for stacking - /* - dl2.v1=label; - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); - - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); - -*/ - if (feats==null) return; - - short[] featsP =feats[d]; - short[] featsD =gc!=-1?feats[gc]:null; - - dlf.v0= n++; dlf.v1=label; dlf.v2=gP; dlf.v3=dP; + /* + * dl2.v1=label; dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2. + * csa(s_dir,dir); dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2. + * csa(s_dir,dir); dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f + * [c++]=dl2.csa(s_dir,dir); + * + * dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa( + * s_dir,dir); dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa( + * s_dir,dir); dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=gP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++] + * =dl2.csa(s_dir,dir); + * + */ + if (feats == null) + return; + + short[] featsP = feats[d]; + short[] featsD = gc != -1 ? feats[gc] : null; + + dlf.v0 = n++; + dlf.v1 = label; + dlf.v2 = gP; + dlf.v3 = dP; extractFeat(f, c, dir, featsP, featsD); return; } + @Override + public void siblingm(Instances is, int i, short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, + int sblng, int label, Cluster cluster, long[] f, int v) { - public void siblingm(Instances is , int i,short pos[], int forms[], int[] lemmas, short[][] feats, int prnt, int d, int sblng, int label, Cluster cluster, long[] f, int v) - { - - for(int k=0;k<f.length;k++) f[k]=0; + for (int k = 0; k < f.length; k++) + f[k] = 0; int pP = pos[prnt], dP = pos[d]; - int prntF = forms[prnt],chldF = forms[d]; + int prntF = forms[prnt], chldF = forms[d]; int prntL = lemmas[prnt], chldL = lemmas[d]; - int prntLS = prntF==-1?-1:cluster.getLP(prntF), chldLS = chldF==-1?-1:cluster.getLP(chldF); + int prntLS = prntF == -1 ? -1 : cluster.getLP(prntF), chldLS = chldF == -1 ? -1 : cluster.getLP(chldF); - int sP = sblng!=-1 ? pos[sblng] : s_str, sblF = sblng!=-1 ? forms[sblng] : s_stwrd, sblL = sblng!=-1 ? lemmas[sblng] : s_stwrd; + int sP = sblng != -1 ? pos[sblng] : s_str, sblF = sblng != -1 ? forms[sblng] : s_stwrd, + sblL = sblng != -1 ? lemmas[sblng] : s_stwrd; - int sblLS = (sblng != -1)&&(sblF!=-1) ? cluster.getLP(sblF) : s_stwrd; + int sblLS = (sblng != -1) && (sblF != -1) ? cluster.getLP(sblF) : s_stwrd; + int dir = (prnt < d) ? ra : la; - int dir= (prnt < d)? ra:la; - - int abs = Math.abs(prnt-d); + int abs = Math.abs(prnt - d); final int dist; - if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2; - else if (abs==2)dist=d1; else dist=di0; - - int n=147; - - if (prntF>maxForm) prntF=-1; - if (prntL>maxForm) prntL=-1; - - if (chldF>maxForm) chldF=-1; - if (chldL>maxForm) chldL=-1; - - if (sblF>maxForm) sblF=-1; - if (sblL>maxForm) sblL=-1; - - - dl1.v0= n++; dl1.v1=label;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[0]=dl1.csa(s_dir,dir);f[1]=dl1.csa(s_dist,dist); - dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[2]=dl1.csa(s_dir,dir); f[3]=dl1.csa(s_dist,dist); - dl1.v0= n++; dl1.v2=dP;dl1.cz4(); f[4]=dl1.csa(s_dir,dir); f[5]=dl1.csa(s_dist,dist); + if (abs > 10) + dist = d10; + else if (abs > 5) + dist = d5; + else if (abs == 5) + dist = d4; + else if (abs == 4) + dist = d3; + else if (abs == 3) + dist = d2; + else if (abs == 2) + dist = d1; + else + dist = di0; + + int n = 147; + + if (prntF > maxForm) + prntF = -1; + if (prntL > maxForm) + prntL = -1; + + if (chldF > maxForm) + chldF = -1; + if (chldL > maxForm) + chldL = -1; + + if (sblF > maxForm) + sblF = -1; + if (sblL > maxForm) + sblL = -1; + + dl1.v0 = n++; + dl1.v1 = label; + dl1.v2 = pP; + dl1.v3 = dP; + dl1.v4 = sP; + dl1.cz5(); + f[0] = dl1.csa(s_dir, dir); + f[1] = dl1.csa(s_dist, dist); + dl1.v0 = n++; + dl1.v3 = sP; + dl1.cz4(); + f[2] = dl1.csa(s_dir, dir); + f[3] = dl1.csa(s_dist, dist); + dl1.v0 = n++; + dl1.v2 = dP; + dl1.cz4(); + f[4] = dl1.csa(s_dir, dir); + f[5] = dl1.csa(s_dist, dist); // sibling only could be tried - dwwp.v1=label; - dwwp.v0= n++; dwwp.v2=prntF; dwwp.v3=sblF; dwwp.cz4(); f[6]=dwwp.csa(s_dir,dir); f[7]=dwwp.csa(s_dist,dist); - dwwp.v0= n++; dwwp.v2=chldF; dwwp.cz4(); f[8]=dwwp.csa(s_dir,dir); f[9]=dwwp.csa(s_dist,dist); - dwp.v0= n++; dwp.v1=label; dwp.v2=sblF; dwp.v3=pP; dwp.cz4(); f[10]=dwp.csa(s_dir,dir); f[11]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[12]=dwp.csa(s_dir,dir); f[13]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntF; dwp.v3=sP; dwp.cz4(); f[14]=dwp.csa(s_dir,dir); f[15]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldF; dwp.cz4(); f[16]=dwp.csa(s_dir,dir); f[17]=dwp.csa(s_dist,dist); - - //lemmas - dwwp.v0= n++; dwwp.v2=prntL; dwwp.v3=sblL; dwwp.cz4(); f[18]=dwwp.csa(s_dir,dir); - dwwp.v0= n++; dwwp.v2=chldL; dwwp.cz4(); f[19]=dwwp.csa(s_dir,dir); f[20]=dwwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=sblL; dwp.v3=pP; dwp.cz4(); f[21]=dwp.csa(s_dir,dir); f[22]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label; */ dwp.v3=dP; dwp.cz4(); f[23]=dwp.csa(s_dir,dir);f[24]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=prntL; dwp.v3=sP; dwp.cz4(); f[25]=dwp.csa(s_dir,dir); f[26]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=chldL; dwp.cz4(); f[27]=dwp.csa(s_dir,dir);f[28]=dwp.csa(s_dist,dist); + dwwp.v1 = label; + dwwp.v0 = n++; + dwwp.v2 = prntF; + dwwp.v3 = sblF; + dwwp.cz4(); + f[6] = dwwp.csa(s_dir, dir); + f[7] = dwwp.csa(s_dist, dist); + dwwp.v0 = n++; + dwwp.v2 = chldF; + dwwp.cz4(); + f[8] = dwwp.csa(s_dir, dir); + f[9] = dwwp.csa(s_dist, dist); + dwp.v0 = n++; + dwp.v1 = label; + dwp.v2 = sblF; + dwp.v3 = pP; + dwp.cz4(); + f[10] = dwp.csa(s_dir, dir); + f[11] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */dwp.v3 = dP; + dwp.cz4(); + f[12] = dwp.csa(s_dir, dir); + f[13] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = prntF; + dwp.v3 = sP; + dwp.cz4(); + f[14] = dwp.csa(s_dir, dir); + f[15] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = chldF; + dwp.cz4(); + f[16] = dwp.csa(s_dir, dir); + f[17] = dwp.csa(s_dist, dist); + // lemmas + dwwp.v0 = n++; + dwwp.v2 = prntL; + dwwp.v3 = sblL; + dwwp.cz4(); + f[18] = dwwp.csa(s_dir, dir); + dwwp.v0 = n++; + dwwp.v2 = chldL; + dwwp.cz4(); + f[19] = dwwp.csa(s_dir, dir); + f[20] = dwwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = sblL; + dwp.v3 = pP; + dwp.cz4(); + f[21] = dwp.csa(s_dir, dir); + f[22] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v3 = dP; + dwp.cz4(); + f[23] = dwp.csa(s_dir, dir); + f[24] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = prntL; + dwp.v3 = sP; + dwp.cz4(); + f[25] = dwp.csa(s_dir, dir); + f[26] = dwp.csa(s_dist, dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = chldL; + dwp.cz4(); + f[27] = dwp.csa(s_dir, dir); + f[28] = dwp.csa(s_dist, dist); // clusters - d2lp.v1=label; - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[29]=d2lp.csa(s_dir,dir); - d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[30]=d2lp.csa(s_dir,dir); f[31]=d2lp.csa(s_dist,dist); - - d3lp.v1= label; - d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[32]=d3lp.csa(s_dir,dir); - - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sblF; d2lp.cz5(); f[33]=d2lp.csa(s_dir,dir); f[34]=d2lp.csa(s_dist,dist); - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=chldF; d2lp.cz5(); f[35]=d2lp.csa(s_dir,dir); f[36]=d2lp.csa(s_dist,dist); - d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=prntF; d2lp.cz5(); f[37]=d2lp.csa(s_dir,dir); f[38]=d2lp.csa(s_dist,dist); - - d2pp.v1=label; - d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[39]=d2pp.csa(s_dir,dir); f[40]=d2pp.csa(s_dist,dist); - d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[41]=d2pp.csa(s_dir,dir); f[42]=d2pp.csa(s_dist,dist); - d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[43]=d2pp.csa(s_dir,dir); f[44]=d2pp.csa(s_dist,dist); - - - int prntPm1 = prnt!=0 ? pos[prnt-1] : s_str; - int chldPm1 = d-1>=0 ? pos[d-1] : s_str; - int prntPp1 = prnt!=pos.length-1 ? pos[prnt+1] : s_end; - int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end; + d2lp.v1 = label; + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = sblLS; + d2lp.cz4(); + f[29] = d2lp.csa(s_dir, dir); + d2lp.v0 = n++; + d2lp.v2 = chldLS; + d2lp.v3 = sblLS; + d2lp.cz4(); + f[30] = d2lp.csa(s_dir, dir); + f[31] = d2lp.csa(s_dist, dist); + + d3lp.v1 = label; + d3lp.v0 = n++; + d3lp.v2 = prntLS; + d3lp.v3 = chldLS; + d3lp.v4 = sblLS; + d3lp.cz5(); + f[32] = d3lp.csa(s_dir, dir); + + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = chldLS; + d2lp.v4 = sblF; + d2lp.cz5(); + f[33] = d2lp.csa(s_dir, dir); + f[34] = d2lp.csa(s_dist, dist); + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = sblLS; + d2lp.v4 = chldF; + d2lp.cz5(); + f[35] = d2lp.csa(s_dir, dir); + f[36] = d2lp.csa(s_dist, dist); + d2lp.v0 = n++; + d2lp.v2 = chldLS; + d2lp.v3 = sblLS; + d2lp.v4 = prntF; + d2lp.cz5(); + f[37] = d2lp.csa(s_dir, dir); + f[38] = d2lp.csa(s_dist, dist); + + d2pp.v1 = label; + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = chldLS; + d2pp.v4 = sP; + d2pp.cz5(); + f[39] = d2pp.csa(s_dir, dir); + f[40] = d2pp.csa(s_dist, dist); + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = sblLS; + d2pp.v4 = dP; + d2pp.cz5(); + f[41] = d2pp.csa(s_dir, dir); + f[42] = d2pp.csa(s_dist, dist); + d2pp.v0 = n++; + d2pp.v2 = chldLS; + d2pp.v3 = sblLS; + d2pp.v4 = pP; + d2pp.cz5(); + f[43] = d2pp.csa(s_dir, dir); + f[44] = d2pp.csa(s_dist, dist); + + int prntPm1 = prnt != 0 ? pos[prnt - 1] : s_str; + int chldPm1 = d - 1 >= 0 ? pos[d - 1] : s_str; + int prntPp1 = prnt != pos.length - 1 ? pos[prnt + 1] : s_end; + int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end; // sibling part of speech minus and plus 1 - int sblPm1 = sblng>0 ? pos[sblng-1]:s_str; - int sblPp1 = sblng<pos.length-1 ? pos[sblng + 1]:s_end; - - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP; dl1.cz5(); f[45]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=pP; dl1.cz5(); f[46]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.cz5(); f[47]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPm1; dl1.cz5(); f[48]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=prntPm1;dl1.v5=pP; dl1.cz6(); f[49]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=prntPm1;dl1.v5=pP;dl1.cz6(); f[50]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[51]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1; dl1.cz6(); f[52]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP; dl1.cz5(); f[53]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPm1;dl1.v4=dP; dl1.cz5(); f[54]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.cz5(); f[55]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.cz5(); f[56]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.cz6(); f[57]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP;dl1.cz6(); f[58]=dl1.csa(s_dir,dir); - dl1.v0= n++;dl1.v2=sP; dl1.v3=sblPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6();f[59]=dl1.csa(s_dir,dir);// f.add(li.l2i(l)); - dl1.v0= n++; dl1.v2=sblPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.cz6(); f[60]=dl1.csa(s_dir,dir); - - int c=61; - - int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1]==-1?-1:cluster.getLP(forms[prnt + 1]): _cend; - int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; - int sLSp1 = sblng < pos.length -1 ? forms[sblng + 1] ==-1?-1:cluster.getLP(forms[sblng + 1]) : _cend; - - int pLSm1 = prnt!=0 ? forms[prnt - 1]==-1?-1:cluster.getLP(forms[prnt - 1]): _cstr; - int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr; - int sLSm1 = sblng>0 ? forms[sblng - 1] ==-1?-1:cluster.getLP(forms[sblng - 1]):_cstr; - - //int c=61; - - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - - - - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.csa(s_dir,dir); - dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.csa(s_dir,dir); + int sblPm1 = sblng > 0 ? pos[sblng - 1] : s_str; + int sblPp1 = sblng < pos.length - 1 ? pos[sblng + 1] : s_end; + + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = pP; + dl1.cz5(); + f[45] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPm1; + dl1.v4 = pP; + dl1.cz5(); + f[46] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = prntPp1; + dl1.cz5(); + f[47] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = prntPm1; + dl1.cz5(); + f[48] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = prntPm1; + dl1.v5 = pP; + dl1.cz6(); + f[49] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sblPm1; + dl1.v3 = sP; + dl1.v4 = prntPm1; + dl1.v5 = pP; + dl1.cz6(); + f[50] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = pP; + dl1.v5 = prntPp1; + dl1.cz6(); + f[51] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sblPm1; + dl1.v3 = sP; + dl1.v4 = pP; + dl1.v5 = prntPp1; + dl1.cz6(); + f[52] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = dP; + dl1.cz5(); + f[53] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPm1; + dl1.v4 = dP; + dl1.cz5(); + f[54] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = chldPp1; + dl1.cz5(); + f[55] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = chldPm1; + dl1.cz5(); + f[56] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = chldPm1; + dl1.v5 = dP; + dl1.cz6(); + f[57] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sblPm1; + dl1.v3 = sP; + dl1.v4 = chldPm1; + dl1.v5 = dP; + dl1.cz6(); + f[58] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sblPp1; + dl1.v4 = dP; + dl1.v5 = chldPp1; + dl1.cz6(); + f[59] = dl1.csa(s_dir, dir);// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sblPm1; + dl1.v3 = sP; + dl1.v4 = dP; + dl1.v5 = chldPp1; + dl1.cz6(); + f[60] = dl1.csa(s_dir, dir); + + int c = 61; + + int pLSp1 = prnt != pos.length - 1 ? forms[prnt + 1] == -1 ? -1 : cluster.getLP(forms[prnt + 1]) : _cend; + int cLSp1 = d != pos.length - 1 ? forms[d + 1] == -1 ? -1 : cluster.getLP(forms[d + 1]) : _cend; + int sLSp1 = sblng < pos.length - 1 ? forms[sblng + 1] == -1 ? -1 : cluster.getLP(forms[sblng + 1]) : _cend; + + int pLSm1 = prnt != 0 ? forms[prnt - 1] == -1 ? -1 : cluster.getLP(forms[prnt - 1]) : _cstr; + int cLSm1 = d - 1 >= 0 ? forms[d - 1] == -1 ? -1 : cluster.getLP(forms[d - 1]) : _cstr; + int sLSm1 = sblng > 0 ? forms[sblng - 1] == -1 ? -1 : cluster.getLP(forms[sblng - 1]) : _cstr; + + // int c=61; + + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSp1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSm1; + dl1.cz5(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.csa(s_dir, dir); // take those in for stacking - - /* - short[] prel = is.plabels[i],phead=is.pheads[i]; - - int g = prnt==phead[d]?1:2 ; - if (sblng>=0) g += prnt==phead[sblng]?4:8; - - int gr = sblng==-1?s_relend:prel[sblng]; - - - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); - - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa(s_dir,dir); - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.csa(s_dir,dir); -*/ - - if (feats==null) return; - - int cnt=c; - - short[] featsP =feats[d]; - short[] featsSbl =sblng!=-1?feats[sblng]:null; - - dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=dP; - - - cnt = extractFeat(f, cnt ,dir, featsP, featsSbl); - - featsP =feats[prnt]; - featsSbl =sblng!=-1?feats[sblng]:null; - dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=sP; - if (featsP!=null && featsSbl!=null) { - for(short i1=0;i1<featsP.length;i1++) { - for(short i2=0;i2<featsSbl.length;i2++) { - dlf.v4=featsP[i1]; dlf.v5=featsSbl[i2]; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,prnt<sblng?1:2); + /* + * short[] prel = is.plabels[i],phead=is.pheads[i]; + * + * int g = prnt==phead[d]?1:2 ; if (sblng>=0) g += + * prnt==phead[sblng]?4:8; + * + * int gr = sblng==-1?s_relend:prel[sblng]; + * + * + * dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2. + * csa(s_dir,dir); dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2. + * csa(s_dir,dir); dl2.v0= + * n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f + * [c++]=dl2.csa(s_dir,dir); + * + * dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.csa( + * s_dir,dir); dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.csa( + * s_dir,dir); dl2.v0= + * n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++] + * =dl2.csa(s_dir,dir); + */ + + if (feats == null) + return; + + int cnt = c; + + short[] featsP = feats[d]; + short[] featsSbl = sblng != -1 ? feats[sblng] : null; + + dlf.v0 = n++; + dlf.v1 = label; + dlf.v2 = sP; + dlf.v3 = dP; + + cnt = extractFeat(f, cnt, dir, featsP, featsSbl); + + featsP = feats[prnt]; + featsSbl = sblng != -1 ? feats[sblng] : null; + + dlf.v0 = n++; + dlf.v1 = label; + dlf.v2 = pP; + dlf.v3 = sP; + if (featsP != null && featsSbl != null) { + for (short i1 = 0; i1 < featsP.length; i1++) { + for (short i2 = 0; i2 < featsSbl.length; i2++) { + dlf.v4 = featsP[i1]; + dlf.v5 = featsSbl[i2]; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, prnt < sblng ? 1 : 2); } - } - } else if (featsP==null && featsSbl!=null) { + } + } else if (featsP == null && featsSbl != null) { - for(short i2=0;i2<featsSbl.length;i2++) { - dlf.v4=nofeat; dlf.v5=featsSbl[i2]; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); + for (short i2 = 0; i2 < featsSbl.length; i2++) { + dlf.v4 = nofeat; + dlf.v5 = featsSbl[i2]; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, dir); } - } else if (featsP!=null && featsSbl==null) { + } else if (featsP != null && featsSbl == null) { - for(short i1=0;i1<featsP.length;i1++) { - dlf.v4=featsP[i1]; dlf.v5=nofeat; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); - } + for (short i1 = 0; i1 < featsP.length; i1++) { + dlf.v4 = featsP[i1]; + dlf.v5 = nofeat; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, dir); + } } - + return; } private int extractFeat(long[] f, int cnt, int dir, short[] featsP, short[] featsD) { - if (featsP!=null && featsD!=null) { - for(short i1=0;i1<featsP.length;i1++) { - for(short i2=0;i2<featsD.length;i2++) { - dlf.v4=featsP[i1]; dlf.v5=featsD[i2]; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); + if (featsP != null && featsD != null) { + for (short i1 = 0; i1 < featsP.length; i1++) { + for (short i2 = 0; i2 < featsD.length; i2++) { + dlf.v4 = featsP[i1]; + dlf.v5 = featsD[i2]; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, dir); } - } - } else if (featsP==null && featsD!=null) { + } + } else if (featsP == null && featsD != null) { - for(short i2=0;i2<featsD.length;i2++) { - dlf.v4=nofeat; dlf.v5=featsD[i2]; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); + for (short i2 = 0; i2 < featsD.length; i2++) { + dlf.v4 = nofeat; + dlf.v5 = featsD[i2]; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, dir); - } - } else if (featsP!=null && featsD==null) { + } + } else if (featsP != null && featsD == null) { - for(short i1=0;i1<featsP.length;i1++) { - dlf.v4=featsP[i1]; dlf.v5=nofeat; - dlf.cz6(); f[cnt++]=dlf.csa(s_dir,dir); + for (short i1 = 0; i1 < featsP.length; i1++) { + dlf.v4 = featsP[i1]; + dlf.v5 = nofeat; + dlf.cz6(); + f[cnt++] = dlf.csa(s_dir, dir); - } + } } return cnt; } - - public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) { - + @Override + public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, + short feats[][], Cluster cluster, FV f) { - long[] svs = new long[250]; + long[] svs = new long[250]; for (int i = 1; i < heads.length; i++) { + int n = basic(pposs, forms, heads[i], i, cluster, f); - int n =basic(pposs, forms, heads[i], i, cluster, f); - - firstm(is, ic, heads[i], i, types[i], cluster,svs); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); + firstm(is, ic, heads[i], i, types[i], cluster, svs); + for (long sv : svs) + dl1.map(f, sv); - int ch,cmi,cmo; + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); @@ -716,64 +1685,77 @@ final public class ExtractorClusterStackedR2 implements Extractor { cmo = leftmostLeft(heads, i, 0); } - siblingm(is,ic,pposs, forms,lemmas, feats, heads[i], i, ch,types[i], cluster, svs,n); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); - + siblingm(is, ic, pposs, forms, lemmas, feats, heads[i], i, ch, types[i], cluster, svs, n); + for (long sv : svs) + dl1.map(f, sv); - gcm(is, ic,heads[i],i,cmi, types[i], cluster, svs); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); + gcm(is, ic, heads[i], i, cmi, types[i], cluster, svs); + for (long sv : svs) + dl1.map(f, sv); - gcm(is, ic, heads[i],i,cmo, types[i], cluster, svs); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); + gcm(is, ic, heads[i], i, cmo, types[i], cluster, svs); + for (long sv : svs) + dl1.map(f, sv); } return f; } - - public float encode3(short[] pos, short heads[] , short[] types, DataF d2) { + @Override + public float encode3(short[] pos, short heads[], short[] types, DataF d2) { double v = 0; for (int i = 1; i < heads.length; i++) { - int dir= (heads[i] < i)? 0:1; + int dir = (heads[i] < i) ? 0 : 1; v += d2.pl[heads[i]][i]; v += d2.lab[heads[i]][i][types[i]][dir]; - boolean left = i<heads[i]; + boolean left = i < heads[i]; short[] labels = Edges.get(pos[heads[i]], pos[i], left); - int lid=-1; - for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} + int lid = -1; + for (int k = 0; k < labels.length; k++) + if (types[i] == labels[k]) { + lid = k; + break; + } - int ch,cmi,cmo; + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); cmo = rightmostRight(heads, i, heads.length); - if (ch==-1) ch=heads[i]; - if (cmi==-1) cmi=heads[i]; - if (cmo==-1) cmo=heads[i]; + if (ch == -1) + ch = heads[i]; + if (cmi == -1) + cmi = heads[i]; + if (cmo == -1) + cmo = heads[i]; } else { ch = leftmostLeft(heads, heads[i], i); cmi = rightmostRight(heads, i, heads[i]); cmo = leftmostLeft(heads, i, 0); - if (ch==-1) ch=i; - if (cmi==-1) cmi=i; - if (cmo==-1) cmo=i; + if (ch == -1) + ch = i; + if (cmi == -1) + cmi = i; + if (cmo == -1) + cmo = i; } v += d2.sib[heads[i]][i][ch][dir][lid]; v += d2.gra[heads[i]][i][cmi][dir][lid]; v += d2.gra[heads[i]][i][cmo][dir][lid]; } - return (float)v; + return (float) v; } /** * Provide the scores of the edges + * * @param pos * @param heads * @param types @@ -781,134 +1763,142 @@ final public class ExtractorClusterStackedR2 implements Extractor { * @param d2 * @return */ - public static float encode3(short[] pos, short heads[] , short[] types, float[] edgesScores, DataF d2) { + public static float encode3(short[] pos, short heads[], short[] types, float[] edgesScores, DataF d2) { double v = 0; for (int i = 1; i < heads.length; i++) { - int dir= (heads[i] < i)? 0:1; + int dir = (heads[i] < i) ? 0 : 1; edgesScores[i] = d2.pl[heads[i]][i]; edgesScores[i] += d2.lab[heads[i]][i][types[i]][dir]; - boolean left = i<heads[i]; + boolean left = i < heads[i]; short[] labels = Edges.get(pos[heads[i]], pos[i], left); - int lid=-1; - for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} + int lid = -1; + for (int k = 0; k < labels.length; k++) + if (types[i] == labels[k]) { + lid = k; + break; + } - int ch,cmi,cmo; + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); cmo = rightmostRight(heads, i, heads.length); - if (ch==-1) ch=heads[i]; - if (cmi==-1) cmi=heads[i]; - if (cmo==-1) cmo=heads[i]; + if (ch == -1) + ch = heads[i]; + if (cmi == -1) + cmi = heads[i]; + if (cmo == -1) + cmo = heads[i]; } else { ch = leftmostLeft(heads, heads[i], i); cmi = rightmostRight(heads, i, heads[i]); cmo = leftmostLeft(heads, i, 0); - if (ch==-1) ch=i; - if (cmi==-1) cmi=i; - if (cmo==-1) cmo=i; + if (ch == -1) + ch = i; + if (cmi == -1) + cmi = i; + if (cmo == -1) + cmo = i; } edgesScores[i] += d2.sib[heads[i]][i][ch][dir][lid]; edgesScores[i] += d2.gra[heads[i]][i][cmi][dir][lid]; edgesScores[i] += d2.gra[heads[i]][i][cmo][dir][lid]; - v+=edgesScores[i]; + v += edgesScores[i]; } - return (float)v; + return (float) v; } - private static int rightmostRight(short[] heads, int head, int max) { int rightmost = -1; - for (int i = head + 1; i < max; i++) if (heads[i] == head) rightmost = i; + for (int i = head + 1; i < max; i++) + if (heads[i] == head) + rightmost = i; return rightmost; } private static int leftmostLeft(short[] heads, int head, int min) { int leftmost = -1; - for (int i = head - 1; i > min; i--) if (heads[i] == head) leftmost = i; + for (int i = head - 1; i > min; i--) + if (heads[i] == head) + leftmost = i; return leftmost; } - public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA"; + public static final String REL = "REL", END = "END", STR = "STR", LA = "LA", RA = "RA"; - private static int ra,la; + private static int ra, la; private static int s_str; - private static int s_end, _cend,_cstr, s_stwrd,s_relend; + private static int s_end, _cend, _cstr, s_stwrd; - protected static final String TYPE = "TYPE",DIR = "D", FEAT="F"; + protected static final String TYPE = "TYPE", DIR = "D", FEAT = "F"; public static final String POS = "POS"; - protected static final String DIST = "DIST",MID = "MID"; - - private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10"; - - private static int di0, d4,d3,d2,d1,d5,d10; + protected static final String DIST = "DIST", MID = "MID"; + private static final String _0 = "0", _4 = "4", _3 = "3", _2 = "2", _1 = "1", _5 = "5", _10 = "10"; - private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS"; - + private static int di0, d4, d3, d2, d1, d5, d10; + private static final String WORD = "WORD", STWRD = "STWRD", STPOS = "STPOS"; private static int nofeat; - private static int maxForm; - /** * Initialize the features. + * * @param maxFeatures */ static public void initFeatures() { - MFB mf = new MFB(); mf.register(POS, MID); s_str = mf.register(POS, STR); s_end = mf.register(POS, END); - s_relend = mf.register(REL, END); - - _cstr= mf.register(Cluster.SPATH,STR); - _cend=mf.register(Cluster.SPATH,END); + mf.register(REL, END); + _cstr = mf.register(Cluster.SPATH, STR); + _cend = mf.register(Cluster.SPATH, END); mf.register(TYPE, POS); - s_stwrd=mf.register(WORD,STWRD); - mf.register(POS,STPOS); + s_stwrd = mf.register(WORD, STWRD); + mf.register(POS, STPOS); la = mf.register(DIR, LA); ra = mf.register(DIR, RA); - // mf.register(TYPE, CHAR); + // mf.register(TYPE, CHAR); mf.register(TYPE, FEAT); - nofeat=mf.register(FEAT, "NOFEAT"); - - for(int k=0;k<215;k++) mf.register(TYPE, "F"+k); + nofeat = mf.register(FEAT, "NOFEAT"); + for (int k = 0; k < 215; k++) + mf.register(TYPE, "F" + k); - di0=mf.register(DIST, _0); - d1=mf.register(DIST, _1); - d2=mf.register(DIST, _2); - d3=mf.register(DIST, _3); - d4=mf.register(DIST, _4); - d5=mf.register(DIST, _5); - // d5l=mf.register(DIST, _5l); - d10=mf.register(DIST, _10); - + di0 = mf.register(DIST, _0); + d1 = mf.register(DIST, _1); + d2 = mf.register(DIST, _2); + d3 = mf.register(DIST, _3); + d4 = mf.register(DIST, _4); + d5 = mf.register(DIST, _5); + // d5l=mf.register(DIST, _5l); + d10 = mf.register(DIST, _10); } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see extractors.Extractor#getType() */ @Override @@ -916,7 +1906,9 @@ final public class ExtractorClusterStackedR2 implements Extractor { return s_type; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see extractors.Extractor#setMaxForm(java.lang.Integer) */ @Override @@ -924,7 +1916,9 @@ final public class ExtractorClusterStackedR2 implements Extractor { maxForm = max; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see extractors.Extractor#getMaxForm() */ @Override @@ -932,6 +1926,4 @@ final public class ExtractorClusterStackedR2 implements Extractor { return maxForm; } - - } diff --git a/dependencyParser/mate-tools/src/extractors/ExtractorFactory.java b/dependencyParser/mate-tools/src/extractors/ExtractorFactory.java index 9543111..20827d4 100644 --- a/dependencyParser/mate-tools/src/extractors/ExtractorFactory.java +++ b/dependencyParser/mate-tools/src/extractors/ExtractorFactory.java @@ -1,5 +1,5 @@ /** - * + * */ package extractors; @@ -7,22 +7,21 @@ import is2.data.Long2IntInterface; /** * @author Dr. Bernd Bohnet, 29.04.2011 - * - * + * + * */ public class ExtractorFactory { public static final int StackedClustered = 4; public static final int StackedClusteredR2 = 5; - - private int type=-1; - + private int type = -1; + /** * @param stackedClusteredR22 */ public ExtractorFactory(int t) { - type=t; + type = t; } /** @@ -31,14 +30,13 @@ public class ExtractorFactory { * @return */ public Extractor getExtractor(Long2IntInterface l2i) { - switch(type) - { - case StackedClustered: - return new ExtractorClusterStacked(l2i); - case StackedClusteredR2: - return new ExtractorClusterStackedR2(l2i); - } + switch (type) { + case StackedClustered: + return new ExtractorClusterStacked(l2i); + case StackedClusteredR2: + return new ExtractorClusterStackedR2(l2i); + } return null; } - + } diff --git a/dependencyParser/mate-tools/src/extractors/ExtractorReranker.java b/dependencyParser/mate-tools/src/extractors/ExtractorReranker.java index 2761f26..bf068b2 100644 --- a/dependencyParser/mate-tools/src/extractors/ExtractorReranker.java +++ b/dependencyParser/mate-tools/src/extractors/ExtractorReranker.java @@ -1,5 +1,6 @@ package extractors; +import java.util.Arrays; import is2.data.Cluster; import is2.data.D4; @@ -9,24 +10,21 @@ import is2.data.MFB; import is2.data.ParseNBest; import is2.util.DB; -import java.util.Arrays; - - - final public class ExtractorReranker { - public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos; - public static int d0,d1,d2,d3,d4,d5,d10; - + public static int s_rel, s_word, s_type, s_dir, s_dist, s_feat, s_child, s_spath, s_lpath, s_pos; + public static int d0, d1, d2, d3, d4, d5, d10; + MFB mf; - final D4 dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ; + final D4 dl1, dl2, dwr, dr, dwwp, dw, dwp, dlf, d3lp, d2lp, d2pw, d2pp; public final Long2IntInterface li; public ExtractorReranker(Long2IntInterface li) { - this.li=li; - dl1 = new D4(li);dl2 = new D4(li); + this.li = li; + dl1 = new D4(li); + dl2 = new D4(li); dwr = new D4(li); dr = new D4(li); dwwp = new D4(li); @@ -35,110 +33,126 @@ final public class ExtractorReranker { dwp = new D4(li); dlf = new D4(li); - d3lp = new D4(li); d2lp = new D4(li); d2pw = new D4(li); d2pp = new D4(li); + d3lp = new D4(li); + d2lp = new D4(li); + d2pw = new D4(li); + d2pp = new D4(li); } public static void initStat() { DB.println("init called "); MFB mf = new MFB(); - s_rel = mf.getFeatureCounter().get(REL).intValue();; - s_pos = mf.getFeatureCounter().get(POS).intValue(); + s_rel = mf.getFeatureCounter().get(REL).intValue(); + ; + s_pos = mf.getFeatureCounter().get(POS).intValue(); s_word = mf.getFeatureCounter().get(WORD).intValue(); - s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits(); + s_type = mf.getFeatureCounter().get(TYPE).intValue();// mf.getFeatureBits(); s_dir = mf.getFeatureCounter().get(DIR); - la = mf.getValue(DIR, LA); - ra = mf.getValue(DIR, RA); - s_dist = mf.getFeatureCounter().get(DIST);//mf.getFeatureBits(DIST); - s_feat = mf.getFeatureCounter().get(FEAT);//mf.getFeatureBits(Pipe.FEAT); - s_spath = mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH);//mf.getFeatureBits(Cluster.SPATH); - s_lpath = mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH);//mf.getFeatureBits(Cluster.LPATH); + mf.getValue(DIR, LA); + mf.getValue(DIR, RA); + s_dist = mf.getFeatureCounter().get(DIST);// mf.getFeatureBits(DIST); + s_feat = mf.getFeatureCounter().get(FEAT);// mf.getFeatureBits(Pipe.FEAT); + s_spath = mf.getFeatureCounter().get(Cluster.SPATH) == null ? 0 : mf.getFeatureCounter().get(Cluster.SPATH);// mf.getFeatureBits(Cluster.SPATH); + s_lpath = mf.getFeatureCounter().get(Cluster.LPATH) == null ? 0 : mf.getFeatureCounter().get(Cluster.LPATH);// mf.getFeatureBits(Cluster.LPATH); } - public void init(){ + public void init() { mf = new MFB(); - dl1.a0 = s_type;dl1.a1 = 3; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; - dl2.a0 = s_type;dl2.a1 = 3;dl2.a2 = s_rel;dl2.a3 = s_rel;dl2.a4 = s_rel;dl2.a5 = s_rel;dl2.a6 = s_rel;dl2.a7 = s_rel;dl2.a8 = s_rel; dl2.a9 = s_rel; - dwp.a0 = s_type; dwp.a1 = 3; dwp.a2 = s_word; dwp.a3 = s_rel; dwp.a4 = s_rel; dwp.a5 = s_rel;dwp.a6 = s_rel;dwp.a7 = s_rel; - dwwp.a0 = s_type; dwwp.a1 = 3; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word;dwwp.a6 = s_pos;dwwp.a7 = s_pos; + dl1.a0 = s_type; + dl1.a1 = 3; + dl1.a2 = s_pos; + dl1.a3 = s_pos; + dl1.a4 = s_pos; + dl1.a5 = s_pos; + dl1.a6 = s_pos; + dl1.a7 = s_pos; + dl2.a0 = s_type; + dl2.a1 = 3; + dl2.a2 = s_rel; + dl2.a3 = s_rel; + dl2.a4 = s_rel; + dl2.a5 = s_rel; + dl2.a6 = s_rel; + dl2.a7 = s_rel; + dl2.a8 = s_rel; + dl2.a9 = s_rel; + dwp.a0 = s_type; + dwp.a1 = 3; + dwp.a2 = s_word; + dwp.a3 = s_rel; + dwp.a4 = s_rel; + dwp.a5 = s_rel; + dwp.a6 = s_rel; + dwp.a7 = s_rel; + dwwp.a0 = s_type; + dwwp.a1 = 3; + dwwp.a2 = s_word; + dwwp.a3 = s_word; + dwwp.a4 = s_pos; + dwwp.a5 = s_word; + dwwp.a6 = s_pos; + dwwp.a7 = s_pos; } + public static final String REL = "REL", END = "END", STR = "STR", LA = "LA", RA = "RA", FEAT = "F"; - + private static int s_end, s_stwrd, s_relend; - - - public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA", FEAT="F"; - - private static int ra,la; - private static int s_str; - private static int s_end, _cend,_cstr, s_stwrd,s_relend; - - protected static final String TYPE = "TYPE",DIR = "D"; + protected static final String TYPE = "TYPE", DIR = "D"; public static final String POS = "POS"; - protected static final String DIST = "DIST",MID = "MID"; - - private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10"; - - + protected static final String DIST = "DIST", MID = "MID"; - private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS"; - - - - private static int nofeat; + private static final String _0 = "0", _4 = "4", _3 = "3", _2 = "2", _1 = "1", _5 = "5", _10 = "10"; + private static final String WORD = "WORD", STWRD = "STWRD", STPOS = "STPOS"; public static int maxForm; - - final public static int _FC =60; - + final public static int _FC = 60; /** * Initialize the features. + * * @param maxFeatures */ static public void initFeatures() { - MFB mf = new MFB(); mf.register(POS, MID); - s_str = mf.register(POS, STR); + mf.register(POS, STR); s_end = mf.register(POS, END); s_relend = mf.register(REL, END); - _cstr= mf.register(Cluster.SPATH,STR); - _cend=mf.register(Cluster.SPATH,END); - + mf.register(Cluster.SPATH, STR); + mf.register(Cluster.SPATH, END); mf.register(TYPE, POS); - s_stwrd=mf.register(WORD,STWRD); - mf.register(POS,STPOS); + s_stwrd = mf.register(WORD, STWRD); + mf.register(POS, STPOS); - la = mf.register(DIR, LA); - ra = mf.register(DIR, RA); + mf.register(DIR, LA); + mf.register(DIR, RA); - // mf.register(TYPE, CHAR); + // mf.register(TYPE, CHAR); mf.register(TYPE, FEAT); - nofeat=mf.register(FEAT, "NOFEAT"); - - for(int k=0;k<60;k++) mf.register(TYPE, "F"+k); + mf.register(FEAT, "NOFEAT"); + for (int k = 0; k < 60; k++) + mf.register(TYPE, "F" + k); - d0 =mf.register(DIST, _0); - d1= mf.register(DIST, _1); - d2 =mf.register(DIST, _2); - d3= mf.register(DIST, _3); - d4= mf.register(DIST, _4); - d5= mf.register(DIST, _5); - // d5l=mf.register(DIST, _5l); - d10= mf.register(DIST, _10); - + d0 = mf.register(DIST, _0); + d1 = mf.register(DIST, _1); + d2 = mf.register(DIST, _2); + d3 = mf.register(DIST, _3); + d4 = mf.register(DIST, _4); + d5 = mf.register(DIST, _5); + // d5l=mf.register(DIST, _5l); + d10 = mf.register(DIST, _10); } @@ -146,264 +160,526 @@ final public class ExtractorReranker { * @param is * @param n * @param parseNBest - * @param vs + * @param vs */ public void extractFeatures3(Instances is, int i, ParseNBest parse, int rank, long[] v) { - - int f=1,n=0; - - for(short k= 0; k<is.length(i)-1;k++) { - - short[] chld = children(parse.heads,k); - - f=2; - + + int f = 1, n = 0; + + for (short k = 0; k < is.length(i) - 1; k++) { + + short[] chld = children(parse.heads, k); + + f = 2; + int fm = is.forms[i][k]; - int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; + int hh = k != 0 ? is.pposs[i][parse.heads[k]] : s_end; int h = is.pposs[i][k]; int hrel = parse.labels[k]; - int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; - int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; + int hhrel = k != 0 ? parse.labels[parse.heads[k]] : s_relend; + int hhf = k != 0 ? is.forms[i][parse.heads[k]] : s_stwrd; + int rlast = chld.length > 0 ? parse.labels[chld[chld.length - 1]] : s_relend; - - int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; - - int [] rels = new int[chld.length]; - int [] pss = new int[chld.length]; - for(int j=0;j<chld.length;j++) { + int[] rels = new int[chld.length]; + int[] pss = new int[chld.length]; + for (int j = 0; j < chld.length; j++) { rels[j] = parse.labels[chld[j]]; pss[j] = is.pposs[i][chld[j]]; } StringBuilder rl = new StringBuilder(chld.length); StringBuilder psl = new StringBuilder(chld.length); - for(int j=0;j<chld.length;j++) { - rl.append((char)rels[j]); - psl.append((char)pss[j]); + for (int j = 0; j < chld.length; j++) { + rl.append((char) rels[j]); + psl.append((char) pss[j]); } - + int rli = mf.register("rli", rl.toString()); int pli = mf.register("pli", psl.toString()); - dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - - - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - - dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.cz3(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.cz3(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = hh; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = hh; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = rli; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = fm; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = hh; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = hh; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + + dwp.v0 = f++; + dwp.v2 = rli; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.v5 = h; + dwp.cz6(); + v[n++] = dwp.getVal(); Arrays.sort(rels); Arrays.sort(pss); rl = new StringBuilder(chld.length); psl = new StringBuilder(chld.length); - for(int j=0;j<chld.length;j++) { - rl.append((char)rels[j]); - psl.append((char)pss[j]); + for (int j = 0; j < chld.length; j++) { + rl.append((char) rels[j]); + psl.append((char) pss[j]); } rli = mf.register("rli", rl.toString()); pli = mf.register("pli", psl.toString()); - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - - dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); - dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); - dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel; dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); - - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hh; dwwp.v4=hrel; dwwp.v5=hhrel; dwwp.cz6(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hhf; dwwp.v4=h; dwwp.v5=hh; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hhf; dwwp.v4=hrel; dwwp.v5=hh; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hh; dwwp.v4=h; dwwp.v5=hrel; dwwp.cz6(); v[n++]=dwwp.getVal(); - - - // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); v[n++]=dl1.getVal(); -// dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); v[n++]=dl1.getVal(); - // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=dir;dl1.v5=hh; dl1.v6=hhh;dl1.v7=rlast; dl1.v8=r1; dl1.cz9(); v[n++]=dl1.getVal(); - // dl1.v0= f++; dl1.v2=h;dl1.v3=hh; dl1.v4=hhh;dl1.v5=hrel; dl1.cz6(); v[n++]=dl1.getVal(); - - + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = rli; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = fm; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + + dl1.v0 = f++; + dl1.v2 = h; + dl1.v3 = hrel; + dl1.v4 = hhrel; + dl1.v5 = hh; + dl1.v6 = rlast; + dl1.cz6(); + v[n++] = dl1.getVal(); + dwp.v0 = f++; + dwp.v2 = fm; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.cz5(); + v[n++] = dwp.getVal(); + dwp.v0 = f++; + dwp.v2 = hhf; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.v5 = h; + dwp.cz6(); + v[n++] = dwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = hhf; + dwwp.v4 = hrel; + dwwp.v5 = hhrel; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = h; + dwwp.v3 = hhf; + dwwp.v4 = hrel; + dwwp.v5 = hhrel; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = hh; + dwwp.v4 = hrel; + dwwp.v5 = hhrel; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = hhf; + dwwp.v4 = h; + dwwp.v5 = hh; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = h; + dwwp.v3 = hhf; + dwwp.v4 = hrel; + dwwp.v5 = hh; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = hh; + dwwp.v4 = h; + dwwp.v5 = hrel; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + + // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; + // dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); + // v[n++]=dl1.getVal(); + // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; + // dl1.v6=hhhrel;dl1.v7=hhh; dl1.v8=rlast; dl1.cz9(); + // v[n++]=dl1.getVal(); + // dl1.v0= f++; dl1.v2=h;dl1.v3=hrel; dl1.v4=dir;dl1.v5=hh; + // dl1.v6=hhh;dl1.v7=rlast; dl1.v8=r1; dl1.cz9(); + // v[n++]=dl1.getVal(); + // dl1.v0= f++; dl1.v2=h;dl1.v3=hh; dl1.v4=hhh;dl1.v5=hrel; + // dl1.cz6(); v[n++]=dl1.getVal(); + short hp = parse.heads[k]; - short[] hchld = hp==-1?new short[0]:children(parse.heads,hp); + short[] hchld = hp == -1 ? new short[0] : children(parse.heads, hp); - int [] hrels = new int[hchld.length]; - int [] hpss = new int[hchld.length]; - for(int j=0;j<hchld.length;j++) { + int[] hrels = new int[hchld.length]; + int[] hpss = new int[hchld.length]; + for (int j = 0; j < hchld.length; j++) { hrels[j] = parse.labels[hchld[j]]; hpss[j] = is.pposs[i][hchld[j]]; } - - + StringBuilder hrl = new StringBuilder(hchld.length); StringBuilder hpsl = new StringBuilder(hchld.length); - for(int j=0;j<hchld.length;j++) { - hrl.append((char)hrels[j]); - hpsl.append((char)hpss[j]); + for (int j = 0; j < hchld.length; j++) { + hrl.append((char) hrels[j]); + hpsl.append((char) hpss[j]); } int hrli = mf.register("rli", hrl.toString()); int hpli = mf.register("pli", hpsl.toString()); - dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=hrli; dwwp.cz4(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=hrli; dwwp.cz4(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=fm; dwwp.cz4(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=rli; dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hrli;dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=hpli; dwwp.v3=hpli;dwwp.v4=hrel;dwwp.v5=h; dwwp.cz5(); v[n++]=dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = hpli; + dwwp.v3 = hrli; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = hrli; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = hpli; + dwwp.v3 = fm; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = hpli; + dwwp.v3 = rli; + dwwp.v4 = hrel; + dwwp.v5 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = hrli; + dwwp.v4 = hrel; + dwwp.v5 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = hpli; + dwwp.v3 = hpli; + dwwp.v4 = hrel; + dwwp.v5 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); - - } - - v[n]=Integer.MIN_VALUE; + + v[n] = Integer.MIN_VALUE; } /** - * This works seem works well with n-best n=8 (88.858074) , n=10 (88.836884), n=12 (88.858) - * n=14 (88.913417) n=16 (88.79546) n=20 (88.80621) n 50 (88.729364) - * 1-best: 88.749605 - * + * This works seem works well with n-best n=8 (88.858074) , n=10 + * (88.836884), n=12 (88.858) n=14 (88.913417) n=16 (88.79546) n=20 + * (88.80621) n 50 (88.729364) 1-best: 88.749605 + * * @param is * @param i * @param parse * @param rank * @param v - * @param cluster + * @param cluster */ public void extractFeatures(Instances is, int i, ParseNBest parse, int rank, long[] v, Cluster cluster) { - + // mf.getValue(REL, "SB"); - - int f=1,n=0; - - for(short k= 0; k<is.length(i)-1;k++) { - - short[] chld = children(parse.heads,k); - - int abs = Math.abs(parse.heads[k]-k); - final int dist; - if (abs > 10)dist=d10;else if (abs>5) dist=d5;else if( abs==5)dist=d4;else if (abs==4)dist=d3;else if (abs==3)dist=d2; - else if (abs==2)dist=d1; else dist=d0; - - - f=2; - + + int f = 1, n = 0; + + for (short k = 0; k < is.length(i) - 1; k++) { + + short[] chld = children(parse.heads, k); + + int abs = Math.abs(parse.heads[k] - k); + if (abs > 10) { + } else if (abs > 5) { + } else if (abs == 5) { + } else if (abs == 4) { + } else if (abs == 3) { + } else if (abs == 2) { + } else { + } + + f = 2; + int fm = is.forms[i][k]; - int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; + int hh = k != 0 ? is.pposs[i][parse.heads[k]] : s_end; int h = is.pposs[i][k]; - int hrel = parse.labels[k];//is.labels[i][k]; - int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; - int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; - - int r1 = chld.length>0?parse.labels[chld[0]]:s_relend; - int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; - - int [] rels = new int[chld.length]; - int [] pss = new int[chld.length]; - int [] cls = new int[chld.length]; - + int hrel = parse.labels[k];// is.labels[i][k]; + int hhrel = k != 0 ? parse.labels[parse.heads[k]] : s_relend; + int hhf = k != 0 ? is.forms[i][parse.heads[k]] : s_stwrd; + + int rlast = chld.length > 0 ? parse.labels[chld[chld.length - 1]] : s_relend; + + int[] rels = new int[chld.length]; + int[] pss = new int[chld.length]; int[] rc = new int[30]; // 20 was a good length - - for(int j=0;j<chld.length;j++) { + + for (int j = 0; j < chld.length; j++) { rels[j] = parse.labels[chld[j]]; - if (rels[j]<rc.length) rc[rels[j]]++; + if (rels[j] < rc.length) + rc[rels[j]]++; pss[j] = is.pposs[i][chld[j]]; -// cls[j] = is.forms[i][chld[j]]==-1?0:cluster.getLP(is.forms[i][chld[j]]); -// cls[j] = cls[j]==-1?0:cls[j]; + // cls[j] = + // is.forms[i][chld[j]]==-1?0:cluster.getLP(is.forms[i][chld[j]]); + // cls[j] = cls[j]==-1?0:cls[j]; } StringBuilder rl = new StringBuilder(chld.length); StringBuilder psl = new StringBuilder(chld.length); - StringBuilder csl = new StringBuilder(chld.length); - for(int j=0;j<chld.length;j++) { - rl.append((char)rels[j]); - psl.append((char)pss[j]); -// csl.append((char)cls[j]); + new StringBuilder(chld.length); + for (int j = 0; j < chld.length; j++) { + rl.append((char) rels[j]); + psl.append((char) pss[j]); + // csl.append((char)cls[j]); } - + int rli = mf.register("rli", rl.toString()); int pli = mf.register("pli", psl.toString()); -// int cli = mf.register("cli", csl.toString()); - - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - // dwwp.v0=f++; dwwp.v2=cli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); - //dwwp.v0=f++; dwwp.v2=cli; dwwp.cz3(); v[n++]=dwwp.getVal(); - - // dwwp.v0=f++; dwwp.v2=cli;dwwp.v3=h; dwwp.cz4(); v[n++]=dwwp.getVal(); - - for(int j=1;j<rc.length;j++) { - dwwp.v0=f++; dwwp.v2=rc[j]==0?1:rc[j]==1?2:3; dwwp.v3=j; dwwp.cz4(); v[n++]=dwwp.getVal();// + // int cli = mf.register("cli", csl.toString()); + + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = rli; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = fm; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + // dwwp.v0=f++; dwwp.v2=cli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); + // v[n++]=dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.cz3(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.cz3(); + v[n++] = dwwp.getVal(); + // dwwp.v0=f++; dwwp.v2=cli; dwwp.cz3(); v[n++]=dwwp.getVal(); + + // dwwp.v0=f++; dwwp.v2=cli;dwwp.v3=h; dwwp.cz4(); + // v[n++]=dwwp.getVal(); + + for (int j = 1; j < rc.length; j++) { + dwwp.v0 = f++; + dwwp.v2 = rc[j] == 0 ? 1 : rc[j] == 1 ? 2 : 3; + dwwp.v3 = j; + dwwp.cz4(); + v[n++] = dwwp.getVal();// } - - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = hh; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = hh; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = hh; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = hh; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + + dwp.v0 = f++; + dwp.v2 = rli; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.v5 = h; + dwp.cz6(); + v[n++] = dwp.getVal(); + + // dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hh; dwwp.v4=dist; dwwp.cz5(); + // v[n++]=dwwp.getVal(); - //dwwp.v0=f++; dwwp.v2=h; dwwp.v3=hh; dwwp.v4=dist; dwwp.cz5(); v[n++]=dwwp.getVal(); - Arrays.sort(rels); Arrays.sort(pss); rl = new StringBuilder(chld.length); psl = new StringBuilder(chld.length); - for(int j=0;j<chld.length;j++) { - rl.append((char)rels[j]); - psl.append((char)pss[j]); + for (int j = 0; j < chld.length; j++) { + rl.append((char) rels[j]); + psl.append((char) pss[j]); } rli = mf.register("rli", rl.toString()); pli = mf.register("pli", psl.toString()); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - - dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); - dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); - dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = 1; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = rli; + dwwp.v4 = 1; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = fm; + dwwp.v4 = 1; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + + dl1.v0 = f++; + dl1.v2 = h; + dl1.v3 = hrel; + dl1.v4 = hhrel; + dl1.v5 = hh; + dl1.v6 = rlast; + dl1.cz6(); + v[n++] = dl1.getVal(); + dwp.v0 = f++; + dwp.v2 = fm; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.cz5(); + v[n++] = dwp.getVal(); + dwp.v0 = f++; + dwp.v2 = hhf; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.v5 = h; + dwp.cz6(); + v[n++] = dwp.getVal(); } - - v[n]=Integer.MIN_VALUE; + + v[n] = Integer.MIN_VALUE; } - - /** - * Works well! + /** + * + * Works well! + * * @param is * @param i * @param parse @@ -411,211 +687,394 @@ final public class ExtractorReranker { * @param v */ public void extractFeatures6(Instances is, int i, ParseNBest parse, int rank, long[] v) { - + // mf.getValue(REL, "SB"); - - int f=1,n=0; - - for(short k= 0; k<is.length(i)-1;k++) { - - short[] chld = children(parse.heads,k); - - f=2; - + + int f = 1, n = 0; + + for (short k = 0; k < is.length(i) - 1; k++) { + + short[] chld = children(parse.heads, k); + + f = 2; + int fm = is.forms[i][k]; - int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; + int hh = k != 0 ? is.pposs[i][parse.heads[k]] : s_end; int h = is.pposs[i][k]; - int hrel = parse.labels[k];//is.labels[i][k]; - int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; - int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; - - int r1 = chld.length>0?parse.labels[chld[0]]:s_relend; - int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; - - int [] rels = new int[chld.length]; - int [] pss = new int[chld.length]; - + int hrel = parse.labels[k];// is.labels[i][k]; + int hhrel = k != 0 ? parse.labels[parse.heads[k]] : s_relend; + int hhf = k != 0 ? is.forms[i][parse.heads[k]] : s_stwrd; + + int rlast = chld.length > 0 ? parse.labels[chld[chld.length - 1]] : s_relend; + + int[] rels = new int[chld.length]; + int[] pss = new int[chld.length]; + int[] rc = new int[30]; // 20 was a good length - - for(int j=0;j<chld.length;j++) { + + for (int j = 0; j < chld.length; j++) { rels[j] = parse.labels[chld[j]]; - if (rels[j]<rc.length) rc[rels[j]]++; - // if (rels[j]==sb) numSB++; + if (rels[j] < rc.length) + rc[rels[j]]++; + // if (rels[j]==sb) numSB++; pss[j] = is.pposs[i][chld[j]]; } StringBuilder rl = new StringBuilder(chld.length); StringBuilder psl = new StringBuilder(chld.length); - for(int j=0;j<chld.length;j++) { - rl.append((char)rels[j]); - psl.append((char)pss[j]); + for (int j = 0; j < chld.length; j++) { + rl.append((char) rels[j]); + psl.append((char) pss[j]); } - + int rli = mf.register("rli", rl.toString()); int pli = mf.register("pli", psl.toString()); - - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); - - for(int j=1;j<rc.length;j++) { - dwwp.v0=f++; dwwp.v2=rc[j]==0?1:rc[j]==1?2:3; dwwp.v3=j; dwwp.cz4(); v[n++]=dwwp.getVal();// - } - - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = rli; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = fm; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.cz3(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.cz3(); + v[n++] = dwwp.getVal(); + + for (int j = 1; j < rc.length; j++) { + dwwp.v0 = f++; + dwwp.v2 = rc[j] == 0 ? 1 : rc[j] == 1 ? 2 : 3; + dwwp.v3 = j; + dwwp.cz4(); + v[n++] = dwwp.getVal();// + } + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = hh; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = hh; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = hh; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = hh; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + + dwp.v0 = f++; + dwp.v2 = rli; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.v5 = h; + dwp.cz6(); + v[n++] = dwp.getVal(); Arrays.sort(rels); Arrays.sort(pss); rl = new StringBuilder(chld.length); psl = new StringBuilder(chld.length); - for(int j=0;j<chld.length;j++) { - rl.append((char)rels[j]); - psl.append((char)pss[j]); + for (int j = 0; j < chld.length; j++) { + rl.append((char) rels[j]); + psl.append((char) pss[j]); } rli = mf.register("rli", rl.toString()); pli = mf.register("pli", psl.toString()); - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - - dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); - dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); - dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = 1; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = rli; + dwwp.v4 = 1; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = fm; + dwwp.v4 = 1; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + + dl1.v0 = f++; + dl1.v2 = h; + dl1.v3 = hrel; + dl1.v4 = hhrel; + dl1.v5 = hh; + dl1.v6 = rlast; + dl1.cz6(); + v[n++] = dl1.getVal(); + dwp.v0 = f++; + dwp.v2 = fm; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.cz5(); + v[n++] = dwp.getVal(); + dwp.v0 = f++; + dwp.v2 = hhf; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.v5 = h; + dwp.cz6(); + v[n++] = dwp.getVal(); } - - v[n]=Integer.MIN_VALUE; + + v[n] = Integer.MIN_VALUE; } - - public void extractFeatures2(Instances is, int i, ParseNBest parse, int rank, long[] v) { - - - - int f=1,n=0; - - for(short k= 0; k<is.length(i)-1;k++) { - - short[] chld = children(parse.heads,k); - - f=2; - + + int f = 1, n = 0; + + for (short k = 0; k < is.length(i) - 1; k++) { + + short[] chld = children(parse.heads, k); + + f = 2; + int fm = is.forms[i][k]; - int hh = k!=0? is.pposs[i][parse.heads[k]]:s_end; + int hh = k != 0 ? is.pposs[i][parse.heads[k]] : s_end; int h = is.pposs[i][k]; - int hrel = parse.labels[k];//is.labels[i][k]; - int hhrel = k!=0? parse.labels[parse.heads[k]]:s_relend; - int hhf = k!=0? is.forms[i][parse.heads[k]]:s_stwrd; - - int r1 = chld.length>0?parse.labels[chld[0]]:s_relend; - int rlast = chld.length>0?parse.labels[chld[chld.length-1]]:s_relend; - - int [] rels = new int[chld.length]; - int [] pss = new int[chld.length]; - - - - for(int j=0;j<chld.length;j++) { + int hrel = parse.labels[k];// is.labels[i][k]; + int hhrel = k != 0 ? parse.labels[parse.heads[k]] : s_relend; + int hhf = k != 0 ? is.forms[i][parse.heads[k]] : s_stwrd; + + int rlast = chld.length > 0 ? parse.labels[chld[chld.length - 1]] : s_relend; + + int[] rels = new int[chld.length]; + int[] pss = new int[chld.length]; + + for (int j = 0; j < chld.length; j++) { rels[j] = parse.labels[chld[j]]; pss[j] = is.pposs[i][chld[j]]; } StringBuilder rl = new StringBuilder(chld.length); StringBuilder psl = new StringBuilder(chld.length); - for(int j=0;j<chld.length;j++) { - rl.append((char)rels[j]); - psl.append((char)pss[j]); + for (int j = 0; j < chld.length; j++) { + rl.append((char) rels[j]); + psl.append((char) pss[j]); } - + int rli = mf.register("rli", rl.toString()); int pli = mf.register("pli", psl.toString()); - - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=rli; dwwp.cz3(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.cz3(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=hh; dwwp.v4=h; dwwp.cz5(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.cz5(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=hh; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - - dwp.v0= f++; dwp.v2=rli; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = rli; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = fm; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.cz3(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.cz3(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = hh; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = hh; + dwwp.v4 = h; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = hh; + dwwp.cz5(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = hh; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + + dwp.v0 = f++; + dwp.v2 = rli; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.v5 = h; + dwp.cz6(); + v[n++] = dwp.getVal(); Arrays.sort(rels); Arrays.sort(pss); rl = new StringBuilder(chld.length); psl = new StringBuilder(chld.length); - for(int j=0;j<chld.length;j++) { - rl.append((char)rels[j]); - psl.append((char)pss[j]); + for (int j = 0; j < chld.length; j++) { + rl.append((char) rels[j]); + psl.append((char) pss[j]); } rli = mf.register("rli", rl.toString()); pli = mf.register("pli", psl.toString()); - - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=fm; dwwp.v3=rli; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - dwwp.v0=f++; dwwp.v2=pli; dwwp.v3=fm; dwwp.v4=1; dwwp.v5=h; dwwp.cz6(); v[n++]=dwwp.getVal(); - - dwwp.v0=f++; dwwp.v2=rli; dwwp.v3=h;dwwp.cz4(); v[n++]=dwwp.getVal(); - - dl1.v0= f++; dl1.v2=h; dl1.v3=hrel; dl1.v4=hhrel;dl1.v5=hh; dl1.v6=rlast; dl1.cz6(); v[n++]=dl1.getVal(); - dwp.v0= f++; dwp.v2=fm; dwp.v3=hrel; dwp.v4=hh; dwp.cz5(); v[n++]=dwp.getVal(); - dwp.v0= f++; dwp.v2=hhf; dwp.v3=hrel;dwp.v4=hh; dwp.v5=h; dwp.cz6(); v[n++]=dwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = rli; + dwwp.v4 = 1; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = fm; + dwwp.v3 = rli; + dwwp.v4 = 1; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + dwwp.v0 = f++; + dwwp.v2 = pli; + dwwp.v3 = fm; + dwwp.v4 = 1; + dwwp.v5 = h; + dwwp.cz6(); + v[n++] = dwwp.getVal(); + + dwwp.v0 = f++; + dwwp.v2 = rli; + dwwp.v3 = h; + dwwp.cz4(); + v[n++] = dwwp.getVal(); + + dl1.v0 = f++; + dl1.v2 = h; + dl1.v3 = hrel; + dl1.v4 = hhrel; + dl1.v5 = hh; + dl1.v6 = rlast; + dl1.cz6(); + v[n++] = dl1.getVal(); + dwp.v0 = f++; + dwp.v2 = fm; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.cz5(); + v[n++] = dwp.getVal(); + dwp.v0 = f++; + dwp.v2 = hhf; + dwp.v3 = hrel; + dwp.v4 = hh; + dwp.v5 = h; + dwp.cz6(); + v[n++] = dwp.getVal(); } - - v[n]=Integer.MIN_VALUE; - } + v[n] = Integer.MIN_VALUE; + } - /** * @param parse * @param k * @return */ private short[] children(short[] heads, short h) { - - int c=0; - for(int k=0;k<heads.length;k++) if (heads[k] ==h ) c++; - + + int c = 0; + for (short head : heads) + if (head == h) + c++; + short[] clds = new short[c]; - c=0; - for(int k=0;k<heads.length;k++) if (heads[k] ==h ) clds[c++]=(short)k; + c = 0; + for (int k = 0; k < heads.length; k++) + if (heads[k] == h) + clds[c++] = (short) k; return clds; } - - } diff --git a/dependencyParser/mate-tools/src/extractors/ParallelExtract.java b/dependencyParser/mate-tools/src/extractors/ParallelExtract.java index a2ef72c..5e0ec08 100755 --- a/dependencyParser/mate-tools/src/extractors/ParallelExtract.java +++ b/dependencyParser/mate-tools/src/extractors/ParallelExtract.java @@ -1,25 +1,22 @@ package extractors; +import java.util.ArrayList; +import java.util.concurrent.Callable; + import is2.data.Cluster; import is2.data.DataF; import is2.data.Edges; import is2.data.F2SF; -import is2.data.FV; import is2.data.Instances; import is2.data.Long2IntInterface; -import java.util.ArrayList; -import java.util.concurrent.Callable; - - /** * @author Bernd Bohnet, 30.08.2009 - * - * This class implements a parallel feature extractor. + * + * This class implements a parallel feature extractor. */ -final public class ParallelExtract implements Callable<Object> -{ - // the data space of the weights for a dependency tree +final public class ParallelExtract implements Callable<Object> { + // the data space of the weights for a dependency tree final DataF d; // the data extractor does the actual work @@ -29,166 +26,168 @@ final public class ParallelExtract implements Callable<Object> private int i; private F2SF para; - + private Cluster cluster; - + private Long2IntInterface li; - - public ParallelExtract(Extractor e, Instances is, int i, DataF d, F2SF para,Cluster cluster, Long2IntInterface li) { - - this.is =is; - extractor=e; - this.d =d; - this.i=i; - this.para=para; + + public ParallelExtract(Extractor e, Instances is, int i, DataF d, F2SF para, Cluster cluster, + Long2IntInterface li) { + + this.is = is; + extractor = e; + this.d = d; + this.i = i; + this.para = para; this.cluster = cluster; - this.li=li; + this.li = li; } - public static class DSet { - int w1,w2; + int w1, w2; } - + + @Override public Object call() { try { - - F2SF f= para; - - - short[] pos=is.pposs[i]; - int[] forms=is.forms[i]; - int[] lemmas=is.plemmas[i]; - short[][] feats=is.feats[i]; - int length = pos.length; - - long[] svs = new long[250]; - - int type=extractor.getType(); - - while (true) { - - DSet set = get(); - if (set ==null) break; - - int w1=set.w1; - int w2=set.w2; - - f.clear(); - int n =extractor.basic(pos, forms, w1, w2,cluster, f); - d.pl[w1][w2]=f.getScoreF(); - - short[] labels = Edges.get(pos[w1], pos[w2],false); - float[][] lab = d.lab[w1][w2]; - - extractor.firstm(is, i, w1, w2, 0, cluster, svs); - - if (labels!=null) { - - - for (int l = labels.length - 1; l >= 0; l--) { - - short label = labels[l]; - - f.clear(); - for(int k=svs.length-1;k>=0;k--) if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); - lab[label][0]=f.getScoreF(); - } - } - labels = Edges.get(pos[w1], pos[w2],true); + F2SF f = para; - if (labels!=null) { - - for (int l = labels.length - 1; l >= 0; l--) { + short[] pos = is.pposs[i]; + int[] forms = is.forms[i]; + int[] lemmas = is.plemmas[i]; + short[][] feats = is.feats[i]; + int length = pos.length; - int label = labels[l]; - f.clear(); - for(int k=svs.length-1;k>=0;k--) if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); - lab[label][1]=f.getScoreF(); - } - } + long[] svs = new long[250]; + + int type = extractor.getType(); + + while (true) { + + DSet set = get(); + if (set == null) + break; + + int w1 = set.w1; + int w2 = set.w2; - int s = w1<w2 ? w1 : w2; - int e = w1<w2 ? w2 : w1; + f.clear(); + int n = extractor.basic(pos, forms, w1, w2, cluster, f); + d.pl[w1][w2] = f.getScoreF(); - int sg = w1<w2 ? w1 : 0; - int eg = w1<w2 ? length : w1+1; + short[] labels = Edges.get(pos[w1], pos[w2], false); + float[][] lab = d.lab[w1][w2]; - - for(int m=s;m<e;m++) { - for(int dir=0;dir<2;dir++) { - labels = Edges.get(pos[w1], pos[w2],dir==1); - float lab2[]= new float[labels.length]; + extractor.firstm(is, i, w1, w2, 0, cluster, svs); + + if (labels != null) { - int g = (m==s||e==m) ? -1 : m; - - - extractor.siblingm(is,i,pos,forms,lemmas,feats, w1, w2, g, 0, cluster, svs,n); - for (int l = labels.length - 1; l >= 0; l--) { - int label = labels[l]; + short label = labels[l]; + f.clear(); - - for(int k=svs.length-1;k>=0;k--) { - if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); - } - lab2[l] = (float)f.score;//f.getScoreF(); + for (int k = svs.length - 1; k >= 0; k--) + if (svs[k] > 0) + f.add(li.l2i(svs[k] + label * type)); + lab[label][0] = f.getScoreF(); } - d.sib[w1][w2][m][dir]=lab2; } - } - - for(int m=sg;m<eg;m++) { - for(int dir=0;dir<2;dir++) { - labels = Edges.get(pos[w1], pos[w2],dir==1); - float[] lab2 = new float[labels.length]; - - int g = (m==s||e==m) ? -1 : m; - - extractor.gcm(is, i, w1,w2,g, 0, cluster, svs); - + + labels = Edges.get(pos[w1], pos[w2], true); + + if (labels != null) { + for (int l = labels.length - 1; l >= 0; l--) { int label = labels[l]; - f.clear(); - for(int k=svs.length-1;k>=0;k--) { - if (svs[k]>0) f.add(li.l2i(svs[k]+label*type)); + for (int k = svs.length - 1; k >= 0; k--) + if (svs[k] > 0) + f.add(li.l2i(svs[k] + label * type)); + lab[label][1] = f.getScoreF(); + } + } + + int s = w1 < w2 ? w1 : w2; + int e = w1 < w2 ? w2 : w1; + + int sg = w1 < w2 ? w1 : 0; + int eg = w1 < w2 ? length : w1 + 1; + + for (int m = s; m < e; m++) { + for (int dir = 0; dir < 2; dir++) { + labels = Edges.get(pos[w1], pos[w2], dir == 1); + float lab2[] = new float[labels.length]; + + int g = (m == s || e == m) ? -1 : m; + + extractor.siblingm(is, i, pos, forms, lemmas, feats, w1, w2, g, 0, cluster, svs, n); + + for (int l = labels.length - 1; l >= 0; l--) { + + int label = labels[l]; + f.clear(); + + for (int k = svs.length - 1; k >= 0; k--) { + if (svs[k] > 0) + f.add(li.l2i(svs[k] + label * type)); + } + lab2[l] = f.score;// f.getScoreF(); } - lab2[l] = f.getScoreF(); + d.sib[w1][w2][m][dir] = lab2; } - d.gra[w1][w2][m][dir] =lab2; } - } - } - } catch(Exception e ) { + for (int m = sg; m < eg; m++) { + for (int dir = 0; dir < 2; dir++) { + labels = Edges.get(pos[w1], pos[w2], dir == 1); + float[] lab2 = new float[labels.length]; + + int g = (m == s || e == m) ? -1 : m; + + extractor.gcm(is, i, w1, w2, g, 0, cluster, svs); + + for (int l = labels.length - 1; l >= 0; l--) { + + int label = labels[l]; + + f.clear(); + for (int k = svs.length - 1; k >= 0; k--) { + if (svs[k] > 0) + f.add(li.l2i(svs[k] + label * type)); + } + lab2[l] = f.getScoreF(); + } + d.gra[w1][w2][m][dir] = lab2; + } + } + + } + } catch (Exception e) { e.printStackTrace(); } return null; } - static ArrayList<DSet> sets = new ArrayList<DSet>(); - - private DSet get() { - + + private DSet get() { + synchronized (sets) { - if (sets.size()==0) return null; - return sets.remove(sets.size()-1); + if (sets.size() == 0) + return null; + return sets.remove(sets.size() - 1); } } - static public void add(int w1, int w2){ - DSet ds =new DSet(); - ds.w1=w1; - ds.w2=w2; + + static public void add(int w1, int w2) { + DSet ds = new DSet(); + ds.w1 = w1; + ds.w2 = w2; sets.add(ds); } - - - - + } diff --git a/dependencyParser/mate-tools/src/is2/data/Closed.java b/dependencyParser/mate-tools/src/is2/data/Closed.java index 378d0c6..3e938a8 100755 --- a/dependencyParser/mate-tools/src/is2/data/Closed.java +++ b/dependencyParser/mate-tools/src/is2/data/Closed.java @@ -1,31 +1,28 @@ package is2.data; - - -final public class Closed { +final public class Closed { public double p; - short b,e,m; + short b, e, m; byte dir; - + Closed d; Open u; - public Closed(short s, short t, int m, int dir,Open u, Closed d, float score) { + public Closed(short s, short t, int m, int dir, Open u, Closed d, float score) { this.b = s; this.e = t; - this.m = (short)m; - this.dir = (byte)dir; - this.u=u; - this.d =d; - p=score; + this.m = (short) m; + this.dir = (byte) dir; + this.u = u; + this.d = d; + p = score; } - public void create(Parse parse) { - if (u != null) u.create(parse); - if (d != null) d.create(parse); + if (u != null) + u.create(parse); + if (d != null) + d.create(parse); } } - - diff --git a/dependencyParser/mate-tools/src/is2/data/Cluster.java b/dependencyParser/mate-tools/src/is2/data/Cluster.java index 485713d..5cc8427 100644 --- a/dependencyParser/mate-tools/src/is2/data/Cluster.java +++ b/dependencyParser/mate-tools/src/is2/data/Cluster.java @@ -1,12 +1,8 @@ /** - * + * */ package is2.data; - - -import is2.util.DB; - import java.io.BufferedReader; import java.io.DataInputStream; import java.io.DataOutputStream; @@ -14,27 +10,29 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; +import is2.util.DB; + /** * @author Dr. Bernd Bohnet, 28.10.2010 - * - * + * + * */ final public class Cluster { public static final String LPATH = "LP"; public static final String SPATH = "SP"; - // [word][p] p = [0:long-path | 1:short-path] - final private short[][] word2path; - + // [word][p] p = [0:long-path | 1:short-path] + final private short[][] word2path; + public Cluster() { - word2path =new short[0][0]; + word2path = new short[0][0]; } - + /** * @param clusterFile * @param mf - * + * */ public Cluster(String clusterFile, IEncoderPlus mf, int ls) { @@ -42,57 +40,59 @@ final public class Cluster { // register words try { - BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768); + BufferedReader inputReader = new BufferedReader( + new InputStreamReader(new FileInputStream(clusterFile), "UTF-8"), 32768); - int cnt=0; + int cnt = 0; String line; - while ((line =inputReader.readLine())!=null) { + while ((line = inputReader.readLine()) != null) { cnt++; try { - String[] split = line.split(REGEX); - mf.register(SPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls)); - mf.register(LPATH, split[0]); - mf.register(PipeGen.WORD, split[1]); - } catch(Exception e) { - System.out.println("Error in cluster line "+cnt+" error: "+e.getMessage()); + String[] split = line.split(REGEX); + mf.register(SPATH, split[0].length() < ls ? split[0] : split[0].substring(0, ls)); + mf.register(LPATH, split[0]); + mf.register(PipeGen.WORD, split[1]); + } catch (Exception e) { + System.out.println("Error in cluster line " + cnt + " error: " + e.getMessage()); } } - System.out.println("read number of clusters "+cnt); + System.out.println("read number of clusters " + cnt); inputReader.close(); - + } catch (Exception e) { e.printStackTrace(); } - + word2path = new short[mf.getFeatureCounter().get(PipeGen.WORD)][2]; - // insert words try { String line; - BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768); + BufferedReader inputReader = new BufferedReader( + new InputStreamReader(new FileInputStream(clusterFile), "UTF-8"), 32768); - while ((line =inputReader.readLine())!=null) { + while ((line = inputReader.readLine()) != null) { String[] split = line.split(REGEX); int wd = mf.getValue(PipeGen.WORD, split[1]); - word2path[wd][0] = (short)mf.getValue(SPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls)); - word2path[wd][1] = (short)mf.getValue(LPATH, split[0]); + word2path[wd][0] = (short) mf.getValue(SPATH, + split[0].length() < ls ? split[0] : split[0].substring(0, ls)); + word2path[wd][1] = (short) mf.getValue(LPATH, split[0]); } inputReader.close(); - int fill=0; - for(int l = 0; l<word2path.length; l++ ){ - if (word2path[l][0]!=0) fill++; + int fill = 0; + for (short[] element : word2path) { + if (element[0] != 0) + fill++; } /* - for(int l = 0; l<word2path.length; l++ ){ - if (word2path[l][1]!=0) fillL++; - if (word2path[l][1]<-1) System.out.println("lower "+word2path[l][1]); - } - */ - System.out.println("filled "+fill+" of "+word2path.length); - + * for(int l = 0; l<word2path.length; l++ ){ if (word2path[l][1]!=0) + * fillL++; if (word2path[l][1]<-1) + * System.out.println("lower "+word2path[l][1]); } + */ + System.out.println("filled " + fill + " of " + word2path.length); + } catch (Exception e) { e.printStackTrace(); } @@ -100,56 +100,61 @@ final public class Cluster { /** * Read the cluster + * * @param dos - * @throws IOException + * @throws IOException */ public Cluster(DataInputStream dis) throws IOException { word2path = new short[dis.readInt()][2]; - for(int i =0;i<word2path.length;i++) { - word2path[i][0]=dis.readShort(); - word2path[i][1]=dis.readShort(); + for (int i = 0; i < word2path.length; i++) { + word2path[i][0] = dis.readShort(); + word2path[i][1] = dis.readShort(); } - DB.println("Read cluster with "+word2path.length+" words "); + DB.println("Read cluster with " + word2path.length + " words "); } - + /** * Write the cluster + * * @param dos - * @throws IOException + * @throws IOException */ public void write(DataOutputStream dos) throws IOException { dos.writeInt(word2path.length); - for(short[] i : word2path) { + for (short[] i : word2path) { dos.writeShort(i[0]); dos.writeShort(i[1]); } - + } /** - * @param form the id of a word form + * @param form + * the id of a word form * @return the short path to the word form in the cluster - - final public int getSP(int form) { - if (word2path.length<form) return -1; - return word2path[form][0]; - } - */ + * + * final public int getSP(int form) { if (word2path.length<form) + * return -1; return word2path[form][0]; } + */ /** * get the long path to a word form in the cluster - * @param form the id of a word form + * + * @param form + * the id of a word form * @return the long path to the word */ final public int getLP(int form) { - if (word2path.length<=form || word2path[form].length<=0) return -1; - return word2path[form][0]==0?-1:word2path[form][0]; + if (word2path.length <= form || word2path[form].length <= 0) + return -1; + return word2path[form][0] == 0 ? -1 : word2path[form][0]; } - + final public int getLP(int form, int l) { - if (word2path.length<form) return -1; - return word2path[form][l]==0?-1:word2path[form][l]; + if (word2path.length < form) + return -1; + return word2path[form][l] == 0 ? -1 : word2path[form][l]; } final public int size() { diff --git a/dependencyParser/mate-tools/src/is2/data/D4.java b/dependencyParser/mate-tools/src/is2/data/D4.java index 8be3df2..d607668 100644 --- a/dependencyParser/mate-tools/src/is2/data/D4.java +++ b/dependencyParser/mate-tools/src/is2/data/D4.java @@ -1,134 +1,181 @@ /** - * + * */ package is2.data; -import is2.util.DB; - /** * @author Dr. Bernd Bohnet, 30.10.2010 - * - * This class computes the mapping of features to the weight vector. + * + * This class computes the mapping of features to the weight vector. */ final public class D4 extends DX { private long shift; private long h; - - private final Long2IntInterface _li; + private final Long2IntInterface _li; + public D4(Long2IntInterface li) { - _li=li; + _li = li; } - - + + @Override final public void clean() { - v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0; - shift=0;h=0; - } - - final public void cz3(){ - if (v0<0||v1<0||v2<0) { h=-1;return;} - - h= v0+v1*(shift =a0)+(long)v2*(shift *=a1); - shift *=a2; - } - - final public long c3(){ - if (v0<0||v1<0||v2<0) { h=-1;return h;} - - h= v0+v1*(shift =a0)+(long)v2*(shift *=a1); - shift *=a2; - return h; + v0 = 0; + v1 = 0; + v2 = 0; + v3 = 0; + v4 = 0; + v5 = 0; + v6 = 0; + v7 = 0; + v8 = 0; + shift = 0; + h = 0; } - - final public void cz4(){ - if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;} - - h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); - shift *=a3; + + @Override + final public void cz3() { + if (v0 < 0 || v1 < 0 || v2 < 0) { + h = -1; + return; + } + + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1); + shift *= a2; } - final public long c4(){ - if (v0<0||v1<0||v2<0||v3<0) {h=-1;return h;} - - h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); - shift *=a3; + final public long c3() { + if (v0 < 0 || v1 < 0 || v2 < 0) { + h = -1; + return h; + } + + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1); + shift *= a2; return h; } - - final public void cz5(){ - - if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return;} - - h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift*=a2)+v4*(shift*=a3); - shift*=a4; + @Override + final public void cz4() { + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0) { + h = -1; + return; + } + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1) + v3 * (shift *= a2); + shift *= a3; } - final public long c5(){ - - if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return h;} - - h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2)+v4*(shift*=a3); - shift*=a4; + final public long c4() { + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0) { + h = -1; + return h; + } + + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1) + v3 * (shift *= a2); + shift *= a3; return h; } - - final public void cz6(){ + @Override + final public void cz5() { + + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0) { + h = -1; + return; + } - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;} + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1) + v3 * (shift *= a2) + v4 * (shift *= a3); + shift *= a4; - h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); - h +=v4*(shift*=a3)+v5*(shift*=a4); - shift*=a5; } - final public long c6(){ + final public long c5() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return h;} + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0) { + h = -1; + return h; + } - h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); - h +=v4*(shift*=a3)+v5*(shift*=a4); - shift*=a5; + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1) + v3 * (shift *= a2) + v4 * (shift *= a3); + shift *= a4; return h; } + @Override + final public void cz6() { + + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0) { + h = -1; + return; + } + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1) + v3 * (shift *= a2); + h += v4 * (shift *= a3) + v5 * (shift *= a4); + shift *= a5; + } + + final public long c6() { + + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0) { + h = -1; + return h; + } + + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1) + v3 * (shift *= a2); + h += v4 * (shift *= a3) + v5 * (shift *= a4); + shift *= a5; + return h; + } + + @Override final public long cs(int b, int v) { - if (h<0) {h=-1; return h;} - - h += v*shift; - shift *=b; + if (h < 0) { + h = -1; + return h; + } + + h += v * shift; + shift *= b; return h; } + @Override final public void csa(int b, int v, IFV f) { - if (h<0) {h=-1; return;} + if (h < 0) { + h = -1; + return; + } - h += v*shift; - shift *=b; + h += v * shift; + shift *= b; f.add(_li.l2i(h)); } + @Override final public long csa(int b, int v) { - if (h<0) {h=-1; return-1; } + if (h < 0) { + h = -1; + return -1; + } - h += v*shift; - shift *=b; + h += v * shift; + shift *= b; return h; } - public final long getVal(){ + @Override + public final long getVal() { return h; } - - public final void map(IFV f, long l){ - if (l>0) f.add(this._li.l2i(l)); + + @Override + public final void map(IFV f, long l) { + if (l > 0) + f.add(this._li.l2i(l)); } - + /** * @param f */ @@ -136,56 +183,70 @@ final public class D4 extends DX { f.add(_li.l2i(h)); } + @Override final public void cz7() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;} + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0) { + h = -1; + return; + } + + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1) + v3 * (shift *= a2); + h += v4 * (shift *= a3) + v5 * (shift *= a4) + v6 * (shift *= a5); + shift *= a6; - h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); - h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5); - shift*=a6; - } final public long c7() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;} - - h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); - h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5); - shift*=a6; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0) { + h = -1; + return h; + } + + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1) + v3 * (shift *= a2); + h += v4 * (shift *= a3) + v5 * (shift *= a4) + v6 * (shift *= a5); + shift *= a6; return h; } /** - * + * */ + @Override final public void cz8() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;} + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0 || v7 < 0) { + h = -1; + return; + } - h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); - h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5)+v7*(shift*=a6); - shift*=a7; + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1) + v3 * (shift *= a2); + h += v4 * (shift *= a3) + v5 * (shift *= a4) + v6 * (shift *= a5) + v7 * (shift *= a6); + shift *= a7; } final public void cz9() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0||v8<0) {h=-1; return;} + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0 || v7 < 0 || v8 < 0) { + h = -1; + return; + } - h =v0+v1*(shift =a0)+v2*(shift *=a1)+v3*(shift *=a2); - h +=v4*(shift*=a3)+v5*(shift*=a4)+v6*(shift*=a5)+v7*(shift*=a6)+v8*(shift*=a7); - shift*=a8; + h = v0 + v1 * (shift = a0) + v2 * (shift *= a1) + v3 * (shift *= a2); + h += v4 * (shift *= a3) + v5 * (shift *= a4) + v6 * (shift *= a5) + v7 * (shift *= a6) + v8 * (shift *= a7); + shift *= a8; } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.data.DX#computeLabeValue(short, short) */ @Override public int computeLabeValue(int label, int shift) { - return label*shift; + return label * shift; } - + @Override public void fix() { - - } + } } \ No newline at end of file diff --git a/dependencyParser/mate-tools/src/is2/data/D6.java b/dependencyParser/mate-tools/src/is2/data/D6.java index 3694249..0a17d51 100644 --- a/dependencyParser/mate-tools/src/is2/data/D6.java +++ b/dependencyParser/mate-tools/src/is2/data/D6.java @@ -1,5 +1,5 @@ /** - * + * */ package is2.data; @@ -7,146 +7,202 @@ import is2.util.DB; /** * @author Dr. Bernd Bohnet, 30.10.2010 - * - * This class computes the mapping of features to the weight vector. + * + * This class computes the mapping of features to the weight vector. */ final public class D6 extends DX { private long shift; private long h; - - private final Long2IntInterface _li; + private final Long2IntInterface _li; + public D6(Long2IntInterface li) { - _li=li; + _li = li; } - boolean fixed =false; - + boolean fixed = false; + + @Override public void fix() { - + if (fixed) { DB.println("warning: already fixed"); - // return; + // return; } - - long t0= 1, t1=a0, t2=t1*a1, t3=t2*a2,t4=t3*a3, t5=t4*a4,t6=t5*a5, t7=t6*a6, t8=t7*a7, t9=t8*a8; - - - - - a0=t0;a1=t1;a2=t2;a3=t3;a4=t4;a5=t5;a6=t6;a7=t7;a8=t8; a9=t9; - - fixed=true; - } - + long t0 = 1, t1 = a0, t2 = t1 * a1, t3 = t2 * a2, t4 = t3 * a3, t5 = t4 * a4, t6 = t5 * a5, t7 = t6 * a6, + t8 = t7 * a7, t9 = t8 * a8; + + a0 = t0; + a1 = t1; + a2 = t2; + a3 = t3; + a4 = t4; + a5 = t5; + a6 = t6; + a7 = t7; + a8 = t8; + a9 = t9; + + fixed = true; + } + @Override final public void clean() { - v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0; - shift=0;h=0; - } - - final public void cz3(){ - if (v0<0||v1<0||v2<0) { h=-1;return;} - - h= v0+v1*a1+v2*a2; - shift =a3; - } - - final public long c3(){ - if (v0<0||v1<0||v2<0) { h=-1;return h;} - - h= v0+v1*a1+v2*a2; - shift =a3; - return h; + v0 = 0; + v1 = 0; + v2 = 0; + v3 = 0; + v4 = 0; + v5 = 0; + v6 = 0; + v7 = 0; + v8 = 0; + shift = 0; + h = 0; } - - final public void cz4(){ - if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;} - - h =v0+v1*a1+v2*a2+v3*a3; - shift =a4; + + @Override + final public void cz3() { + if (v0 < 0 || v1 < 0 || v2 < 0) { + h = -1; + return; + } + + h = v0 + v1 * a1 + v2 * a2; + shift = a3; } - final public long c4(){ - if (v0<0||v1<0||v2<0||v3<0) {h=-1;return h;} - - h =v0+v1*a1+v2*a2+v3*a3; - shift =a4; + final public long c3() { + if (v0 < 0 || v1 < 0 || v2 < 0) { + h = -1; + return h; + } + + h = v0 + v1 * a1 + v2 * a2; + shift = a3; return h; } - - final public void cz5(){ - - if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return;} - - h =v0+v1*a1+v2*a2+v3*a3+v4*a4; - shift=a5; + @Override + final public void cz4() { + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0) { + h = -1; + return; + } + h = v0 + v1 * a1 + v2 * a2 + v3 * a3; + shift = a4; } - final public long c5(){ - - if (v0<0||v1<0||v2<0||v3<0||v4<0) {h=-1;return h;} - - h =v0+v1*a1+v2*a2+v3*a3+v4*a4; - shift=a5; + final public long c4() { + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0) { + h = -1; + return h; + } + + h = v0 + v1 * a1 + v2 * a2 + v3 * a3; + shift = a4; return h; } - - final public void cz6(){ + @Override + final public void cz5() { + + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0) { + h = -1; + return; + } - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;} + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4; + shift = a5; - h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; - shift=a6; } - final public long c6(){ + final public long c5() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return h;} + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0) { + h = -1; + return h; + } - h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; - shift=a6; + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4; + shift = a5; return h; } + @Override + final public void cz6() { + + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0) { + h = -1; + return; + } + + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5; + shift = a6; + } + + final public long c6() { + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0) { + h = -1; + return h; + } + + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5; + shift = a6; + return h; + } + + @Override final public long cs(int b, int v) { - if (h<0) {h=-1; return h;} - - h += v*shift; - shift *=b; + if (h < 0) { + h = -1; + return h; + } + + h += v * shift; + shift *= b; return h; } + @Override final public void csa(int b, int v, IFV f) { - if (h<0) {h=-1; return;} + if (h < 0) { + h = -1; + return; + } - h += v*shift; - shift *=b; + h += v * shift; + shift *= b; f.add(_li.l2i(h)); } + @Override final public long csa(int b, int v) { - if (h<0) {h=-1; return-1; } + if (h < 0) { + h = -1; + return -1; + } - h += v*shift; - shift *=b; + h += v * shift; + shift *= b; return h; } - public final long getVal(){ + @Override + public final long getVal() { return h; } - - public final void map(IFV f, long l){ - if (l>0) f.add(this._li.l2i(l)); + + @Override + public final void map(IFV f, long l) { + if (l > 0) + f.add(this._li.l2i(l)); } - + /** * @param f */ @@ -154,44 +210,51 @@ final public class D6 extends DX { f.add(_li.l2i(h)); } + @Override final public void cz7() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;} + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0) { + h = -1; + return; + } + + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5 + v6 * a6; + shift = a7; - h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; - shift=a7; - } final public long c7() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;} + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0) { + h = -1; + return h; + } - h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; - shift=a7; + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5 + v6 * a6; + shift = a7; return h; } /** - * + * */ + @Override final public void cz8() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;} + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0 || v7 < 0) { + h = -1; + return; + } - h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7; - shift=a8; + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5 + v6 * a6 + v7 * a7; + shift = a8; } - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.data.DX#computeLabeValue(short, short) */ @Override public int computeLabeValue(int label, int shift) { - return label*shift; + return label * shift; } - - - - } \ No newline at end of file diff --git a/dependencyParser/mate-tools/src/is2/data/D7.java b/dependencyParser/mate-tools/src/is2/data/D7.java index f4675d8..319c54a 100644 --- a/dependencyParser/mate-tools/src/is2/data/D7.java +++ b/dependencyParser/mate-tools/src/is2/data/D7.java @@ -1,164 +1,225 @@ /** - * + * */ package is2.data; - /** * @author Dr. Bernd Bohnet, 30.10.2010 - * - * This class computes the mapping of features to the weight vector. + * + * This class computes the mapping of features to the weight vector. */ final public class D7 extends DX { - + private long shift; private long h; - private final Long2IntInterface _li; - + private final Long2IntInterface _li; + public D7(Long2IntInterface li) { - _li=li; + _li = li; } - boolean fixed =false; - + boolean fixed = false; + + @Override public void fix() { - - long t0= 1, t1=a0, t2=t1*a1, t3=t2*a2,t4=t3*a3, t5=t4*a4,t6=t5*a5, t7=t6*a6, t8=t7*a7, t9=t8*a8; - - a0=t0;a1=t1;a2=t2;a3=t3;a4=t4;a5=t5;a6=t6;a7=t7;a8=t8; a9=t9; - - } - + long t0 = 1, t1 = a0, t2 = t1 * a1, t3 = t2 * a2, t4 = t3 * a3, t5 = t4 * a4, t6 = t5 * a5, t7 = t6 * a6, + t8 = t7 * a7, t9 = t8 * a8; + + a0 = t0; + a1 = t1; + a2 = t2; + a3 = t3; + a4 = t4; + a5 = t5; + a6 = t6; + a7 = t7; + a8 = t8; + a9 = t9; + + } + @Override final public void clean() { - v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0; - shift=0;h=0; - } - - final public void cz3(){ - if (v2<0) { h=-1;return;} - - h= v0+v1*a1+v2*a2; - shift =a3; - } - - final public long c3(){ - if (v2<0) { h=-1;return h;} - - h= v0+v1*a1+v2*a2; - shift =a3; + v0 = 0; + v1 = 0; + v2 = 0; + v3 = 0; + v4 = 0; + v5 = 0; + v6 = 0; + v7 = 0; + v8 = 0; + shift = 0; + h = 0; + } + + @Override + final public void cz3() { + if (v2 < 0) { + h = -1; + return; + } + + h = v0 + v1 * a1 + v2 * a2; + shift = a3; + } + + final public long c3() { + if (v2 < 0) { + h = -1; + return h; + } + + h = v0 + v1 * a1 + v2 * a2; + shift = a3; return h; } - - final public long d3(){ - if (v2<0)return -1; - return v0+v2*a2; - } - - final public void cz4(){ - // if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;} - if (v2<0||v3<0) {h=-1;return;} - - h =v0+v1*a1+v2*a2+v3*a3; - shift =a4; - } - - final public long c4(){ - if (v2<0||v3<0) {h=-1;return h;} - - h =v0+v1*a1+v2*a2+v3*a3; - shift =a4; + + final public long d3() { + if (v2 < 0) + return -1; + return v0 + v2 * a2; + } + + @Override + final public void cz4() { + // if (v0<0||v1<0||v2<0||v3<0) {h=-1;return;} + if (v2 < 0 || v3 < 0) { + h = -1; + return; + } + + h = v0 + v1 * a1 + v2 * a2 + v3 * a3; + shift = a4; + } + + final public long c4() { + if (v2 < 0 || v3 < 0) { + h = -1; + return h; + } + + h = v0 + v1 * a1 + v2 * a2 + v3 * a3; + shift = a4; return h; } - - - final public long d4(){ - if (v2<0||v3<0) return -1; - return v0+v2*a2+v3*a3; + + final public long d4() { + if (v2 < 0 || v3 < 0) + return -1; + return v0 + v2 * a2 + v3 * a3; } - - final public void cz5(){ - - if (v2<0||v3<0||v4<0) {h=-1;return;} - - h =v0+v1*a1+v2*a2+v3*a3+v4*a4; - shift=a5; + @Override + final public void cz5() { + + if (v2 < 0 || v3 < 0 || v4 < 0) { + h = -1; + return; + } + + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4; + shift = a5; } - final public long c5(){ - - if (v2<0||v3<0||v4<0) {h=-1;return h;} - - h =v0+v1*a1+v2*a2+v3*a3+v4*a4; - shift=a5; + final public long c5() { + + if (v2 < 0 || v3 < 0 || v4 < 0) { + h = -1; + return h; + } + + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4; + shift = a5; return h; } - - final public long d5(){ - if (v2<0||v3<0||v4<0) return -1; - return v0+v2*a2+v3*a3+v4*a4; + + final public long d5() { + if (v2 < 0 || v3 < 0 || v4 < 0) + return -1; + return v0 + v2 * a2 + v3 * a3 + v4 * a4; } - - final public void cz6(){ + @Override + final public void cz6() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) {h=-1; return;} + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0) { + h = -1; + return; + } - h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; - shift=a6; + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5; + shift = a6; } - final public long c6(){ + final public long c6() { - if (v2<0||v3<0||v4<0||v5<0) {h=-1; return h;} + if (v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0) { + h = -1; + return h; + } - h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5; - shift=a6; + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5; + shift = a6; return h; } - - final public long d6(){ - if (v2<0||v3<0||v4<0||v5<0) return -1; - return v0+v2*a2+v3*a3 +v4*a4+v5*a5; - } + final public long d6() { + if (v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0) + return -1; + return v0 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5; + } + @Override final public long cs(int b, int v) { - if (h<0) {h=-1; return h;} - - h += v*shift; - shift *=b; + if (h < 0) { + h = -1; + return h; + } + + h += v * shift; + shift *= b; return h; } + @Override final public void csa(int b, int v, IFV f) { - if (h<0) {h=-1; return;} + if (h < 0) { + h = -1; + return; + } - h += v*shift; - shift *=b; + h += v * shift; + shift *= b; f.add(_li.l2i(h)); } + @Override final public long csa(int b, int v) { - if (h<0) {h=-1; return-1; } + if (h < 0) { + h = -1; + return -1; + } - h += v*shift; - shift *=b; + h += v * shift; + shift *= b; return h; } - public final long getVal(){ + @Override + public final long getVal() { return h; } - - public final void map(IFV f, long l){ - if (l>0) f.add(this._li.l2i(l)); + + @Override + public final void map(IFV f, long l) { + if (l > 0) + f.add(this._li.l2i(l)); } - + /** * @param f */ @@ -166,55 +227,64 @@ final public class D7 extends DX { f.add(_li.l2i(h)); } + @Override final public void cz7() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return;} + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0) { + h = -1; + return; + } + + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5 + v6 * a6; + shift = a7; - h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; - shift=a7; - } - final public long c7() { - if (v2<0||v3<0||v4<0||v5<0||v6<0) {h=-1; return h;} + if (v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0) { + h = -1; + return h; + } - h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; - shift=a7; + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5 + v6 * a6; + shift = a7; return h; } - + final public long d7() { - if (v2<0||v3<0||v4<0||v5<0||v6<0) return -1; - return v0+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6; + if (v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0) + return -1; + return v0 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5 + v6 * a6; } /** - * + * */ + @Override final public void cz8() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {h=-1; return;} + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0 || v7 < 0) { + h = -1; + return; + } - h =v0+v1*a1+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7; - shift=a8; + h = v0 + v1 * a1 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5 + v6 * a6 + v7 * a7; + shift = a8; } - + final public long d8() { - if (v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) {return-1;} - return v0+v2*a2+v3*a3 +v4*a4+v5*a5+v6*a6+v7*a7; + if (v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0 || v7 < 0) { + return -1; + } + return v0 + v2 * a2 + v3 * a3 + v4 * a4 + v5 * a5 + v6 * a6 + v7 * a7; } - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.data.DX#computeLabeValue(short, short) */ @Override public int computeLabeValue(int label, int shift) { - return label*shift; + return label * shift; } - - - - } \ No newline at end of file diff --git a/dependencyParser/mate-tools/src/is2/data/DPSTree.java b/dependencyParser/mate-tools/src/is2/data/DPSTree.java index 554f756..085c0c7 100644 --- a/dependencyParser/mate-tools/src/is2/data/DPSTree.java +++ b/dependencyParser/mate-tools/src/is2/data/DPSTree.java @@ -1,24 +1,17 @@ /** - * + * */ package is2.data; -import is2.util.DB; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Stack; - /** * @author Dr. Bernd Bohnet, 17.01.2011 - * - * Dynamic phrase structure tree. + * + * Dynamic phrase structure tree. */ public class DPSTree { - - private int size=0; - + private int size = 0; + public int[] heads; public int[] labels; @@ -30,86 +23,87 @@ public class DPSTree { heads = new int[initialCapacity]; labels = new int[initialCapacity]; } - - + /** - * Increases the capacity of this <tt>Graph</tt> instance, if - * necessary, to ensure that it can hold at least the number of nodes - * specified by the minimum capacity argument. - * - * @param minCapacity the desired minimum capacity. - */ - private void ensureCapacity(int minCapacity) { - - - if (minCapacity > heads.length) { - - int newCapacity =minCapacity + 1; - - if (newCapacity < minCapacity) newCapacity = minCapacity; - int oldIndex[] = heads; - heads = new int[newCapacity]; - System.arraycopy(oldIndex, 0, heads, 0, oldIndex.length); - - oldIndex = labels; - labels = new int[newCapacity]; - System.arraycopy(oldIndex, 0, labels, 0, oldIndex.length); - - } - } - - - final public int size() { - return size; - } - - - final public boolean isEmpty() { - return size == 0; - } - - final public void clear() { - size = 0; - } - - final public void createTerminals(int terminals) { - ensureCapacity(terminals+1); - size= terminals+1; - } - - final public int create(int phrase) { - - ensureCapacity(size+1); - labels[size] =phrase; - size++; - return size-1; - } - - public int create(int phrase, int nodeId) { - - if (nodeId<0) return this.create(phrase); -// DB.println("create phrase "+nodeId+" label "+phrase); - ensureCapacity(nodeId+1); - labels[nodeId] =phrase; - if (size<nodeId) size=nodeId+1; + * Increases the capacity of this <tt>Graph</tt> instance, if necessary, to + * ensure that it can hold at least the number of nodes specified by the + * minimum capacity argument. + * + * @param minCapacity + * the desired minimum capacity. + */ + private void ensureCapacity(int minCapacity) { + + if (minCapacity > heads.length) { + + int newCapacity = minCapacity + 1; + + if (newCapacity < minCapacity) + newCapacity = minCapacity; + int oldIndex[] = heads; + heads = new int[newCapacity]; + System.arraycopy(oldIndex, 0, heads, 0, oldIndex.length); + + oldIndex = labels; + labels = new int[newCapacity]; + System.arraycopy(oldIndex, 0, labels, 0, oldIndex.length); + + } + } + + final public int size() { + return size; + } + + final public boolean isEmpty() { + return size == 0; + } + + final public void clear() { + size = 0; + } + + final public void createTerminals(int terminals) { + ensureCapacity(terminals + 1); + size = terminals + 1; + } + + final public int create(int phrase) { + + ensureCapacity(size + 1); + labels[size] = phrase; + size++; + return size - 1; + } + + public int create(int phrase, int nodeId) { + + if (nodeId < 0) + return this.create(phrase); + // DB.println("create phrase "+nodeId+" label "+phrase); + ensureCapacity(nodeId + 1); + labels[nodeId] = phrase; + if (size < nodeId) + size = nodeId + 1; return nodeId; } public void createEdge(int i, int j) { - heads[i] =j; -// DB.println("create edge "+i+"\t "+j); - } - + heads[i] = j; + // DB.println("create edge "+i+"\t "+j); + } + + @Override public DPSTree clone() { - DPSTree ps = new DPSTree(this.size+1); - - for(int k=0;k<size;k++) { - ps.heads[k] = heads[k]; - ps.labels[k] = labels[k]; + DPSTree ps = new DPSTree(this.size + 1); + + for (int k = 0; k < size; k++) { + ps.heads[k] = heads[k]; + ps.labels[k] = labels[k]; } - ps.size=size; + ps.size = size; return ps; - + } } \ No newline at end of file diff --git a/dependencyParser/mate-tools/src/is2/data/DX.java b/dependencyParser/mate-tools/src/is2/data/DX.java index c357b58..8650038 100644 --- a/dependencyParser/mate-tools/src/is2/data/DX.java +++ b/dependencyParser/mate-tools/src/is2/data/DX.java @@ -1,20 +1,18 @@ /** - * + * */ package is2.data; -import is2.data.IFV; - /** * @author Dr. Bernd Bohnet, 30.08.2011 - * - * + * + * */ -public abstract class DX { +public abstract class DX { + + public long a0, a1, a2, a3, a4, a5, a6, a7, a8, a9; + public long v0, v1, v2, v3, v4, v5, v6, v7, v8, v9; - public long a0,a1,a2,a3,a4,a5,a6,a7,a8,a9; - public long v0,v1,v2,v3,v4,v5,v6,v7,v8,v9; - public abstract void cz3(); public abstract void cz4(); @@ -51,8 +49,8 @@ public abstract class DX { * @param s_type * @return */ - public abstract int computeLabeValue(int label,int s_type) ; - + public abstract int computeLabeValue(int label, int s_type); + public abstract void fix(); } \ No newline at end of file diff --git a/dependencyParser/mate-tools/src/is2/data/DataF.java b/dependencyParser/mate-tools/src/is2/data/DataF.java index f127fbd..0ec145f 100755 --- a/dependencyParser/mate-tools/src/is2/data/DataF.java +++ b/dependencyParser/mate-tools/src/is2/data/DataF.java @@ -1,39 +1,35 @@ package is2.data; +final public class DataF { + final public short typesLen; + final public int len; -final public class DataF { + // first order features + final public float[][] pl; + + // remove !!!! + // final public float[][] highestLab; + + // final public FV[][][] label; + final public float[][][][] lab; + + public FV fv; + + final public float[][][][][] sib; + + final public float[][][][][] gra; + + public DataF(int length, short types) { + typesLen = types; + len = length; + + pl = new float[length][length]; + lab = new float[length][length][types][2]; + // highestLab = new float[length][length]; + + sib = new float[length][length][length][2][]; + gra = new float[length][length][length][2][]; - final public short typesLen; - final public int len; - - // first order features - final public float[][] pl; - - // remove !!!! -// final public float[][] highestLab; - - //final public FV[][][] label; - final public float[][][][] lab; - - - public FV fv; - - final public float[][][][][] sib; - - final public float[][][][][] gra; - - - public DataF(int length, short types) { - typesLen=types; - len =length; - - pl = new float[length][length]; - lab = new float[length][length][types][2]; - // highestLab = new float[length][length]; - - sib = new float[length][length][length][2][]; - gra = new float[length][length][length][2][]; - - } + } } diff --git a/dependencyParser/mate-tools/src/is2/data/DataFES.java b/dependencyParser/mate-tools/src/is2/data/DataFES.java index 9772858..7e83894 100644 --- a/dependencyParser/mate-tools/src/is2/data/DataFES.java +++ b/dependencyParser/mate-tools/src/is2/data/DataFES.java @@ -1,38 +1,34 @@ package is2.data; +final public class DataFES { + final public short typesLen; + final public int len; -final public class DataFES { + // first order features + final public float[][] pl; + + // remove !!!! + // final public float[][] highestLab; + + // final public FV[][][] label; + final public float[][][] lab; + + public FV fv; + + final public float[][][][] sib; + + final public float[][][][] gra; + + public DataFES(int length, short types) { + typesLen = types; + len = length; + + pl = new float[length][length]; + lab = new float[length][length][types]; + + sib = new float[length][length][length][]; + gra = new float[length][length][length][]; - final public short typesLen; - final public int len; - - // first order features - final public float[][] pl; - - // remove !!!! -// final public float[][] highestLab; - - //final public FV[][][] label; - final public float[][][] lab; - - - public FV fv; - - final public float[][][][] sib; - - final public float[][][][] gra; - - - public DataFES(int length, short types) { - typesLen=types; - len =length; - - pl = new float[length][length]; - lab = new float[length][length][types]; - - sib = new float[length][length][length][]; - gra = new float[length][length][length][]; - - } + } } diff --git a/dependencyParser/mate-tools/src/is2/data/DataT.java b/dependencyParser/mate-tools/src/is2/data/DataT.java index 47691f8..2ae816d 100644 --- a/dependencyParser/mate-tools/src/is2/data/DataT.java +++ b/dependencyParser/mate-tools/src/is2/data/DataT.java @@ -1,25 +1,19 @@ package is2.data; +final public class DataT { + final public short typesLen; + final public int len; -final public class DataT { + // final public FV[][][] label; + // a b lab op + final public float[][][][] lab; + + public DataT(int length, short types) { + typesLen = types; + len = length; - final public short typesLen; - final public int len; - - - //final public FV[][][] label; - // a b lab op - final public float[][][][] lab; + lab = new float[length][length][types][4]; - - - public DataT(int length, short types) { - typesLen=types; - len =length; - - lab = new float[length][length][types][4]; - - - } + } } diff --git a/dependencyParser/mate-tools/src/is2/data/Edges.java b/dependencyParser/mate-tools/src/is2/data/Edges.java index f8b2ef9..a14db91 100644 --- a/dependencyParser/mate-tools/src/is2/data/Edges.java +++ b/dependencyParser/mate-tools/src/is2/data/Edges.java @@ -1,9 +1,8 @@ /** - * + * */ package is2.data; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -13,104 +12,102 @@ import java.util.Map.Entry; /** * @author Dr. Bernd Bohnet, 13.05.2009; - * - * + * + * */ public final class Edges { - private static short[][][][] edges; - private static HashMap<Short,Integer> labelCount = new HashMap<Short,Integer>(); + private static HashMap<Short, Integer> labelCount = new HashMap<Short, Integer>(); - private static HashMap<String,Integer> slabelCount = new HashMap<String,Integer>(); + private static HashMap<String, Integer> slabelCount = new HashMap<String, Integer>(); - static short[] def = new short[1]; - - private Edges () {} - + + private Edges() { + } + /** * @param length */ public static void init(int length) { - edges = new short[length][length][2][]; + edges = new short[length][length][2][]; } - - - public static void findDefault(){ - - int best =0; - - - - for(Entry<Short,Integer> e : labelCount.entrySet()) { - - - if (best<e.getValue()) { + + public static void findDefault() { + + int best = 0; + + for (Entry<Short, Integer> e : labelCount.entrySet()) { + + if (best < e.getValue()) { best = e.getValue(); - def[0]=e.getKey(); + def[0] = e.getKey(); } } - - // labelCount=null; - // String[] types = new String[mf.getFeatureCounter().get(PipeGen.REL)]; - // for (Entry<String, Integer> e : MFO.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey(); + // labelCount=null; + // String[] types = new String[mf.getFeatureCounter().get(PipeGen.REL)]; + // for (Entry<String, Integer> e : + // MFO.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] + // = e.getKey(); - is2.util.DB.println("set default label to "+def[0]+" " ); + is2.util.DB.println("set default label to " + def[0] + " "); + + // System.out.println("found default "+def[0]); - // System.out.println("found default "+def[0]); - } - final static public void put(int pos1, int pos2, boolean dir, short label) { - putD(pos1, pos2,dir, label); - // putD(pos2, pos1,!dir, label); + putD(pos1, pos2, dir, label); + // putD(pos2, pos1,!dir, label); } - - + final static public void putD(int pos1, int pos2, boolean dir, short label) { - + Integer lc = labelCount.get(label); - if (lc==null) labelCount.put(label, 1); - else labelCount.put(label, lc+1); + if (lc == null) + labelCount.put(label, 1); + else + labelCount.put(label, lc + 1); - String key = pos1+"-"+pos2+dir+label; + String key = pos1 + "-" + pos2 + dir + label; Integer lcs = slabelCount.get(key); - if (lcs==null) slabelCount.put(key, 1); - else slabelCount.put(key, lcs+1); - - - if (edges[pos1][pos2][dir?0:1]==null) { - edges[pos1][pos2][dir?0:1]=new short[1]; - edges[pos1][pos2][dir?0:1][0]=label; - -// edgesh[pos1][pos2][dir?0:1] = new TIntHashSet(2); -// edgesh[pos1][pos2][dir?0:1].add(label); + if (lcs == null) + slabelCount.put(key, 1); + else + slabelCount.put(key, lcs + 1); + + if (edges[pos1][pos2][dir ? 0 : 1] == null) { + edges[pos1][pos2][dir ? 0 : 1] = new short[1]; + edges[pos1][pos2][dir ? 0 : 1][0] = label; + + // edgesh[pos1][pos2][dir?0:1] = new TIntHashSet(2); + // edgesh[pos1][pos2][dir?0:1].add(label); } else { - short labels[] = edges[pos1][pos2][dir?0:1]; - for(short l : labels) { - //contains label already? - if(l==label) return; + short labels[] = edges[pos1][pos2][dir ? 0 : 1]; + for (short l : labels) { + // contains label already? + if (l == label) + return; } - - short[] nlabels = new short[labels.length+1]; + + short[] nlabels = new short[labels.length + 1]; System.arraycopy(labels, 0, nlabels, 0, labels.length); - nlabels[labels.length]=label; - edges[pos1][pos2][dir?0:1]=nlabels; - - // edgesh[pos1][pos2][dir?0:1].add(label); + nlabels[labels.length] = label; + edges[pos1][pos2][dir ? 0 : 1] = nlabels; + + // edgesh[pos1][pos2][dir?0:1].add(label); } } - + final static public short[] get(int pos1, int pos2, boolean dir) { - - if (pos1<0 || pos2<0 || edges[pos1][pos2][dir?0:1]==null) return def; - return edges[pos1][pos2][dir?0:1]; + + if (pos1 < 0 || pos2 < 0 || edges[pos1][pos2][dir ? 0 : 1] == null) + return def; + return edges[pos1][pos2][dir ? 0 : 1]; } - /** * @param dis */ @@ -119,32 +116,33 @@ public final class Edges { int len = edges.length; d.writeShort(len); - for(int p1 =0;p1<len;p1++) { - for(int p2 =0;p2<len;p2++) { - if (edges[p1][p2][0]==null) d.writeShort(0); + for (int p1 = 0; p1 < len; p1++) { + for (int p2 = 0; p2 < len; p2++) { + if (edges[p1][p2][0] == null) + d.writeShort(0); else { d.writeShort(edges[p1][p2][0].length); - for(int l =0;l<edges[p1][p2][0].length;l++) { + for (int l = 0; l < edges[p1][p2][0].length; l++) { d.writeShort(edges[p1][p2][0][l]); } - + } - if (edges[p1][p2][1]==null) d.writeShort(0); + if (edges[p1][p2][1] == null) + d.writeShort(0); else { d.writeShort(edges[p1][p2][1].length); - for(int l =0;l<edges[p1][p2][1].length;l++) { + for (int l = 0; l < edges[p1][p2][1].length; l++) { d.writeShort(edges[p1][p2][1][l]); } - } + } } } - + d.writeShort(def[0]); } - /** * @param dis */ @@ -152,31 +150,31 @@ public final class Edges { int len = d.readShort(); edges = new short[len][len][2][]; - for(int p1 =0;p1<len;p1++) { - for(int p2 =0;p2<len;p2++) { + for (int p1 = 0; p1 < len; p1++) { + for (int p2 = 0; p2 < len; p2++) { int ll = d.readShort(); - if (ll==0) { - edges[p1][p2][0]=null; + if (ll == 0) { + edges[p1][p2][0] = null; } else { edges[p1][p2][0] = new short[ll]; - for(int l =0;l<ll;l++) { - edges[p1][p2][0][l]=d.readShort(); - } + for (int l = 0; l < ll; l++) { + edges[p1][p2][0][l] = d.readShort(); + } } ll = d.readShort(); - if (ll==0) { - edges[p1][p2][1]=null; + if (ll == 0) { + edges[p1][p2][1] = null; } else { edges[p1][p2][1] = new short[ll]; - for(int l =0;l<ll;l++) { - edges[p1][p2][1][l]=d.readShort(); - } - } + for (int l = 0; l < ll; l++) { + edges[p1][p2][1][l] = d.readShort(); + } + } } } - - def[0]= d.readShort(); + + def[0] = d.readShort(); } @@ -187,38 +185,32 @@ public final class Edges { } String _key; - + public C(String key) { super(); - _key=key; + _key = key; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object) */ @Override public int compare(Short l1, Short l2) { - - // int c1 = labelCount.get(l1); - // int c2 = labelCount.get(l2); - // if (true) return c1==c2?0:c1>c2?-1:1; - - int x1 = slabelCount.get(_key+l1.shortValue()); - int x2 = slabelCount.get(_key+l2.shortValue()); - // System.out.println(x1+" "+x2); - - - return x1==x2?0:x1>x2?-1:1; - - - - } - + // int c1 = labelCount.get(l1); + // int c2 = labelCount.get(l2); + // if (true) return c1==c2?0:c1>c2?-1:1; + + int x1 = slabelCount.get(_key + l1.shortValue()); + int x2 = slabelCount.get(_key + l2.shortValue()); + // System.out.println(x1+" "+x2); + + return x1 == x2 ? 0 : x1 > x2 ? -1 : 1; + + } - - } - - + } diff --git a/dependencyParser/mate-tools/src/is2/data/F2S.java b/dependencyParser/mate-tools/src/is2/data/F2S.java index 1f1f668..37ee11a 100755 --- a/dependencyParser/mate-tools/src/is2/data/F2S.java +++ b/dependencyParser/mate-tools/src/is2/data/F2S.java @@ -1,35 +1,39 @@ package is2.data; - final public class F2S extends IFV { private double[] parameters; - public F2S() {} - + + public F2S() { + } + public double score; - + /** * @param parameters2 */ public F2S(double[] parameters2) { - parameters=parameters2; + parameters = parameters2; } @Override public void add(int i) { - if (i>0)score += parameters[i]; + if (i > 0) + score += parameters[i]; } - + public void setParameters(double[] p) { - parameters =p; + parameters = p; } - + @Override public void clear() { - score =0; + score = 0; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#getScore() */ @Override @@ -37,12 +41,14 @@ final public class F2S extends IFV { return score; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#clone() */ @Override public IFV clone() { return new F2S(parameters); } - + } diff --git a/dependencyParser/mate-tools/src/is2/data/F2SD.java b/dependencyParser/mate-tools/src/is2/data/F2SD.java index 45c554f..2cdd793 100755 --- a/dependencyParser/mate-tools/src/is2/data/F2SD.java +++ b/dependencyParser/mate-tools/src/is2/data/F2SD.java @@ -1,29 +1,29 @@ package is2.data; - final public class F2SD extends IFV { final private double[] parameters; - - public double score =0; - + + public double score = 0; + public F2SD(double[] p) { - parameters =p; + parameters = p; } - @Override public void add(int i) { - if (i>0)score += parameters[i]; + if (i > 0) + score += parameters[i]; } - + @Override public void clear() { - score =0; + score = 0; } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#getScore() */ @Override @@ -31,8 +31,9 @@ final public class F2SD extends IFV { return score; } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#clone() */ @Override @@ -40,5 +41,4 @@ final public class F2SD extends IFV { return new F2SD(parameters); } - } diff --git a/dependencyParser/mate-tools/src/is2/data/F2SF.java b/dependencyParser/mate-tools/src/is2/data/F2SF.java index 127d775..394352f 100755 --- a/dependencyParser/mate-tools/src/is2/data/F2SF.java +++ b/dependencyParser/mate-tools/src/is2/data/F2SF.java @@ -1,48 +1,45 @@ package is2.data; - final public class F2SF extends IFV { final private float[] parameters; - - - public float score =0; - + public float score = 0; + public F2SF(float[] p) { - parameters =p; + parameters = p; } - + @Override - final public void add(int i) { - if (i>0) score += parameters[i]; + final public void add(int i) { + if (i > 0) + score += parameters[i]; } - - final public void add(int[] i) { - for(int k=0;k<i.length;k++) { - if (i[k]>0) score += parameters[i[k]]; + final public void add(int[] i) { + for (int element : i) { + if (element > 0) + score += parameters[element]; } } - - - final public void sub(float[] px,int i, Long2IntInterface li) { - - if (i>0) { + + final public void sub(float[] px, int i, Long2IntInterface li) { + + if (i > 0) { score -= px[li.l2i(i)]; -// score -= px[i]; - //else score -=px[]; + // score -= px[i]; + // else score -=px[]; } } - @Override public void clear() { - score =0; + score = 0; } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#getScore() */ @Override @@ -54,7 +51,9 @@ final public class F2SF extends IFV { return score; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#clone() */ @Override @@ -66,13 +65,13 @@ final public class F2SF extends IFV { * @param l2i */ public void addRel(int i, float f) { - if (i>0) score += parameters[i]*f; - + if (i > 0) + score += parameters[i] * f; + } - + public int length() { return this.parameters.length; } - - + } diff --git a/dependencyParser/mate-tools/src/is2/data/F2SP.java b/dependencyParser/mate-tools/src/is2/data/F2SP.java index 515a788..513fa33 100644 --- a/dependencyParser/mate-tools/src/is2/data/F2SP.java +++ b/dependencyParser/mate-tools/src/is2/data/F2SP.java @@ -1,46 +1,45 @@ package is2.data; - final public class F2SP extends IFV { final private float[] parameters; - - public double score =0; - + + public double score = 0; + public F2SP(float[] p) { - parameters =p; + parameters = p; } - + @Override - final public void add(int i) { - if (i>0) score += parameters[i]; + final public void add(int i) { + if (i > 0) + score += parameters[i]; } - - final public void add(int[] i) { - for(int k=0;k<i.length;k++) { - if (i[k]>0) score += parameters[i[k]]; + final public void add(int[] i) { + for (int element : i) { + if (element > 0) + score += parameters[element]; } } - - - final public void sub(float[] px,int i, Long2IntInterface li) { - - if (i>0) { + + final public void sub(float[] px, int i, Long2IntInterface li) { + + if (i > 0) { score -= px[li.l2i(i)]; -// score -= px[i]; - //else score -=px[]; + // score -= px[i]; + // else score -=px[]; } } - @Override public void clear() { - score =0; + score = 0; } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#getScore() */ @Override @@ -52,7 +51,9 @@ final public class F2SP extends IFV { return score; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#clone() */ @Override @@ -64,13 +65,13 @@ final public class F2SP extends IFV { * @param l2i */ public void addRel(int i, float f) { - if (i>0) score += parameters[i]*f; - + if (i > 0) + score += parameters[i] * f; + } - + public int length() { return this.parameters.length; } - - + } diff --git a/dependencyParser/mate-tools/src/is2/data/F2ST.java b/dependencyParser/mate-tools/src/is2/data/F2ST.java index 2ef062b..4c30144 100644 --- a/dependencyParser/mate-tools/src/is2/data/F2ST.java +++ b/dependencyParser/mate-tools/src/is2/data/F2ST.java @@ -1,30 +1,29 @@ package is2.data; - final public class F2ST extends IFV { final private short[] parameters; - - - public int score =0; - + public int score = 0; + public F2ST(short[] p) { - parameters =p; + parameters = p; } - + @Override - final public void add(int i) { - if (i>0) score += parameters[i]; + final public void add(int i) { + if (i > 0) + score += parameters[i]; } - + @Override public void clear() { - score =0; + score = 0; } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#getScore() */ @Override @@ -36,12 +35,14 @@ final public class F2ST extends IFV { return score; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#clone() */ @Override public IFV clone() { return new F2ST(this.parameters); } - + } diff --git a/dependencyParser/mate-tools/src/is2/data/FV.java b/dependencyParser/mate-tools/src/is2/data/FV.java index 1cfbeba..10c0030 100755 --- a/dependencyParser/mate-tools/src/is2/data/FV.java +++ b/dependencyParser/mate-tools/src/is2/data/FV.java @@ -1,26 +1,22 @@ package is2.data; - - - - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; -public final class FV extends IFV { - +public final class FV extends IFV { + private FV subfv1; private FV subfv2; private boolean negateSecondSubFV = false; - - private int size; - + + private int size; + // content of the nodes NxC private int m_index[]; - + // type of the nodes NxT - + public FV() { this(10); } @@ -29,13 +25,12 @@ public final class FV extends IFV { m_index = new int[initialCapacity]; } - - public FV (FV fv1, FV fv2) { + public FV(FV fv1, FV fv2) { subfv1 = fv1; subfv2 = fv2; } - public FV (FV fv1, FV fv2, boolean negSecond) { + public FV(FV fv1, FV fv2, boolean negSecond) { this(0); subfv1 = fv1; subfv2 = fv2; @@ -44,88 +39,87 @@ public final class FV extends IFV { /** * Read a feature vector + * * @param index * @param value */ public FV(DataInputStream dos, int capacity) throws IOException { this(capacity); - size= m_index.length; - - for (int i=0; i<size; i++) m_index[i] = dos.readInt(); - } + size = m_index.length; + for (int i = 0; i < size; i++) + m_index[i] = dos.readInt(); + } /** * Read a feature vector + * * @param index * @param value */ public FV(DataInputStream dos) throws IOException { this(dos.readInt()); - size= m_index.length; - - for (int i=0; i<size; i++) m_index[i] = dos.readInt(); - - + size = m_index.length; + + for (int i = 0; i < size; i++) + m_index[i] = dos.readInt(); + } /** - * Increases the capacity of this <tt>Graph</tt> instance, if - * necessary, to ensure that it can hold at least the number of nodes - * specified by the minimum capacity argument. - * - * @param minCapacity the desired minimum capacity. - */ - private void ensureCapacity(int minCapacity) { - - - if (minCapacity > m_index.length) { - - int oldIndex[] = m_index; - - int newCapacity = ( m_index.length * 3)/2 + 1; - - - if (newCapacity < minCapacity) newCapacity = minCapacity; - - m_index = new int[newCapacity]; - System.arraycopy(oldIndex, 0, m_index, 0, oldIndex.length); - - } - } - - - final public int size() { - return size; - } - - final public boolean isEmpty() { - return size == 0; - } - - @Override + * Increases the capacity of this <tt>Graph</tt> instance, if necessary, to + * ensure that it can hold at least the number of nodes specified by the + * minimum capacity argument. + * + * @param minCapacity + * the desired minimum capacity. + */ + private void ensureCapacity(int minCapacity) { + + if (minCapacity > m_index.length) { + + int oldIndex[] = m_index; + + int newCapacity = (m_index.length * 3) / 2 + 1; + + if (newCapacity < minCapacity) + newCapacity = minCapacity; + + m_index = new int[newCapacity]; + System.arraycopy(oldIndex, 0, m_index, 0, oldIndex.length); + + } + } + + final public int size() { + return size; + } + + final public boolean isEmpty() { + return size == 0; + } + + @Override final public void clear() { - size = 0; - } - - - final public int createFeature(int i, double v) { - - ensureCapacity(size+1); - m_index[size] =i; - size++; - return size-1; - } - - final public int createFeature(int i) { - - ensureCapacity(size+1); - m_index[size] =i; - size++; - return size-1; - } - - + size = 0; + } + + final public int createFeature(int i, double v) { + + ensureCapacity(size + 1); + m_index[size] = i; + size++; + return size - 1; + } + + final public int createFeature(int i) { + + ensureCapacity(size + 1); + m_index[size] = i; + size++; + return size - 1; + } + final public int getIndex(int i) { return m_index[i]; } @@ -133,57 +127,51 @@ public final class FV extends IFV { public void setIndex(int p, int i) { m_index[p] = i; } - - - /** - * Trims the capacity of this <tt>Graph</tt> instance to true size. - * An application can use this operation to minimize - * the storage of an <tt>Graph</tt> instance. - */ - public void trimToSize() { + + /** + * Trims the capacity of this <tt>Graph</tt> instance to true size. An + * application can use this operation to minimize the storage of an + * <tt>Graph</tt> instance. + */ + public void trimToSize() { if (size < m_index.length) { - - + int oldIndex[] = m_index; - + m_index = new int[size]; System.arraycopy(oldIndex, 0, m_index, 0, size); - + } - - } - - - - - - @Override + + } + + @Override final public void add(int i) { - if (i>=0) { - ensureCapacity(size+1); - m_index[size] =i; + if (i >= 0) { + ensureCapacity(size + 1); + m_index[size] = i; size++; } - } - - final public void add(int[] i) { - - for(int k =0;k<i.length;k++) add(i[k]); - - } - - final public void put(int i, double f) { - if (i>=0) createFeature(i,f); - } - - + } + + final public void add(int[] i) { + + for (int element : i) + add(element); + + } + + final public void put(int i, double f) { + if (i >= 0) + createFeature(i, f); + } + // fv1 - fv2 public FV getDistVector(FV fl2) { return new FV(this, fl2, true); } - public double getScore(double[] parameters, boolean negate) { double score = 0.0; @@ -191,64 +179,78 @@ public final class FV extends IFV { score += subfv1.getScore(parameters, negate); if (null != subfv2) { - if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV); - else score += subfv2.getScore(parameters, negateSecondSubFV); - + if (negate) + score += subfv2.getScore(parameters, !negateSecondSubFV); + else + score += subfv2.getScore(parameters, negateSecondSubFV); + } } - if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]]; - else for(int i=0;i<size;i++) score += parameters[m_index[i]]; - - + if (negate) + for (int i = 0; i < size; i++) + score -= parameters[m_index[i]]; + else + for (int i = 0; i < size; i++) + score += parameters[m_index[i]]; + return score; } - - + final public float getScore(float[] parameters, boolean negate) { float score = 0.0F; - if (null != subfv1) { + if (null != subfv1) { score += subfv1.getScore(parameters, negate); if (null != subfv2) { - if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV); - else score += subfv2.getScore(parameters, negateSecondSubFV); - + if (negate) + score += subfv2.getScore(parameters, !negateSecondSubFV); + else + score += subfv2.getScore(parameters, negateSecondSubFV); + } } - - // warning changed the the value - - if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]];//*m_value[i]; - else for(int i=0;i<size;i++) score += parameters[m_index[i]];//*m_value[i]; - + + // warning changed the the value + + if (negate) + for (int i = 0; i < size; i++) + score -= parameters[m_index[i]];// *m_value[i]; + else + for (int i = 0; i < size; i++) + score += parameters[m_index[i]];// *m_value[i]; + return score; } final public int getScore(short[] parameters, boolean negate) { int score = 0; - if (null != subfv1) { + if (null != subfv1) { score += subfv1.getScore(parameters, negate); if (null != subfv2) { - if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV); - else score += subfv2.getScore(parameters, negateSecondSubFV); - + if (negate) + score += subfv2.getScore(parameters, !negateSecondSubFV); + else + score += subfv2.getScore(parameters, negateSecondSubFV); + } } - - // warning changed the value - - if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]];//*m_value[i]; - else for(int i=0;i<size;i++) score += parameters[m_index[i]];//*m_value[i]; - + + // warning changed the value + + if (negate) + for (int i = 0; i < size; i++) + score -= parameters[m_index[i]];// *m_value[i]; + else + for (int i = 0; i < size; i++) + score += parameters[m_index[i]];// *m_value[i]; + return score; } - - public void update(double[] parameters, double[] total, double alpha_k, double upd) { update(parameters, total, alpha_k, upd, false); } @@ -259,136 +261,137 @@ public final class FV extends IFV { subfv1.update(parameters, total, alpha_k, upd, negate); if (null != subfv2) { - if (negate) subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV); - else subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV); + if (negate) + subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV); + else + subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV); } } if (negate) { - for(int i=0;i<size;i++) { - parameters[m_index[i]] -= alpha_k;//*getValue(i); - total[m_index[i]] -= upd*alpha_k;//*getValue(i); + for (int i = 0; i < size; i++) { + parameters[m_index[i]] -= alpha_k;// *getValue(i); + total[m_index[i]] -= upd * alpha_k;// *getValue(i); } } else { - for(int i=0;i<size;i++){ - parameters[m_index[i]] += alpha_k;//*getValue(i); - total[m_index[i]] += upd*alpha_k;//*getValue(i); + for (int i = 0; i < size; i++) { + parameters[m_index[i]] += alpha_k;// *getValue(i); + total[m_index[i]] += upd * alpha_k;// *getValue(i); } } - } - + public final void update(short[] parameters, short[] total, double alpha_k, double upd, boolean negate) { if (null != subfv1) { subfv1.update(parameters, total, alpha_k, upd, negate); if (null != subfv2) { - if (negate) subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV); - else subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV); + if (negate) + subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV); + else + subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV); } } if (negate) { - for(int i=0;i<size;i++) { - parameters[m_index[i]] -= alpha_k;//*getValue(i); - total[m_index[i]] -= upd*alpha_k;//*getValue(i); + for (int i = 0; i < size; i++) { + parameters[m_index[i]] -= alpha_k;// *getValue(i); + total[m_index[i]] -= upd * alpha_k;// *getValue(i); } } else { - for(int i=0;i<size;i++){ - parameters[m_index[i]] += alpha_k;//*getValue(i); - total[m_index[i]] += upd*alpha_k;//*getValue(i); + for (int i = 0; i < size; i++) { + parameters[m_index[i]] += alpha_k;// *getValue(i); + total[m_index[i]] += upd * alpha_k;// *getValue(i); } } - } - - + public final void update(float[] parameters, float[] total, double alpha_k, double upd, boolean negate) { if (null != subfv1) { subfv1.update(parameters, total, alpha_k, upd, negate); if (null != subfv2 && negate) { - subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV); - } else { - subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV); - } - + subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV); + } else { + subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV); + } + } - + if (negate) { - for(int i=0;i<size;i++){ + for (int i = 0; i < size; i++) { parameters[getIndex(i)] -= alpha_k; - total[getIndex(i)] -= upd*alpha_k; + total[getIndex(i)] -= upd * alpha_k; } } else { - for(int i=0;i<size;i++){ + for (int i = 0; i < size; i++) { parameters[getIndex(i)] += alpha_k; - total[getIndex(i)] += upd*alpha_k; // + total[getIndex(i)] += upd * alpha_k; // } } - } - - - public final void update(float[] parameters, float[] total, double alpha_k, - double upd, boolean negate, float[] totalp, Long2IntInterface li) { + + public final void update(float[] parameters, float[] total, double alpha_k, double upd, boolean negate, + float[] totalp, Long2IntInterface li) { if (null != subfv1) { - subfv1.update(parameters, total, alpha_k, upd, negate,totalp,li); + subfv1.update(parameters, total, alpha_k, upd, negate, totalp, li); if (null != subfv2 && negate) { - subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV,totalp,li); - } else { - subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV,totalp,li); - } + subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV, totalp, li); + } else { + subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV, totalp, li); + } } - + if (negate) { - for(int i=0;i<size;i++){ + for (int i = 0; i < size; i++) { parameters[getIndex(i)] -= alpha_k; - total[getIndex(i)] -= upd*alpha_k; - - totalp[li.l2i(getIndex(i))] -=upd*alpha_k; - // totalp[getIndex(i)] -=upd*alpha_k; + total[getIndex(i)] -= upd * alpha_k; + + totalp[li.l2i(getIndex(i))] -= upd * alpha_k; + // totalp[getIndex(i)] -=upd*alpha_k; } } else { - for(int i=0;i<size;i++){ + for (int i = 0; i < size; i++) { parameters[getIndex(i)] += alpha_k; - total[getIndex(i)] += upd*alpha_k; // + total[getIndex(i)] += upd * alpha_k; // - totalp[li.l2i(getIndex(i))] +=upd*alpha_k; - // totalp[getIndex(i)] +=upd*alpha_k; + totalp[li.l2i(getIndex(i))] += upd * alpha_k; + // totalp[getIndex(i)] +=upd*alpha_k; } } } - - - private static IntIntHash hm1; private static IntIntHash hm2; - + public int dotProduct(FV fl2) { - if (hm1==null) hm1 = new IntIntHash(size(),0.4F); - else hm1.clear(); - + if (hm1 == null) + hm1 = new IntIntHash(size(), 0.4F); + else + hm1.clear(); + addFeaturesToMap(hm1); - - if (hm2==null)hm2 = new IntIntHash(fl2.size,0.4F); - else hm2.clear(); - + + if (hm2 == null) + hm2 = new IntIntHash(fl2.size, 0.4F); + else + hm2.clear(); + fl2.addFeaturesToMap(hm2); int[] keys = hm1.keys(); int result = 0; - for(int i = 0; i < keys.length; i++) result += hm1.get(keys[i])*hm2.get(keys[i]); + for (int key : keys) + result += hm1.get(key) * hm2.get(key); return result; @@ -396,69 +399,73 @@ public final class FV extends IFV { public double twoNorm(FV fl2) { - if (hm1==null) hm1 = new IntIntHash(size(),0.4F); - else hm1.clear(); - + if (hm1 == null) + hm1 = new IntIntHash(size(), 0.4F); + else + hm1.clear(); + addFeaturesToMap(hm1); - - if (hm2==null)hm2 = new IntIntHash(fl2.size,0.4F); - else hm2.clear(); - + + if (hm2 == null) + hm2 = new IntIntHash(fl2.size, 0.4F); + else + hm2.clear(); + fl2.addFeaturesToMap(hm2); int[] keys = hm1.keys(); int result = 0; - for(int i = 0; i < keys.length; i++) result += hm1.get(keys[i])*hm2.get(keys[i]); + for (int key : keys) + result += hm1.get(key) * hm2.get(key); - - return Math.sqrt((double)result); - + return Math.sqrt(result); } - + public void addFeaturesToMap(IntIntHash map) { - + if (null != subfv1) { subfv1.addFeaturesToMap(map); if (null != subfv2) { - subfv2.addFeaturesToMap(map, negateSecondSubFV); - + subfv2.addFeaturesToMap(map, negateSecondSubFV); + } } - - for(int i=0;i<size;i++) if (!map.adjustValue(getIndex(i), 1)) map.put(getIndex(i), 1); - - - + for (int i = 0; i < size; i++) + if (!map.adjustValue(getIndex(i), 1)) + map.put(getIndex(i), 1); + } - - private void addFeaturesToMap(IntIntHash map, boolean negate) { - + if (null != subfv1) { subfv1.addFeaturesToMap(map, negate); if (null != subfv2) { - if (negate) subfv2.addFeaturesToMap(map, !negateSecondSubFV); - else subfv2.addFeaturesToMap(map, negateSecondSubFV); - + if (negate) + subfv2.addFeaturesToMap(map, !negateSecondSubFV); + else + subfv2.addFeaturesToMap(map, negateSecondSubFV); + } } - if (negate) { - for(int i=0;i<size;i++) if (!map . adjustValue(getIndex(i), -1)) map.put(getIndex(i), -1); + if (negate) { + for (int i = 0; i < size; i++) + if (!map.adjustValue(getIndex(i), -1)) + map.put(getIndex(i), -1); } else { - for(int i=0;i<size;i++) if (!map.adjustValue(getIndex(i), 1)) map.put(getIndex(i), 1); + for (int i = 0; i < size; i++) + if (!map.adjustValue(getIndex(i), 1)) + map.put(getIndex(i), 1); } - - + } - @Override public final String toString() { StringBuilder sb = new StringBuilder(); @@ -473,79 +480,83 @@ public final class FV extends IFV { if (null != subfv2) subfv2.toString(sb); } - for(int i=0;i<size;i++) + for (int i = 0; i < size; i++) sb.append(getIndex(i)).append(' '); } public void writeKeys(DataOutputStream dos) throws IOException { - - // int keys[] = keys(); - // dos.writeInt(keys.length); - // for(int i=0;i<keys.length;i++) { - // dos.writeInt(keys[i]); - // } - - - //int keys[] = keys(); + + // int keys[] = keys(); + // dos.writeInt(keys.length); + // for(int i=0;i<keys.length;i++) { + // dos.writeInt(keys[i]); + // } + + // int keys[] = keys(); dos.writeInt(size); - for(int i=0;i<size;i++) { + for (int i = 0; i < size; i++) { dos.writeInt(m_index[i]); } - + } public void readKeys(DataInputStream dos) throws IOException { - + int keys = dos.readInt(); - for (int i=0; i<keys; i++) createFeature(dos.readInt(), 1.0); - - + for (int i = 0; i < keys; i++) + createFeature(dos.readInt(), 1.0); + } - final public static FV cat(FV f1,FV f2) { - if (f1==null) return f2; - if (f2==null) return f1; + final public static FV cat(FV f1, FV f2) { + if (f1 == null) + return f2; + if (f2 == null) + return f1; return new FV(f1, f2); } - final public static FV cat(FV f1,FV f2, FV f3) { + final public static FV cat(FV f1, FV f2, FV f3) { return FV.cat(f1, FV.cat(f2, f3)); } - final public static FV cat(FV f1,FV f2, FV f3, FV f4) { + + final public static FV cat(FV f1, FV f2, FV f3, FV f4) { return FV.cat(f1, FV.cat(f2, FV.cat(f3, f4))); } - final public static FV read(DataInputStream dis) throws IOException { int cap = dis.readInt(); - if (cap == 0) return null; - return new FV(dis,cap); + if (cap == 0) + return null; + return new FV(dis, cap); } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#getScore() */ @Override public double getScore() { - //System.out.println("not implemented"); + // System.out.println("not implemented"); // TODO Auto-generated method stub return 0; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#clone() */ @Override public IFV clone() { - FV f= new FV(this.size); - for(int i=0;i<this.size;i++) { - f.m_index[i]=m_index[i]; + FV f = new FV(this.size); + for (int i = 0; i < this.size; i++) { + f.m_index[i] = m_index[i]; } - f.size=this.size; + f.size = this.size; return f; } - - } diff --git a/dependencyParser/mate-tools/src/is2/data/FVR.java b/dependencyParser/mate-tools/src/is2/data/FVR.java index f0b6784..c5bc073 100644 --- a/dependencyParser/mate-tools/src/is2/data/FVR.java +++ b/dependencyParser/mate-tools/src/is2/data/FVR.java @@ -1,30 +1,25 @@ package is2.data; - - -import gnu.trove.TIntDoubleHashMap; - - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; -public final class FVR extends IFV { - +import gnu.trove.TIntDoubleHashMap; + +public final class FVR extends IFV { + private FVR subfv1; private FVR subfv2; private boolean negateSecondSubFV = false; - - private int size; - - - + + private int size; + // content of the nodes NxC private int m_index[]; private float m_value[]; - + // type of the nodes NxT - + public FVR() { this(10); } @@ -34,13 +29,10 @@ public final class FVR extends IFV { m_value = new float[initialCapacity]; } -/* - public FVR (FVR fv1, FVR fv2) { - subfv1 = fv1; - subfv2 = fv2; - } -*/ - public FVR (FVR fv1, FVR fv2, boolean negSecond) { + /* + * public FVR (FVR fv1, FVR fv2) { subfv1 = fv1; subfv2 = fv2; } + */ + public FVR(FVR fv1, FVR fv2, boolean negSecond) { this(0); subfv1 = fv1; subfv2 = fv2; @@ -49,93 +41,89 @@ public final class FVR extends IFV { /** * Read a feature vector + * * @param index * @param value */ public FVR(DataInputStream dos, int capacity) throws IOException { this(capacity); - size= m_index.length; - - for (int i=0; i<size; i++) m_index[i] = dos.readInt(); - } + size = m_index.length; + for (int i = 0; i < size; i++) + m_index[i] = dos.readInt(); + } /** * Read a feature vector + * * @param index * @param value */ public FVR(DataInputStream dos) throws IOException { this(dos.readInt()); - size= m_index.length; - - for (int i=0; i<size; i++) m_index[i] = dos.readInt(); - - + size = m_index.length; + + for (int i = 0; i < size; i++) + m_index[i] = dos.readInt(); + } /** - * Increases the capacity of this <tt>Graph</tt> instance, if - * necessary, to ensure that it can hold at least the number of nodes - * specified by the minimum capacity argument. - * - * @param minCapacity the desired minimum capacity. - */ - private void ensureCapacity(int minCapacity) { - - - if (minCapacity > m_index.length) { - - int oldIndex[] = m_index; - float oldValue[] = m_value; - - int newCapacity = ( m_index.length * 3)/2 + 1; - - - if (newCapacity < minCapacity) newCapacity = minCapacity; - - m_index = new int[newCapacity]; - m_value = new float[newCapacity]; - - System.arraycopy(oldIndex, 0, m_index, 0, oldIndex.length); - System.arraycopy(oldValue, 0, m_value, 0, oldValue.length); - - } - } - - - final public int size() { - return size; - } - - final public boolean isEmpty() { - return size == 0; - } - - @Override + * Increases the capacity of this <tt>Graph</tt> instance, if necessary, to + * ensure that it can hold at least the number of nodes specified by the + * minimum capacity argument. + * + * @param minCapacity + * the desired minimum capacity. + */ + private void ensureCapacity(int minCapacity) { + + if (minCapacity > m_index.length) { + + int oldIndex[] = m_index; + float oldValue[] = m_value; + + int newCapacity = (m_index.length * 3) / 2 + 1; + + if (newCapacity < minCapacity) + newCapacity = minCapacity; + + m_index = new int[newCapacity]; + m_value = new float[newCapacity]; + + System.arraycopy(oldIndex, 0, m_index, 0, oldIndex.length); + System.arraycopy(oldValue, 0, m_value, 0, oldValue.length); + + } + } + + final public int size() { + return size; + } + + final public boolean isEmpty() { + return size == 0; + } + + @Override final public void clear() { - size = 0; - } - - - final public int createFeature(int i, float v) { - - ensureCapacity(size+1); - m_index[size] =i; - m_value[size] =v; - size++; - return size-1; - } - /* - final public int createFeature(int i) { - - ensureCapacity(size+1); - m_index[size] =i; - size++; - return size-1; - } - */ - + size = 0; + } + + final public int createFeature(int i, float v) { + + ensureCapacity(size + 1); + m_index[size] = i; + m_value[size] = v; + size++; + return size - 1; + } + /* + * final public int createFeature(int i) { + * + * ensureCapacity(size+1); m_index[size] =i; size++; return size-1; } + */ + final public int getIndex(int i) { return m_index[i]; } @@ -143,51 +131,45 @@ public final class FVR extends IFV { public void setIndex(int p, int i) { m_index[p] = i; } - - - /** - * Trims the capacity of this <tt>Graph</tt> instance to true size. - * An application can use this operation to minimize - * the storage of an <tt>Graph</tt> instance. - */ - public void trimToSize() { + + /** + * Trims the capacity of this <tt>Graph</tt> instance to true size. An + * application can use this operation to minimize the storage of an + * <tt>Graph</tt> instance. + */ + public void trimToSize() { if (size < m_index.length) { - - + int oldIndex[] = m_index; - + m_index = new int[size]; System.arraycopy(oldIndex, 0, m_index, 0, size); - + } - - } - - - - - - final public void add(int i) { - if (i>=0) { - ensureCapacity(size+1); - m_index[size] =i; - m_value[size] =1.0f; + + } + + @Override + final public void add(int i) { + if (i >= 0) { + ensureCapacity(size + 1); + m_index[size] = i; + m_value[size] = 1.0f; size++; } - } + } + + final public void add(int i, float f) { + if (i >= 0) + createFeature(i, f); + } - final public void add(int i, float f) { - if (i>=0) createFeature(i,f); - } - - // fv1 - fv2 public FVR getDistVector(FVR fl2) { return new FVR(this, fl2, true); } - public double getScore(double[] parameters, boolean negate) { double score = 0.0; @@ -195,190 +177,180 @@ public final class FVR extends IFV { score += subfv1.getScore(parameters, negate); if (null != subfv2) { - if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV); - else score += subfv2.getScore(parameters, negateSecondSubFV); - + if (negate) + score += subfv2.getScore(parameters, !negateSecondSubFV); + else + score += subfv2.getScore(parameters, negateSecondSubFV); + } } - if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]]; - else for(int i=0;i<size;i++) score += parameters[m_index[i]]; - - + if (negate) + for (int i = 0; i < size; i++) + score -= parameters[m_index[i]]; + else + for (int i = 0; i < size; i++) + score += parameters[m_index[i]]; + return score; } - - + final public float getScore(float[] parameters, boolean negate) { float score = 0.0F; - if (null != subfv1) { + if (null != subfv1) { score += subfv1.getScore(parameters, negate); if (null != subfv2) { - if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV); - else score += subfv2.getScore(parameters, negateSecondSubFV); - + if (negate) + score += subfv2.getScore(parameters, !negateSecondSubFV); + else + score += subfv2.getScore(parameters, negateSecondSubFV); + } } - - // warning changed the value - - if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]]*m_value[i]; - else for(int i=0;i<size;i++) score += parameters[m_index[i]]*m_value[i]; - + + // warning changed the value + + if (negate) + for (int i = 0; i < size; i++) + score -= parameters[m_index[i]] * m_value[i]; + else + for (int i = 0; i < size; i++) + score += parameters[m_index[i]] * m_value[i]; + return score; } final public int getScore(short[] parameters, boolean negate) { int score = 0; - if (null != subfv1) { + if (null != subfv1) { score += subfv1.getScore(parameters, negate); if (null != subfv2) { - if (negate) score += subfv2.getScore(parameters, !negateSecondSubFV); - else score += subfv2.getScore(parameters, negateSecondSubFV); - + if (negate) + score += subfv2.getScore(parameters, !negateSecondSubFV); + else + score += subfv2.getScore(parameters, negateSecondSubFV); + } } - - // warning changed the value - - if (negate) for(int i=0;i<size;i++) score -= parameters[m_index[i]]*m_value[i]; - else for(int i=0;i<size;i++) score += parameters[m_index[i]]*m_value[i]; - + + // warning changed the value + + if (negate) + for (int i = 0; i < size; i++) + score -= parameters[m_index[i]] * m_value[i]; + else + for (int i = 0; i < size; i++) + score += parameters[m_index[i]] * m_value[i]; + return score; } - - - - - public final void update(float[] parameters, float[] total, double alpha_k, double upd, boolean negate) { if (null != subfv1) { subfv1.update(parameters, total, alpha_k, upd, negate); if (null != subfv2 && negate) { - subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV); - } else { - subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV); - } - + subfv2.update(parameters, total, alpha_k, upd, !negateSecondSubFV); + } else { + subfv2.update(parameters, total, alpha_k, upd, negateSecondSubFV); + } + } - + if (negate) { - for(int i=0;i<size;i++){ - parameters[getIndex(i)] -= alpha_k*m_value[i]; - total[getIndex(i)] -= upd*alpha_k*m_value[i]; + for (int i = 0; i < size; i++) { + parameters[getIndex(i)] -= alpha_k * m_value[i]; + total[getIndex(i)] -= upd * alpha_k * m_value[i]; } } else { - for(int i=0;i<size;i++){ - parameters[getIndex(i)] += alpha_k*m_value[i]; - total[getIndex(i)] += upd*alpha_k*m_value[i]; // + for (int i = 0; i < size; i++) { + parameters[getIndex(i)] += alpha_k * m_value[i]; + total[getIndex(i)] += upd * alpha_k * m_value[i]; // } } - } - - -// private static IntIntHash hm1; -// private static IntIntHash hm2; - + // private static IntIntHash hm1; + // private static IntIntHash hm2; + private static TIntDoubleHashMap hd1; private static TIntDoubleHashMap hd2; - - + public int dotProduct(FVR fl2) { - if (hd1==null) hd1 = new TIntDoubleHashMap(size(),0.4F); - else hd1.clear(); - + if (hd1 == null) + hd1 = new TIntDoubleHashMap(size(), 0.4F); + else + hd1.clear(); + addFeaturesToMap(hd1); - - if (hd2==null)hd2 = new TIntDoubleHashMap(fl2.size,0.4F); - else hd2.clear(); - + + if (hd2 == null) + hd2 = new TIntDoubleHashMap(fl2.size, 0.4F); + else + hd2.clear(); + fl2.addFeaturesToMap(hd2); int[] keys = hd1.keys(); int result = 0; - for(int i = 0; i < keys.length; i++) result += hd1.get(keys[i])*hd2.get(keys[i]); + for (int key : keys) + result += hd1.get(key) * hd2.get(key); return result; } - private void addFeaturesToMap(TIntDoubleHashMap map) { - + if (null != subfv1) { subfv1.addFeaturesToMap(map); if (null != subfv2) { - subfv2.addFeaturesToMap(map, negateSecondSubFV); - - } - } - - - for(int i=0;i<size;i++) if (!map.adjustValue(getIndex(i), m_value[i])) map.put(getIndex(i), m_value[i]); - - - - } + subfv2.addFeaturesToMap(map, negateSecondSubFV); - - - private void addFeaturesToMap(IntIntHash map, boolean negate) { - - if (null != subfv1) { - subfv1.addFeaturesToMap(map, negate); - - if (null != subfv2) { - if (negate) subfv2.addFeaturesToMap(map, !negateSecondSubFV); - else subfv2.addFeaturesToMap(map, negateSecondSubFV); - } } - if (negate) { - for(int i=0;i<size;i++) if (!map . adjustValue(getIndex(i), -1)) map.put(getIndex(i), -1); - } else { - for(int i=0;i<size;i++) if (!map.adjustValue(getIndex(i), 1)) map.put(getIndex(i), 1); - } - - + for (int i = 0; i < size; i++) + if (!map.adjustValue(getIndex(i), m_value[i])) + map.put(getIndex(i), m_value[i]); + } private void addFeaturesToMap(TIntDoubleHashMap map, boolean negate) { - + if (null != subfv1) { subfv1.addFeaturesToMap(map, negate); if (null != subfv2) { - if (negate) subfv2.addFeaturesToMap(map, !negateSecondSubFV); - else subfv2.addFeaturesToMap(map, negateSecondSubFV); - + if (negate) + subfv2.addFeaturesToMap(map, !negateSecondSubFV); + else + subfv2.addFeaturesToMap(map, negateSecondSubFV); + } } - if (negate) { - for(int i=0;i<size;i++) if (!map . adjustValue(getIndex(i), -m_value[i])) map.put(getIndex(i), -m_value[i]); + if (negate) { + for (int i = 0; i < size; i++) + if (!map.adjustValue(getIndex(i), -m_value[i])) + map.put(getIndex(i), -m_value[i]); } else { - for(int i=0;i<size;i++) if (!map.adjustValue(getIndex(i), m_value[i])) map.put(getIndex(i), m_value[i]); + for (int i = 0; i < size; i++) + if (!map.adjustValue(getIndex(i), m_value[i])) + map.put(getIndex(i), m_value[i]); } - - + } - - - + @Override public final String toString() { StringBuilder sb = new StringBuilder(); @@ -393,52 +365,47 @@ public final class FVR extends IFV { if (null != subfv2) subfv2.toString(sb); } - for(int i=0;i<size;i++) + for (int i = 0; i < size; i++) sb.append(getIndex(i)).append('=').append(m_value[i]).append(' '); } public void writeKeys(DataOutputStream dos) throws IOException { - - // int keys[] = keys(); - // dos.writeInt(keys.length); - // for(int i=0;i<keys.length;i++) { - // dos.writeInt(keys[i]); - // } - - - //int keys[] = keys(); + + // int keys[] = keys(); + // dos.writeInt(keys.length); + // for(int i=0;i<keys.length;i++) { + // dos.writeInt(keys[i]); + // } + + // int keys[] = keys(); dos.writeInt(size); - for(int i=0;i<size;i++) { + for (int i = 0; i < size; i++) { dos.writeInt(m_index[i]); } - - } - /* - - final public static FVR cat(FVR f1,FVR f2) { - if (f1==null) return f2; - if (f2==null) return f1; - return new FVR(f1, f2); } - final public static FVR cat(FVR f1,FVR f2, FVR f3) { - return FVR.cat(f1, FVR.cat(f2, f3)); - } - final public static FVR cat(FVR f1,FVR f2, FVR f3, FVR f4) { - return FVR.cat(f1, FVR.cat(f2, FVR.cat(f3, f4))); - } - */ + /* + * + * final public static FVR cat(FVR f1,FVR f2) { if (f1==null) return f2; if + * (f2==null) return f1; return new FVR(f1, f2); } + * + * final public static FVR cat(FVR f1,FVR f2, FVR f3) { return FVR.cat(f1, + * FVR.cat(f2, f3)); } final public static FVR cat(FVR f1,FVR f2, FVR f3, + * FVR f4) { return FVR.cat(f1, FVR.cat(f2, FVR.cat(f3, f4))); } + */ - final public static FVR read(DataInputStream dis) throws IOException { int cap = dis.readInt(); - if (cap == 0) return null; - return new FVR(dis,cap); + if (cap == 0) + return null; + return new FVR(dis, cap); } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#getScore() */ @Override @@ -449,20 +416,20 @@ public final class FVR extends IFV { return 0; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.IFV#clone() */ @Override public IFV clone() { - FVR f= new FVR(this.size); - for(int i=0;i<this.size;i++) { - f.m_index[i]=m_index[i]; - f.m_value[i]=m_value[i]; + FVR f = new FVR(this.size); + for (int i = 0; i < this.size; i++) { + f.m_index[i] = m_index[i]; + f.m_value[i] = m_value[i]; } - f.size=this.size; + f.size = this.size; return f; } - - } diff --git a/dependencyParser/mate-tools/src/is2/data/IEncoder.java b/dependencyParser/mate-tools/src/is2/data/IEncoder.java index 03c4a45..d4ce252 100755 --- a/dependencyParser/mate-tools/src/is2/data/IEncoder.java +++ b/dependencyParser/mate-tools/src/is2/data/IEncoder.java @@ -1,14 +1,12 @@ /** - * + * */ package is2.data; -import java.util.HashMap; - /** * @author Bernd Bohnet, 20.09.2009 - * - * + * + * */ public interface IEncoder { public int getValue(String a, String v); @@ -17,10 +15,10 @@ public interface IEncoder { * @param spath * @param substring */ -// public int register(String spath, String substring); + // public int register(String spath, String substring); /** * @return */ -// public HashMap<String,Integer> getFeatureCounter(); + // public HashMap<String,Integer> getFeatureCounter(); } diff --git a/dependencyParser/mate-tools/src/is2/data/IEncoderPlus.java b/dependencyParser/mate-tools/src/is2/data/IEncoderPlus.java index 2558d6e..b033381 100644 --- a/dependencyParser/mate-tools/src/is2/data/IEncoderPlus.java +++ b/dependencyParser/mate-tools/src/is2/data/IEncoderPlus.java @@ -1,5 +1,5 @@ /** - * + * */ package is2.data; @@ -7,14 +7,13 @@ import java.util.HashMap; /** * @author Bernd Bohnet, 20.09.2009 - * - * + * + * */ public interface IEncoderPlus extends IEncoder { - - final public static String NONE="<None>"; - - + + final public static String NONE = "<None>"; + /** * @param spath * @param substring @@ -24,5 +23,5 @@ public interface IEncoderPlus extends IEncoder { /** * @return */ - public HashMap<String,Integer> getFeatureCounter(); + public HashMap<String, Integer> getFeatureCounter(); } diff --git a/dependencyParser/mate-tools/src/is2/data/IFV.java b/dependencyParser/mate-tools/src/is2/data/IFV.java index 28fbcfe..010f411 100755 --- a/dependencyParser/mate-tools/src/is2/data/IFV.java +++ b/dependencyParser/mate-tools/src/is2/data/IFV.java @@ -2,15 +2,15 @@ package is2.data; public abstract class IFV { - // public double score=0; - - public abstract void add(int i); - - public abstract double getScore(); - - public abstract void clear(); - - @Override + // public double score=0; + + public abstract void add(int i); + + public abstract double getScore(); + + public abstract void clear(); + + @Override public abstract IFV clone(); /** @@ -18,11 +18,12 @@ public abstract class IFV { * @param li */ public void add(long[] gvs, Long2IntInterface li, int l) { - for(int k=0;k<gvs.length;k++) { - if (gvs[k]==Integer.MIN_VALUE) break; - if (gvs[k]>0) add(li.l2i(gvs[k]+l)); + for (long gv : gvs) { + if (gv == Integer.MIN_VALUE) + break; + if (gv > 0) + add(li.l2i(gv + l)); } } - } diff --git a/dependencyParser/mate-tools/src/is2/data/Instances.java b/dependencyParser/mate-tools/src/is2/data/Instances.java index ccb26f9..828f6c6 100755 --- a/dependencyParser/mate-tools/src/is2/data/Instances.java +++ b/dependencyParser/mate-tools/src/is2/data/Instances.java @@ -2,27 +2,19 @@ package is2.data; import java.util.BitSet; -import is2.io.CONLLReader09; -import is2.util.DB; +public class Instances { - - -public class Instances { - - public IEncoder m_encoder; - - protected int size=0; - + protected int size = 0; + protected int capacity; - + public int[][] forms; public int[][] plemmas; public int[][] glemmas; - public short[][] heads; public short[][] pheads; @@ -32,361 +24,329 @@ public class Instances { public short[][] gpos; public short[][] pposs; - public short[][][] feats; - public int[][] predicat; - public short[][] predicateId; - public short[][] semposition; - public short[][][] arg; - public short[][][] argposition; - public BitSet[] pfill; - public short[][] gfeats; public short[][] pfeats; - - public Instances() {} - - - + public Instances() { + } public static int m_unkown = 0; public static int m_count = 0; - public static boolean m_report; - public static boolean m_found =false; + public static boolean m_found = false; - final public void setForm(int i, int p, String x) { - - - forms[i][p] = m_encoder.getValue(PipeGen.WORD,x); - if (forms[i][p]==-1) { - if (m_report) System.out.println("unkwrd "+x); + + forms[i][p] = m_encoder.getValue(PipeGen.WORD, x); + if (forms[i][p] == -1) { + if (m_report) + System.out.println("unkwrd " + x); m_unkown++; - m_found=true; - } + m_found = true; + } m_count++; } - - final public void setRel(int i, int p, String x) { - labels[i][p] = (short)m_encoder.getValue(PipeGen.REL,x); - + labels[i][p] = (short) m_encoder.getValue(PipeGen.REL, x); + } - final public void setHead(int i, int c, int p) { - heads[i][c] =(short)p; + heads[i][c] = (short) p; } - final public int size() { + final public int size() { return size; } + public void setSize(int n) { - size=n; + size = n; } - - - public void init(int ic, IEncoder mf) { init(ic, mf, -1); } - public void init(int ic, IEncoder mf, int version) { - capacity =ic; + capacity = ic; m_encoder = mf; - + forms = new int[capacity][]; plemmas = new int[capacity][]; glemmas = new int[capacity][]; - pposs= new short[capacity][]; - - gpos= new short[capacity][]; - labels= new short[capacity][]; - heads= new short[capacity][]; - plabels= new short[capacity][]; - pheads= new short[capacity][]; + pposs = new short[capacity][]; + + gpos = new short[capacity][]; + labels = new short[capacity][]; + heads = new short[capacity][]; + plabels = new short[capacity][]; + pheads = new short[capacity][]; feats = new short[capacity][][]; gfeats = new short[capacity][]; pfeats = new short[capacity][]; - - predicat =new int[ic][]; + + predicat = new int[ic][]; predicateId = new short[ic][]; semposition = new short[ic][]; - arg= new short[ic][][]; - argposition= new short[ic][][]; - + arg = new short[ic][][]; + argposition = new short[ic][][]; + pfill = new BitSet[ic]; } - public int length(int i) { return forms[i].length; } - public int createInstance09(int length) { - + forms[size] = new int[length]; plemmas[size] = new int[length]; glemmas[size] = new int[length]; - + pposs[size] = new short[length]; - + gpos[size] = new short[length]; - + labels[size] = new short[length]; heads[size] = new short[length]; - + this.pfill[size] = new BitSet(length); - + feats[size] = new short[length][]; gfeats[size] = new short[length]; pfeats[size] = new short[length]; plabels[size] = new short[length]; pheads[size] = new short[length]; - + size++; - - return size-1; - - } -/* - public final void setPPos(int i, int p, String x) { - ppos[i][p] = (short)m_encoder.getValue(PipeGen.POS,x); - + return size - 1; + } -*/ + + /* + * public final void setPPos(int i, int p, String x) { ppos[i][p] = + * (short)m_encoder.getValue(PipeGen.POS,x); + * + * } + */ public final void setPPoss(int i, int p, String x) { - pposs[i][p] = (short)m_encoder.getValue(PipeGen.POS,x); - - } + pposs[i][p] = (short) m_encoder.getValue(PipeGen.POS, x); + } public final void setGPos(int i, int p, String x) { - gpos[i][p] = (short)m_encoder.getValue(PipeGen.POS,x); + gpos[i][p] = (short) m_encoder.getValue(PipeGen.POS, x); } - public void setLemma(int i, int p, String x) { - plemmas[i][p] = m_encoder.getValue(PipeGen.WORD,x); + plemmas[i][p] = m_encoder.getValue(PipeGen.WORD, x); } - public void setGLemma(int i, int p, String x) { - glemmas[i][p] = m_encoder.getValue(PipeGen.WORD,x); + glemmas[i][p] = m_encoder.getValue(PipeGen.WORD, x); } - public void setFeats(int i, int p, String[] fts) { - if (fts==null) { - feats[i][p] =null; - return ; + if (fts == null) { + feats[i][p] = null; + return; } feats[i][p] = new short[fts.length]; - - for(int k=0;k<fts.length;k++) { - feats[i][p][k] = (short)m_encoder.getValue(PipeGen.FEAT,fts[k]); + + for (int k = 0; k < fts.length; k++) { + feats[i][p][k] = (short) m_encoder.getValue(PipeGen.FEAT, fts[k]); } - - } + } public void setFeature(int i, int p, String feature) { - if (feature==null) return; - this.gfeats[i][p]= (short) m_encoder.getValue(PipeGen.FFEATS,feature); -/* if (gfeats[i][p]==-1) { - System.out.println("+"+feature); - new Exception().printStackTrace(); - System.exit(0); - } - */ + if (feature == null) + return; + this.gfeats[i][p] = (short) m_encoder.getValue(PipeGen.FFEATS, feature); + /* + * if (gfeats[i][p]==-1) { System.out.println("+"+feature); new + * Exception().printStackTrace(); System.exit(0); } + */ } + public void setPFeature(int i, int p, String feature) { - if (feature==null) return; - this.pfeats[i][p]= (short) m_encoder.getValue(PipeGen.FFEATS,feature); + if (feature == null) + return; + this.pfeats[i][p] = (short) m_encoder.getValue(PipeGen.FFEATS, feature); } - public int getWValue(String v) { return m_encoder.getValue(PipeGen.WORD, v); } - public final void setPRel(int i, int p, String x) { - plabels[i][p] = (short)m_encoder.getValue(PipeGen.REL,x); + plabels[i][p] = (short) m_encoder.getValue(PipeGen.REL, x); } - public final void setPHead(int i, int c, int p) { - pheads[i][c] =(short)p; + pheads[i][c] = (short) p; } -/* - public String toString(int c) { - StringBuffer s = new StringBuffer(); - for(int i=0;i<length(c);i++) { - s.append(i).append('\t').append(forms[c][i]).append("\t_\t").append(ppos[c][i]).append('\t'). - append('\t').append(heads[c][i]).append('\n'); - } - - return s.toString(); - } -*/ + /* + * public String toString(int c) { StringBuffer s = new StringBuffer(); + * for(int i=0;i<length(c);i++) { + * s.append(i).append('\t').append(forms[c][i]).append("\t_\t").append(ppos[ + * c][i]).append('\t'). append('\t').append(heads[c][i]).append('\n'); } + * + * return s.toString(); } + */ /* - public void setPos(int i, int p, String x) { - ppos[i][p] = (short)m_encoder.getValue(PipeGen.POS,x); - - } -*/ + * public void setPos(int i, int p, String x) { ppos[i][p] = + * (short)m_encoder.getValue(PipeGen.POS,x); + * + * } + */ /** * Create the semantic representation + * * @param inst * @param it * @return */ public boolean createSem(int inst, SentenceData09 it) { - + boolean error = false; - - if (it.sem==null) return error; - + + if (it.sem == null) + return error; + predicat[inst] = new int[it.sem.length]; semposition[inst] = new short[it.sem.length]; predicateId[inst] = new short[it.sem.length]; - - if (it.sem!=null) { + + if (it.sem != null) { arg[inst] = new short[it.sem.length][]; - argposition[inst] =new short[it.sem.length][]; + argposition[inst] = new short[it.sem.length][]; } - if (it.sem==null) return error; - + if (it.sem == null) + return error; + // init sems - - - - - for(int i=0;i<it.sem.length;i++) { - + + for (int i = 0; i < it.sem.length; i++) { + String pred; - short predSense =0; - if (it.sem[i].indexOf('.')>0) { + short predSense = 0; + if (it.sem[i].indexOf('.') > 0) { pred = it.sem[i].substring(0, it.sem[i].indexOf('.')); - predSense = (short)m_encoder.getValue(PipeGen.SENSE, it.sem[i].substring(it.sem[i].indexOf('.')+1, it.sem[i].length())); - //Short.parseShort(it.sem[i].substring(it.sem[i].indexOf('.')+1, it.sem[i].length())); + predSense = (short) m_encoder.getValue(PipeGen.SENSE, + it.sem[i].substring(it.sem[i].indexOf('.') + 1, it.sem[i].length())); + // Short.parseShort(it.sem[i].substring(it.sem[i].indexOf('.')+1, + // it.sem[i].length())); } else { - pred = it.sem[i]; - predSense=(short)m_encoder.getValue(PipeGen.SENSE, ""); + pred = it.sem[i]; + predSense = (short) m_encoder.getValue(PipeGen.SENSE, ""); } - + predicat[inst][i] = m_encoder.getValue(PipeGen.PRED, pred); predicateId[inst][i] = predSense; - - semposition[inst][i]=(short)it.semposition[i]; - + + semposition[inst][i] = (short) it.semposition[i]; + // this can happen too when no arguments have values - if (it.arg==null) { - // DB.println("error arg == null "+i+" sem"+it.sem[i]+" inst number "+inst); - // error =true; + if (it.arg == null) { + // DB.println("error arg == null "+i+" sem"+it.sem[i]+" inst + // number "+inst); + // error =true; continue; } - - - // last pred(s) might have no argument - if (it.arg.length<=i) { - // DB.println("error in instance "+inst+" argument list and number of predicates different arg lists: "+it.arg.length+" preds "+sem.length); - // error =true; + + // last pred(s) might have no argument + if (it.arg.length <= i) { + // DB.println("error in instance "+inst+" argument list and + // number of predicates different arg lists: "+it.arg.length+" + // preds "+sem.length); + // error =true; continue; } - - + // this happens from time to time, if the predicate has no arguments - if (it.arg[i]==null) { - // DB.println("error no args for pred "+i+" "+it.sem[i]+" length "+it.ppos.length); - // error =true; + if (it.arg[i] == null) { + // DB.println("error no args for pred "+i+" "+it.sem[i]+" length + // "+it.ppos.length); + // error =true; continue; } - - int argCount=it.arg[i].length; + + int argCount = it.arg[i].length; arg[inst][i] = new short[it.arg[i].length]; argposition[inst][i] = new short[it.arg[i].length]; - + // add the content of the argument - for(int a=0;a<argCount;a++) { - arg[inst][i][a]=(short)m_encoder.getValue(PipeGen.ARG, it.arg[i][a]); - argposition[inst][i][a]=(short)it.argposition[i][a]; - - //System.out.print(" #"+a+" pos: "+argposition[inst][i][a]+" "+it.arg[i][a]+" "); + for (int a = 0; a < argCount; a++) { + arg[inst][i][a] = (short) m_encoder.getValue(PipeGen.ARG, it.arg[i][a]); + argposition[inst][i][a] = (short) it.argposition[i][a]; + + // System.out.print(" #"+a+" pos: "+argposition[inst][i][a]+" + // "+it.arg[i][a]+" "); } - //System.out.println(""); - + // System.out.println(""); + } - + return error; - - } + } public int predCount(int n) { return pfill[n].cardinality(); } - /** * @param pscnt * @return */ public String print(int pscnt) { StringBuilder s = new StringBuilder(); - - for(int i=0;i<this.length(pscnt);i++) { - s.append(i+"\t"+forms[pscnt][i]+"\t"+this.glemmas[pscnt][i]+"\t"+this.plemmas[pscnt][i]+"\t"+this.gpos[pscnt][i]+"\t" - +this.pposs[pscnt][i]+"\t"+this.gfeats[pscnt][i]+"\t"+(this.feats[pscnt][i]!=null&&this.feats[pscnt][i].length>0?this.feats[pscnt][i][0]:null)+ - "\t l "+(labels[pscnt]!=null&&labels[pscnt].length>i?labels[pscnt][i]:null)+"\t"+ - "\t"+heads[pscnt][i]+"\t"+ - (plabels[pscnt]!=null&&plabels[pscnt].length>i?plabels[pscnt][i]:null)+ - "\t"+this.predicat[pscnt][i]+"\n"); + + for (int i = 0; i < this.length(pscnt); i++) { + s.append(i + "\t" + forms[pscnt][i] + "\t" + this.glemmas[pscnt][i] + "\t" + this.plemmas[pscnt][i] + "\t" + + this.gpos[pscnt][i] + "\t" + this.pposs[pscnt][i] + "\t" + this.gfeats[pscnt][i] + "\t" + + (this.feats[pscnt][i] != null && this.feats[pscnt][i].length > 0 ? this.feats[pscnt][i][0] : null) + + "\t l " + (labels[pscnt] != null && labels[pscnt].length > i ? labels[pscnt][i] : null) + "\t" + + "\t" + heads[pscnt][i] + "\t" + + (plabels[pscnt] != null && plabels[pscnt].length > i ? plabels[pscnt][i] : null) + "\t" + + this.predicat[pscnt][i] + "\n"); } return s.toString(); } public String print1(int pscnt) { StringBuilder s = new StringBuilder(); - - for(int i=0;i<this.length(pscnt);i++) { - s.append(i+"\t"+forms[pscnt][i]+"\t"+"\t"+this.plemmas[pscnt][i]+"\t"+ - +this.pposs[pscnt][i]+ - "\t l "+(labels[pscnt]!=null&&labels[pscnt].length>i?labels[pscnt][i]:null)+"\t"+ - "\t"+heads[pscnt][i]+"\t"+ - (plabels[pscnt]!=null&&plabels[pscnt].length>i?plabels[pscnt][i]:null)+ - "\n"); + + for (int i = 0; i < this.length(pscnt); i++) { + s.append(i + "\t" + forms[pscnt][i] + "\t" + "\t" + this.plemmas[pscnt][i] + "\t" + +this.pposs[pscnt][i] + + "\t l " + (labels[pscnt] != null && labels[pscnt].length > i ? labels[pscnt][i] : null) + "\t" + + "\t" + heads[pscnt][i] + "\t" + + (plabels[pscnt] != null && plabels[pscnt].length > i ? plabels[pscnt][i] : null) + "\n"); } return s.toString(); } - - - } diff --git a/dependencyParser/mate-tools/src/is2/data/InstancesTagger.java b/dependencyParser/mate-tools/src/is2/data/InstancesTagger.java index 8079222..4cf894a 100644 --- a/dependencyParser/mate-tools/src/is2/data/InstancesTagger.java +++ b/dependencyParser/mate-tools/src/is2/data/InstancesTagger.java @@ -1,77 +1,95 @@ /** - * + * */ package is2.data; -import is2.data.IEncoder; -import is2.data.Instances; -import is2.data.SentenceData09; - - /** * @author Dr. Bernd Bohnet, 06.11.2010 - * - * + * + * */ public class InstancesTagger extends Instances { - public short[][][] chars; + public short[][][] chars; public int[][] formlc; + @Override public void init(int ic, IEncoder mf) { - super.init(ic, mf,9); + super.init(ic, mf, 9); chars = new short[capacity][][]; formlc = new int[capacity][]; - // System.out.println("create chars "+capacity ); + // System.out.println("create chars "+capacity ); } public void fillChars(SentenceData09 instance, int i, int cend) { chars[i] = new short[instance.length()][13]; formlc[i] = new int[instance.length()]; - - - for(int k=0;k<instance.length();k++) { - chars[i][k][0]= (short) ( instance.forms[k].length()>0?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(0))):cend); - chars[i][k][1]= (short) ( instance.forms[k].length()>1?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(1))):cend);//m_encoder.getValue(PipeGen.CHAR, END); - chars[i][k][2]= (short) ( instance.forms[k].length()>2?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(2))):cend); - chars[i][k][3]= (short) ( instance.forms[k].length()>3?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(3))):cend); - chars[i][k][4]= (short) ( instance.forms[k].length()>4?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(4))):cend); - chars[i][k][5]= (short) ( instance.forms[k].length()>5?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(5))):cend); - chars[i][k][6]= (short) ( instance.forms[k].length()>0?m_encoder.getValue(PipeGen.CHAR,String.valueOf(instance.forms[k].charAt(instance.forms[k].length()-1))):cend); - chars[i][k][7]= (short) ( instance.forms[k].length()>1?m_encoder.getValue(PipeGen.CHAR,String.valueOf(instance.forms[k].charAt(instance.forms[k].length()-2))):cend);//m_encoder.getValue(PipeGen.CHAR, END); - chars[i][k][8]= (short) ( instance.forms[k].length()>2?m_encoder.getValue(PipeGen.CHAR,String.valueOf(instance.forms[k].charAt(instance.forms[k].length()-3))):cend); - chars[i][k][9]= (short) ( instance.forms[k].length()>3?m_encoder.getValue(PipeGen.CHAR,String.valueOf(instance.forms[k].charAt(instance.forms[k].length()-4))):cend); - chars[i][k][10]= (short) ( instance.forms[k].length()>4?m_encoder.getValue(PipeGen.CHAR,String.valueOf(instance.forms[k].charAt(instance.forms[k].length()-5))):cend); - chars[i][k][11] = (short)instance.forms[k].length(); - chars[i][k][12] = (short) ( instance.forms[k].length()>0?m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(0))):cend); - formlc[i][k] =m_encoder.getValue(PipeGen.WORD, instance.forms[k].toLowerCase()); + for (int k = 0; k < instance.length(); k++) { + chars[i][k][0] = (short) (instance.forms[k].length() > 0 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(0))) : cend); + chars[i][k][1] = (short) (instance.forms[k].length() > 1 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(1))) : cend);// m_encoder.getValue(PipeGen.CHAR, + // END); + chars[i][k][2] = (short) (instance.forms[k].length() > 2 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(2))) : cend); + chars[i][k][3] = (short) (instance.forms[k].length() > 3 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(3))) : cend); + chars[i][k][4] = (short) (instance.forms[k].length() > 4 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(4))) : cend); + chars[i][k][5] = (short) (instance.forms[k].length() > 5 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(5))) : cend); + + chars[i][k][6] = (short) (instance.forms[k].length() > 0 ? m_encoder.getValue(PipeGen.CHAR, + String.valueOf(instance.forms[k].charAt(instance.forms[k].length() - 1))) : cend); + chars[i][k][7] = (short) (instance.forms[k].length() > 1 ? m_encoder.getValue(PipeGen.CHAR, + String.valueOf(instance.forms[k].charAt(instance.forms[k].length() - 2))) : cend);// m_encoder.getValue(PipeGen.CHAR, + // END); + chars[i][k][8] = (short) (instance.forms[k].length() > 2 ? m_encoder.getValue(PipeGen.CHAR, + String.valueOf(instance.forms[k].charAt(instance.forms[k].length() - 3))) : cend); + chars[i][k][9] = (short) (instance.forms[k].length() > 3 ? m_encoder.getValue(PipeGen.CHAR, + String.valueOf(instance.forms[k].charAt(instance.forms[k].length() - 4))) : cend); + chars[i][k][10] = (short) (instance.forms[k].length() > 4 ? m_encoder.getValue(PipeGen.CHAR, + String.valueOf(instance.forms[k].charAt(instance.forms[k].length() - 5))) : cend); + chars[i][k][11] = (short) instance.forms[k].length(); + chars[i][k][12] = (short) (instance.forms[k].length() > 0 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(instance.forms[k].charAt(0))) : cend); + formlc[i][k] = m_encoder.getValue(PipeGen.WORD, instance.forms[k].toLowerCase()); } } - public void fillChars(SentenceData09 instance, int i, String[] what,int cend) { + public void fillChars(SentenceData09 instance, int i, String[] what, int cend) { chars[i] = new short[instance.length()][13]; formlc[i] = new int[instance.length()]; - - - for(int k=0;k<instance.length();k++) { - chars[i][k][0]= (short) m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(0))); - chars[i][k][1]= (short) ( what[k].length()>1?m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(1))):cend);//m_encoder.getValue(PipeGen.CHAR, END); - chars[i][k][2]= (short) ( what[k].length()>2?m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(2))):cend); - chars[i][k][3]= (short) ( what[k].length()>3?m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(3))):cend); - chars[i][k][4]= (short) ( what[k].length()>4?m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(4))):cend); - chars[i][k][5]= (short) ( what[k].length()>5?m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(5))):cend); - chars[i][k][6]= (short) ( m_encoder.getValue(PipeGen.CHAR,String.valueOf(what[k].charAt(what[k].length()-1)))); - chars[i][k][7]= (short) ( what[k].length()>1?m_encoder.getValue(PipeGen.CHAR,String.valueOf(what[k].charAt(what[k].length()-2))):cend);//m_encoder.getValue(PipeGen.CHAR, END); - chars[i][k][8]= (short) ( what[k].length()>2?m_encoder.getValue(PipeGen.CHAR,String.valueOf(what[k].charAt(what[k].length()-3))):cend); - chars[i][k][9]= (short) ( what[k].length()>3?m_encoder.getValue(PipeGen.CHAR,String.valueOf(what[k].charAt(what[k].length()-4))):cend); - chars[i][k][10]= (short) ( what[k].length()>4?m_encoder.getValue(PipeGen.CHAR,String.valueOf(what[k].charAt(what[k].length()-5))):cend); - chars[i][k][11] = (short)what[k].length(); - formlc[i][k] =m_encoder.getValue(PipeGen.WORD, what[k].toLowerCase()); + for (int k = 0; k < instance.length(); k++) { + chars[i][k][0] = (short) m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(0))); + chars[i][k][1] = (short) (what[k].length() > 1 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(1))) : cend);// m_encoder.getValue(PipeGen.CHAR, + // END); + chars[i][k][2] = (short) (what[k].length() > 2 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(2))) : cend); + chars[i][k][3] = (short) (what[k].length() > 3 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(3))) : cend); + chars[i][k][4] = (short) (what[k].length() > 4 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(4))) : cend); + chars[i][k][5] = (short) (what[k].length() > 5 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(5))) : cend); + + chars[i][k][6] = (short) (m_encoder.getValue(PipeGen.CHAR, + String.valueOf(what[k].charAt(what[k].length() - 1)))); + chars[i][k][7] = (short) (what[k].length() > 1 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(what[k].length() - 2))) : cend);// m_encoder.getValue(PipeGen.CHAR, + // END); + chars[i][k][8] = (short) (what[k].length() > 2 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(what[k].length() - 3))) : cend); + chars[i][k][9] = (short) (what[k].length() > 3 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(what[k].length() - 4))) : cend); + chars[i][k][10] = (short) (what[k].length() > 4 + ? m_encoder.getValue(PipeGen.CHAR, String.valueOf(what[k].charAt(what[k].length() - 5))) : cend); + chars[i][k][11] = (short) what[k].length(); + formlc[i][k] = m_encoder.getValue(PipeGen.WORD, what[k].toLowerCase()); } } - - } diff --git a/dependencyParser/mate-tools/src/is2/data/IntIntHash.java b/dependencyParser/mate-tools/src/is2/data/IntIntHash.java index 4aec043..1019507 100644 --- a/dependencyParser/mate-tools/src/is2/data/IntIntHash.java +++ b/dependencyParser/mate-tools/src/is2/data/IntIntHash.java @@ -1,11 +1,8 @@ package is2.data; - - import java.util.Arrays; -final public class IntIntHash { - +final public class IntIntHash { protected int _size; protected int _free; @@ -14,123 +11,127 @@ final public class IntIntHash { protected int _autoCompactRemovesRemaining; protected float _autoCompactionFactor; public int _set[]; - private int _values[]; - + private int _values[]; public IntIntHash() { this(102877, 0.5F); } - public IntIntHash(int initialCapacity, float loadFactor) { _loadFactor = loadFactor; _autoCompactionFactor = loadFactor; - setUp((int)Math.ceil(initialCapacity / loadFactor)); + setUp((int) Math.ceil(initialCapacity / loadFactor)); } - - public int size() { return _size;} + public int size() { + return _size; + } public void ensureCapacity(int desiredCapacity) { - if(desiredCapacity > _maxSize - size()) { - rehash(PrimeFinder.nextPrime((int)Math.ceil((desiredCapacity + size()) / _loadFactor) + 1)); + if (desiredCapacity > _maxSize - size()) { + rehash(PrimeFinder.nextPrime((int) Math.ceil((desiredCapacity + size()) / _loadFactor) + 1)); computeMaxSize(capacity()); } } - public void compact() { - rehash(PrimeFinder.nextPrime((int)Math.ceil(size() / _loadFactor) + 1)); + public void compact() { + rehash(PrimeFinder.nextPrime((int) Math.ceil(size() / _loadFactor) + 1)); computeMaxSize(capacity()); - if(_autoCompactionFactor != 0.0F) computeNextAutoCompactionAmount(size()); + if (_autoCompactionFactor != 0.0F) + computeNextAutoCompactionAmount(size()); } public void setAutoCompactionFactor(float factor) { - if(factor < 0.0F) { - throw new IllegalArgumentException((new StringBuilder()).append("Factor must be >= 0: ").append(factor).toString()); - } else - { + if (factor < 0.0F) { + throw new IllegalArgumentException( + (new StringBuilder()).append("Factor must be >= 0: ").append(factor).toString()); + } else { _autoCompactionFactor = factor; return; } } - public float getAutoCompactionFactor() { return _autoCompactionFactor; } - + public float getAutoCompactionFactor() { + return _autoCompactionFactor; + } - private void computeMaxSize(int capacity) - { - _maxSize = Math.min(capacity - 1, (int)Math.floor(capacity * _loadFactor)); + private void computeMaxSize(int capacity) { + _maxSize = Math.min(capacity - 1, (int) Math.floor(capacity * _loadFactor)); _free = capacity - _size; } - private void computeNextAutoCompactionAmount(int size) - { - if(_autoCompactionFactor != 0.0F) + private void computeNextAutoCompactionAmount(int size) { + if (_autoCompactionFactor != 0.0F) _autoCompactRemovesRemaining = Math.round(size * _autoCompactionFactor); } - protected final void postInsertHook(boolean usedFreeSlot) - { - if(usedFreeSlot) _free--; - if(++_size > _maxSize || _free == 0) { + protected final void postInsertHook(boolean usedFreeSlot) { + if (usedFreeSlot) + _free--; + if (++_size > _maxSize || _free == 0) { int newCapacity = _size <= _maxSize ? capacity() : PrimeFinder.nextPrime(capacity() << 1); rehash(newCapacity); computeMaxSize(capacity()); } } - protected int calculateGrownCapacity() { return capacity() << 1; } + protected int calculateGrownCapacity() { + return capacity() << 1; + } - protected int capacity() { return _values.length; } + protected int capacity() { + return _values.length; + } - public boolean contains(int val) { return index(val) >= 0;} + public boolean contains(int val) { + return index(val) >= 0; + } private int index(int v) { int length = _set.length; - int index = Math.abs((computeHashCode(v) /*& 2147483647*/ ) % length); - - while(true) { + int index = Math.abs((computeHashCode(v) /* & 2147483647 */ ) % length); + + while (true) { // first - long l =_set[index]; + long l = _set[index]; if (l == 0) { - // good++; + // good++; return -1; } - // second + // second if (l == v) { return index; } - if(--index < 0) index += length; - } - //return -1; + if (--index < 0) + index += length; + } + // return -1; } - protected int insertionIndex(long val) - { + protected int insertionIndex(long val) { int length = _set.length; - int index = Math.abs((computeHashCode(val) /*& 2147483647*/ ) % length); - while(true) { - if(_set[index] == 0) return index; - if(_set[index] == val) return -index - 1; - if(--index < 0) index += length; + int index = Math.abs((computeHashCode(val) /* & 2147483647 */ ) % length); + while (true) { + if (_set[index] == 0) + return index; + if (_set[index] == val) + return -index - 1; + if (--index < 0) + index += length; - } + } } - public int computeHashCode(long value) - { - return (int)(( value ^ (value&0xffffffff00000000L) >>> 32 ) *31);//0x811c9dc5 ^ // 29 + public int computeHashCode(long value) { + return (int) ((value ^ (value & 0xffffffff00000000L) >>> 32) * 31);// 0x811c9dc5 + // ^ + // // + // 29 } - - - - - - protected int setUp(int initialCapacity) - { + protected int setUp(int initialCapacity) { int capacity = PrimeFinder.nextPrime(initialCapacity); computeMaxSize(capacity); computeNextAutoCompactionAmount(initialCapacity); @@ -139,37 +140,36 @@ final public class IntIntHash { return capacity; } - public void put(int key, int value) - { + public void put(int key, int value) { int index = insertionIndex(key); - doPut(key, value, index); + doPut(key, value, index); } - private void doPut(int key, int value, int index) - { + + private void doPut(int key, int value, int index) { boolean isNewMapping = true; - if(index < 0) - { + if (index < 0) { index = -index - 1; isNewMapping = false; - } + } _set[index] = key; _values[index] = value; - if(isNewMapping) postInsertHook(true); - + if (isNewMapping) + postInsertHook(true); + } - protected void rehash(int newCapacity) - { + protected void rehash(int newCapacity) { int oldCapacity = _set.length; int oldKeys[] = _set; int oldVals[] = _values; _set = new int[newCapacity]; _values = new int[newCapacity]; int i = oldCapacity; - - while(true){ - if(i-- <= 0) break; - if(oldVals[i] != 0) { + + while (true) { + if (i-- <= 0) + break; + if (oldVals[i] != 0) { int o = oldKeys[i]; int index = insertionIndex(o); _set[index] = o; @@ -178,75 +178,66 @@ final public class IntIntHash { } } - int index =0; - - - public int get(int key) - { + int index = 0; + + public int get(int key) { int index = index(key); return index >= 0 ? _values[index] : 0; } - - public void clear() - { + public void clear() { _size = 0; _free = capacity(); Arrays.fill(_set, 0, _set.length, 0); - // Arrays.fill(_values, 0, _values.length, 0); + // Arrays.fill(_values, 0, _values.length, 0); } - public int remove(int key) - { + public int remove(int key) { int prev = 0; int index = index(key); - if(index >= 0) - { + if (index >= 0) { prev = _values[index]; _values[index] = 0; _set[index] = 0; _size--; - if(_autoCompactionFactor != 0.0F) { + if (_autoCompactionFactor != 0.0F) { _autoCompactRemovesRemaining--; - if( _autoCompactRemovesRemaining <= 0) compact(); + if (_autoCompactRemovesRemaining <= 0) + compact(); } } return prev; } - - public int[] getValues() - { + public int[] getValues() { int vals[] = new int[size()]; int v[] = _values; int i = v.length; int j = 0; - do - { - if(i-- <= 0) break; - if(v[i] != 0) vals[j++] = v[i]; - } while(true); + do { + if (i-- <= 0) + break; + if (v[i] != 0) + vals[j++] = v[i]; + } while (true); return vals; } - public int[] keys() - { + public int[] keys() { int keys[] = new int[size()]; int k[] = _set; - // byte states[] = _states; + // byte states[] = _states; int i = k.length; int j = 0; - do - { - if(i-- <= 0) + do { + if (i-- <= 0) break; - if(k[i] != 0) + if (k[i] != 0) keys[j++] = k[i]; - } while(true); + } while (true); return keys; } - /** * @param index2 * @param i @@ -254,17 +245,11 @@ final public class IntIntHash { */ public boolean adjustValue(int key, int i) { int index = index(key); - if (index >= 0){ - _values[index] +=i; + if (index >= 0) { + _values[index] += i; return true; } return false; } - - - - - - } diff --git a/dependencyParser/mate-tools/src/is2/data/Long2Int.java b/dependencyParser/mate-tools/src/is2/data/Long2Int.java index e505cb0..1a4a3c4 100755 --- a/dependencyParser/mate-tools/src/is2/data/Long2Int.java +++ b/dependencyParser/mate-tools/src/is2/data/Long2Int.java @@ -1,111 +1,110 @@ package is2.data; - - /** * @author Bernd Bohnet, 01.09.2009 - * - * Maps for the Hash Kernel the long values to the int values. + * + * Maps for the Hash Kernel the long values to the int values. */ final public class Long2Int implements Long2IntInterface { - public Long2Int() { - size=115911564; + size = 115911564; } - - + public Long2Int(int s) { - size=s; + size = s; } - - public static void main(String args[]) { - - long l =123456; - long l2 =1010119; - System.out.println("l \t"+l+"\t"+printBits(l)); - - long x =100000000; - System.out.println("1m\t"+l2+"\t"+printBits(x)+"\t"+x); - - System.out.println("l2\t"+l2+"\t"+printBits(l)); - - System.out.println("l2*l\t"+l2+"\t"+printBits(l*l2)+" \t "+l*l2); - - System.out.println("l2*l*l2\t"+l2+"\t"+printBits(l*l2*l2)+" \t "+l*l2*l2); - - System.out.println("l2*l*l2\t"+l2+"\t"+printBits(l*l2*l2*l2)+" \t "+l*l2*l2*l2); - - - System.out.println("l2*l*l2\t"+l2+"\t"+printBits((l*l2)%0xfffff)+" \t "+l*l2*l2*l2+"\t "+0xfffff); - System.out.println("l2*l*l2\t"+l2+"\t"+printBits((l*l2)&0xfffffff)+" \t "+l*l2*l2*l2); + + public static void main(String args[]) { + + long l = 123456; + long l2 = 1010119; + System.out.println("l \t" + l + "\t" + printBits(l)); + + long x = 100000000; + System.out.println("1m\t" + l2 + "\t" + printBits(x) + "\t" + x); + + System.out.println("l2\t" + l2 + "\t" + printBits(l)); + + System.out.println("l2*l\t" + l2 + "\t" + printBits(l * l2) + " \t " + l * l2); + + System.out.println("l2*l*l2\t" + l2 + "\t" + printBits(l * l2 * l2) + " \t " + l * l2 * l2); + + System.out.println("l2*l*l2\t" + l2 + "\t" + printBits(l * l2 * l2 * l2) + " \t " + l * l2 * l2 * l2); + + System.out.println( + "l2*l*l2\t" + l2 + "\t" + printBits((l * l2) % 0xfffff) + " \t " + l * l2 * l2 * l2 + "\t " + 0xfffff); + System.out.println("l2*l*l2\t" + l2 + "\t" + printBits((l * l2) & 0xfffffff) + " \t " + l * l2 * l2 * l2); } - - + /** Integer counter for long2int */ - final private int size; //0x03ffffff //0x07ffffff - - - /* (non-Javadoc) + final private int size; // 0x03ffffff //0x07ffffff + + /* + * (non-Javadoc) + * * @see is2.sp09k9992.Long2IntIterface#size() */ - public int size() {return size;} - - /* (non-Javadoc) - * @see is2.sp09k9992.Long2IntIterface#start() - * has no meaning for this implementation + @Override + public int size() { + return size; + } + + /* + * (non-Javadoc) + * + * @see is2.sp09k9992.Long2IntIterface#start() has no meaning for this + * implementation */ - final public void start() {} - + final public void start() { + } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.sp09k9992.Long2IntIterface#l2i(long) */ - final public int l2i(long l) { - if (l<0) return -1; - + @Override + final public int l2i(long l) { + if (l < 0) + return -1; + // this works well LAS 88.138 - // int r= (int)(( l ^ (l&0xffffffff00000000L) >>> 29 ));//0x811c9dc5 ^ // 29 - // return Math.abs(r % size); - // this works a bit better and good with 0x03ffffff - // + // int r= (int)(( l ^ (l&0xffffffff00000000L) >>> 29 ));//0x811c9dc5 ^ + // // 29 + // return Math.abs(r % size); + // this works a bit better and good with 0x03ffffff + // /* - long r= l;//26 - l = (l>>12)&0xfffffffffffff000L; - r ^= l;//38 - l = (l>>11)&0xffffffffffffc000L; - r ^= l;//49 - l = (l>>9)& 0xffffffffffff0000L; //53 - r ^= l;//58 - l = (l>>7)&0xfffffffffffc0000L; //62 - r ^=l;//65 - int x = (int)r; - x = x % size; - // return x >= 0 ? x : -x ;// Math.abs(r % size); - - */ - // 26 0x03ffffff + * long r= l;//26 l = (l>>12)&0xfffffffffffff000L; r ^= l;//38 l = + * (l>>11)&0xffffffffffffc000L; r ^= l;//49 l = (l>>9)& + * 0xffffffffffff0000L; //53 r ^= l;//58 l = (l>>7)&0xfffffffffffc0000L; + * //62 r ^=l;//65 int x = (int)r; x = x % size; // return x >= 0 ? x : + * -x ;// Math.abs(r % size); + * + */ + // 26 0x03ffffff // together with 0x07ffffff 27 88.372 - long r= l;// 27 - l = (l>>13)&0xffffffffffffe000L; - r ^= l; // 40 - l = (l>>11)&0xffffffffffff0000L; - r ^= l; // 51 - l = (l>>9)& 0xfffffffffffc0000L; //53 - r ^= l; // 60 - l = (l>>7)& 0xfffffffffff00000L; //62 - r ^=l; //67 - int x = ((int)r) % size; - - return x >= 0 ? x : -x ; + long r = l;// 27 + l = (l >> 13) & 0xffffffffffffe000L; + r ^= l; // 40 + l = (l >> 11) & 0xffffffffffff0000L; + r ^= l; // 51 + l = (l >> 9) & 0xfffffffffffc0000L; // 53 + r ^= l; // 60 + l = (l >> 7) & 0xfffffffffff00000L; // 62 + r ^= l; // 67 + int x = ((int) r) % size; + + return x >= 0 ? x : -x; } - + static public StringBuffer printBits(long out) { StringBuffer s = new StringBuffer(); - - for(int k=0;k<65;k++) { - s.append((out & 1)==1?"1":"0"); - out >>=1; + + for (int k = 0; k < 65; k++) { + s.append((out & 1) == 1 ? "1" : "0"); + out >>= 1; } s.reverse(); return s; diff --git a/dependencyParser/mate-tools/src/is2/data/Long2IntExact.java b/dependencyParser/mate-tools/src/is2/data/Long2IntExact.java index 62f6375..debf455 100644 --- a/dependencyParser/mate-tools/src/is2/data/Long2IntExact.java +++ b/dependencyParser/mate-tools/src/is2/data/Long2IntExact.java @@ -1,56 +1,61 @@ package is2.data; - - /** * @author Bernd Bohnet, 01.09.2009 - * - * Maps for the Hash Kernel the long values to the int values. + * + * Maps for the Hash Kernel the long values to the int values. */ final public class Long2IntExact implements Long2IntInterface { static gnu.trove.TLongIntHashMap mapt = new gnu.trove.TLongIntHashMap(); - - static int cnt=0; - - + static int cnt = 0; + public Long2IntExact() { - size=115911564; - } - - + size = 115911564; + } + public Long2IntExact(int s) { - size=s; + size = s; } - - + /** Integer counter for long2int */ - final private int size; //0x03ffffff //0x07ffffff - - - /* (non-Javadoc) + final private int size; // 0x03ffffff //0x07ffffff + + /* + * (non-Javadoc) + * * @see is2.sp09k9992.Long2IntIterface#size() */ - public int size() {return size;} - - /* (non-Javadoc) - * @see is2.sp09k9992.Long2IntIterface#start() - * has no meaning for this implementation + @Override + public int size() { + return size; + } + + /* + * (non-Javadoc) + * + * @see is2.sp09k9992.Long2IntIterface#start() has no meaning for this + * implementation */ - final public void start() {} - + final public void start() { + } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.sp09k9992.Long2IntIterface#l2i(long) */ - final public int l2i(long l) { - if (l<0) return -1; - + @Override + final public int l2i(long l) { + if (l < 0) + return -1; + int i = mapt.get(l); - if (i!=0) return i; - - if (i==0 && cnt<size-1) { + if (i != 0) + return i; + + if (i == 0 && cnt < size - 1) { cnt++; mapt.put(l, cnt); return cnt; diff --git a/dependencyParser/mate-tools/src/is2/data/Long2IntInterface.java b/dependencyParser/mate-tools/src/is2/data/Long2IntInterface.java index a6cba63..8401c1f 100755 --- a/dependencyParser/mate-tools/src/is2/data/Long2IntInterface.java +++ b/dependencyParser/mate-tools/src/is2/data/Long2IntInterface.java @@ -4,9 +4,10 @@ public interface Long2IntInterface { public abstract int size(); - - /** - * Maps a long to a integer value. This is very useful to save memory for sparse data long values + /** + * Maps a long to a integer value. This is very useful to save memory for + * sparse data long values + * * @param l * @return the integer */ diff --git a/dependencyParser/mate-tools/src/is2/data/Long2IntQuick.java b/dependencyParser/mate-tools/src/is2/data/Long2IntQuick.java index adbe57d..9956173 100644 --- a/dependencyParser/mate-tools/src/is2/data/Long2IntQuick.java +++ b/dependencyParser/mate-tools/src/is2/data/Long2IntQuick.java @@ -1,47 +1,51 @@ package is2.data; - - /** * @author Bernd Bohnet, 01.09.2009 - * - * Maps for the Hash Kernel the long values to the int values. + * + * Maps for the Hash Kernel the long values to the int values. */ final public class Long2IntQuick implements Long2IntInterface { - /** Integer counter for long2int */ final private int size; - + public Long2IntQuick() { - size=0x07ffffff; + size = 0x07ffffff; } - - + public Long2IntQuick(int s) { - size=s; + size = s; } - - - /* (non-Javadoc) + + /* + * (non-Javadoc) + * * @see is2.sp09k9992.Long2IntIterface#size() */ - public int size() {return size;} - - /* (non-Javadoc) - * @see is2.sp09k9992.Long2IntIterface#start() - * has no meaning for this implementation + @Override + public int size() { + return size; + } + + /* + * (non-Javadoc) + * + * @see is2.sp09k9992.Long2IntIterface#start() has no meaning for this + * implementation */ - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.sp09k9992.Long2IntIterface#l2i(long) */ - final public int l2i(long r) { - long l = (r>>16)&0xfffffffffffff000L; - r ^= l; - r ^= l = (l>>12)&0xffffffffffff0000L; - r ^= l = (l>>8)& 0xfffffffffffc0000L; - return (int)(r % size); + @Override + final public int l2i(long r) { + long l = (r >> 16) & 0xfffffffffffff000L; + r ^= l; + r ^= l = (l >> 12) & 0xffffffffffff0000L; + r ^= l = (l >> 8) & 0xfffffffffffc0000L; + return (int) (r % size); } } diff --git a/dependencyParser/mate-tools/src/is2/data/MFB.java b/dependencyParser/mate-tools/src/is2/data/MFB.java index 04c36ae..9fa4e3c 100755 --- a/dependencyParser/mate-tools/src/is2/data/MFB.java +++ b/dependencyParser/mate-tools/src/is2/data/MFB.java @@ -1,256 +1,248 @@ package is2.data; - -import is2.util.DB; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.Map.Entry; +import is2.util.DB; + /** * Map Features, do not map long to integer - * + * * @author Bernd Bohnet, 20.09.2009 */ -final public class MFB implements IEncoderPlus { - +final public class MFB implements IEncoderPlus { + /** The features and its values */ - static private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>(); + static private final HashMap<String, HashMap<String, Integer>> m_featureSets = new HashMap<String, HashMap<String, Integer>>(); /** The feature class and the number of values */ - static private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>(); + static private final HashMap<String, Integer> m_featureCounters = new HashMap<String, Integer>(); /** The number of bits needed to encode a feature */ - static final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>(); - + static final HashMap<String, Integer> m_featureBits = new HashMap<String, Integer>(); + /** Integer counter for long2int */ - static private int count=0; - + static private int count = 0; + /** Stop growing */ - public boolean stop=false; - - final public static String NONE="<None>"; - - - - - - - - public MFB () {} - - - public int size() {return count;} - - - + public boolean stop = false; + + final public static String NONE = "<None>"; + + public MFB() { + } + + public int size() { + return count; + } + /** * Register an attribute class, if it not exists and add a possible value + * * @param type * @param type2 */ - final public int register(String a, String v) { - - synchronized(m_featureCounters) { - - HashMap<String,Integer> fs = getFeatureSet().get(a); - if (fs==null) { - fs = new HashMap<String,Integer>(); - getFeatureSet().put(a, fs); - fs.put(NONE, 0); - getFeatureCounter().put(a, 1); - } - - Integer i = fs.get(v); - if (i==null) { - Integer c = getFeatureCounter().get(a); - fs.put(v, c); - c++; - getFeatureCounter().put(a,c); - return c-1; - } else return i; + @Override + final public int register(String a, String v) { + + synchronized (m_featureCounters) { + + HashMap<String, Integer> fs = getFeatureSet().get(a); + if (fs == null) { + fs = new HashMap<String, Integer>(); + getFeatureSet().put(a, fs); + fs.put(NONE, 0); + getFeatureCounter().put(a, 1); } + + Integer i = fs.get(v); + if (i == null) { + Integer c = getFeatureCounter().get(a); + fs.put(v, c); + c++; + getFeatureCounter().put(a, c); + return c - 1; + } else + return i; + } } - + /** * Calculates the number of bits needed to encode a feature */ - public void calculateBits() { - - int total=0; - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2))); + public void calculateBits() { + + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + int bits = (int) Math.ceil((Math.log(e.getValue() + 1) / Math.log(2))); m_featureBits.put(e.getKey(), bits); - total+=bits; - // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1)); } - -// System.out.println("total number of needed bits "+total); + + // System.out.println("total number of needed bits "+total); } - - - - public String toString() { - + + @Override + public String toString() { + StringBuffer content = new StringBuffer(); - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - content.append(e.getKey()+" "+e.getValue()); - content.append(':'); - // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); - content.append(getFeatureBits(e.getKey())); - - /*if (vs.size()<120) - for(Entry<String,Integer> e2 : vs.entrySet()) { - content.append(e2.getKey()+" ("+e2.getValue()+") "); - }*/ - content.append('\n'); - + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + content.append(e.getKey() + " " + e.getValue()); + content.append(':'); + // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); + content.append(getFeatureBits(e.getKey())); + + /* + * if (vs.size()<120) for(Entry<String,Integer> e2 : vs.entrySet()) + * { content.append(e2.getKey()+" ("+e2.getValue()+") "); } + */ + content.append('\n'); + } return content.toString(); } - - - + static final public short getFeatureBits(String a) { - if(m_featureBits.get(a)==null) return 0; - return (short)m_featureBits.get(a).intValue(); + if (m_featureBits.get(a) == null) + return 0; + return (short) m_featureBits.get(a).intValue(); } - - /** * Get the integer place holder of the string value v of the type a - * - * @param t the type - * @param v the value + * + * @param t + * the type + * @param v + * the value * @return the integer place holder of v */ - final public int getValue(String t, String v) { - - if (m_featureSets.get(t)==null) return -1; + @Override + final public int getValue(String t, String v) { + + if (m_featureSets.get(t) == null) + return -1; Integer vi = m_featureSets.get(t).get(v); - if (vi==null) return -1; //stop && + if (vi == null) + return -1; // stop && return vi.intValue(); } - /** - * Static version of getValue - * @see getValue - */ + /** + * Static version of getValue + * + * @see getValue + */ static final public int getValueS(String a, String v) { - - if (m_featureSets.get(a)==null) return -1; - Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; //stop && - return vi.intValue(); - } - - public int hasValue(String a, String v) { - + + if (m_featureSets.get(a) == null) + return -1; Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; + if (vi == null) + return -1; // stop && return vi.intValue(); } - - + + public int hasValue(String a, String v) { + + Integer vi = m_featureSets.get(a).get(v); + if (vi == null) + return -1; + return vi.intValue(); + } + public static String printBits(int k) { StringBuffer s = new StringBuffer(); - for(int i =0;i<31;i++) { - s.append((k&0x00000001)==1?'1':'0'); - k=k>>1; - + for (int i = 0; i < 31; i++) { + s.append((k & 0x00000001) == 1 ? '1' : '0'); + k = k >> 1; + } s.reverse(); return s.toString(); } - - - - - - - - /** - * Maps a long to a integer value. This is very useful to save memory for sparse data long values + + /** + * Maps a long to a integer value. This is very useful to save memory for + * sparse data long values + * * @param l * @return the integer */ - static public int misses = 0; - static public int good = 0; - + static public int misses = 0; + static public int good = 0; - - /** * Write the data + * * @param dos * @throws IOException */ - static public void writeData(DataOutputStream dos) throws IOException { - dos.writeInt(getFeatureSet().size()); - // DB.println("write"+getFeatureSet().size()); - for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) { - dos.writeUTF(e.getKey()); - dos.writeInt(e.getValue().size()); - - for(Entry<String,Integer> e2 : e.getValue().entrySet()) { - - if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey()); - dos.writeUTF(e2.getKey()); - dos.writeInt(e2.getValue()); - - } - - } - } - public void read(DataInputStream din) throws IOException { - + static public void writeData(DataOutputStream dos) throws IOException { + dos.writeInt(getFeatureSet().size()); + // DB.println("write"+getFeatureSet().size()); + for (Entry<String, HashMap<String, Integer>> e : getFeatureSet().entrySet()) { + dos.writeUTF(e.getKey()); + dos.writeInt(e.getValue().size()); + + for (Entry<String, Integer> e2 : e.getValue().entrySet()) { + + if (e2.getKey() == null) + DB.println("key " + e2.getKey() + " value " + e2.getValue() + " e -key " + e.getKey()); + dos.writeUTF(e2.getKey()); + dos.writeInt(e2.getValue()); + + } + + } + } + + public void read(DataInputStream din) throws IOException { + int size = din.readInt(); - for(int i=0; i<size;i++) { + for (int i = 0; i < size; i++) { String k = din.readUTF(); int size2 = din.readInt(); - - HashMap<String,Integer> h = new HashMap<String,Integer>(); - getFeatureSet().put(k,h); - for(int j = 0;j<size2;j++) { + + HashMap<String, Integer> h = new HashMap<String, Integer>(); + getFeatureSet().put(k, h); + for (int j = 0; j < size2; j++) { h.put(din.readUTF(), din.readInt()); } getFeatureCounter().put(k, size2); } - count =size; - // stop(); - calculateBits(); + count = size; + // stop(); + calculateBits(); } - - /** + /** * Clear the data */ - static public void clearData() { - getFeatureSet().clear(); - m_featureBits.clear(); - getFeatureSet().clear(); - } + static public void clearData() { + getFeatureSet().clear(); + m_featureBits.clear(); + getFeatureSet().clear(); + } - public HashMap<String,Integer> getFeatureCounter() { + @Override + public HashMap<String, Integer> getFeatureCounter() { return m_featureCounters; } - static public HashMap<String,HashMap<String,Integer>> getFeatureSet() { + static public HashMap<String, HashMap<String, Integer>> getFeatureSet() { return m_featureSets; } - - static public String[] reverse(HashMap<String,Integer> v){ + + static public String[] reverse(HashMap<String, Integer> v) { String[] set = new String[v.size()]; - for(Entry<String,Integer> e : v.entrySet()) { - set[e.getValue()]=e.getKey(); + for (Entry<String, Integer> e : v.entrySet()) { + set[e.getValue()] = e.getKey(); } return set; } - } diff --git a/dependencyParser/mate-tools/src/is2/data/MFC.java b/dependencyParser/mate-tools/src/is2/data/MFC.java index bb1f27a..859a8ce 100644 --- a/dependencyParser/mate-tools/src/is2/data/MFC.java +++ b/dependencyParser/mate-tools/src/is2/data/MFC.java @@ -1,246 +1,243 @@ package is2.data; - -import is2.util.DB; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.Map.Entry; +import is2.util.DB; + /** * Map Features, do not map long to integer - * + * * @author Bernd Bohnet, 17.09.2011 */ -final public class MFC implements IEncoderPlus { - +final public class MFC implements IEncoderPlus { + /** The features and its values */ - private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>(); + private final HashMap<String, HashMap<String, Integer>> m_featureSets = new HashMap<String, HashMap<String, Integer>>(); /** The feature class and the number of values */ - private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>(); + private final HashMap<String, Integer> m_featureCounters = new HashMap<String, Integer>(); /** The number of bits needed to encode a feature */ - final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>(); - + final HashMap<String, Integer> m_featureBits = new HashMap<String, Integer>(); + /** Integer counter for long2int */ - private int count=0; - - - public MFC () {} - - - public int size() {return count;} - - - + private int count = 0; + + public MFC() { + } + + public int size() { + return count; + } + /** * Register an attribute class, if it not exists and add a possible value + * * @param type * @param type2 */ - final public int register(String a, String v) { - - synchronized(m_featureCounters) { - - HashMap<String,Integer> fs = getFeatureSet().get(a); - if (fs==null) { - fs = new HashMap<String,Integer>(); - getFeatureSet().put(a, fs); - fs.put(NONE, 0); - getFeatureCounter().put(a, 1); - } - - Integer i = fs.get(v); - if (i==null) { - Integer c = getFeatureCounter().get(a); - fs.put(v, c); - c++; - getFeatureCounter().put(a,c); - return c-1; - } else return i; + @Override + final public int register(String a, String v) { + + synchronized (m_featureCounters) { + + HashMap<String, Integer> fs = getFeatureSet().get(a); + if (fs == null) { + fs = new HashMap<String, Integer>(); + getFeatureSet().put(a, fs); + fs.put(NONE, 0); + getFeatureCounter().put(a, 1); } + + Integer i = fs.get(v); + if (i == null) { + Integer c = getFeatureCounter().get(a); + fs.put(v, c); + c++; + getFeatureCounter().put(a, c); + return c - 1; + } else + return i; + } } - + /** * Calculates the number of bits needed to encode a feature */ - public void calculateBits() { - - int total=0; - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2))); + public void calculateBits() { + + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + int bits = (int) Math.ceil((Math.log(e.getValue() + 1) / Math.log(2))); m_featureBits.put(e.getKey(), bits); - total+=bits; - // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1)); } - -// System.out.println("total number of needed bits "+total); - } - - - - public String toString() { - + + // System.out.println("total number of needed bits "+total); + } + + @Override + public String toString() { + StringBuffer content = new StringBuffer(); - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - content.append(e.getKey()+" "+e.getValue()); - content.append(':'); - // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); - content.append(getFeatureBits(e.getKey())); - - /*if (vs.size()<120) - for(Entry<String,Integer> e2 : vs.entrySet()) { - content.append(e2.getKey()+" ("+e2.getValue()+") "); - }*/ - content.append('\n'); - + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + content.append(e.getKey() + " " + e.getValue()); + content.append(':'); + // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); + content.append(getFeatureBits(e.getKey())); + + /* + * if (vs.size()<120) for(Entry<String,Integer> e2 : vs.entrySet()) + * { content.append(e2.getKey()+" ("+e2.getValue()+") "); } + */ + content.append('\n'); + } return content.toString(); } - - - + final public short getFeatureBits(String a) { - if(m_featureBits.get(a)==null) return 0; - return (short)m_featureBits.get(a).intValue(); + if (m_featureBits.get(a) == null) + return 0; + return (short) m_featureBits.get(a).intValue(); } - - /** * Get the integer place holder of the string value v of the type a - * - * @param t the type - * @param v the value + * + * @param t + * the type + * @param v + * the value * @return the integer place holder of v */ - final public int getValue(String t, String v) { - - if (m_featureSets.get(t)==null) return -1; + @Override + final public int getValue(String t, String v) { + + if (m_featureSets.get(t) == null) + return -1; Integer vi = m_featureSets.get(t).get(v); - if (vi==null) return -1; //stop && + if (vi == null) + return -1; // stop && return vi.intValue(); } - /** - * Static version of getValue - * @see getValue - */ + /** + * Static version of getValue + * + * @see getValue + */ final public int getValueS(String a, String v) { - - if (m_featureSets.get(a)==null) return -1; - Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; //stop && - return vi.intValue(); - } - - public int hasValue(String a, String v) { - + + if (m_featureSets.get(a) == null) + return -1; Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; + if (vi == null) + return -1; // stop && return vi.intValue(); } - - + + public int hasValue(String a, String v) { + + Integer vi = m_featureSets.get(a).get(v); + if (vi == null) + return -1; + return vi.intValue(); + } + public static String printBits(int k) { StringBuffer s = new StringBuffer(); - for(int i =0;i<31;i++) { - s.append((k&0x00000001)==1?'1':'0'); - k=k>>1; - + for (int i = 0; i < 31; i++) { + s.append((k & 0x00000001) == 1 ? '1' : '0'); + k = k >> 1; + } s.reverse(); return s.toString(); } - - - - - - - /** - * Maps a long to a integer value. This is very useful to save memory for sparse data long values + /** + * Maps a long to a integer value. This is very useful to save memory for + * sparse data long values + * * @param l * @return the integer */ - static public int misses = 0; - static public int good = 0; - + static public int misses = 0; + static public int good = 0; - - /** * Write the data + * * @param dos * @throws IOException */ - public void writeData(DataOutputStream dos) throws IOException { - dos.writeInt(getFeatureSet().size()); - // DB.println("write"+getFeatureSet().size()); - for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) { - dos.writeUTF(e.getKey()); - dos.writeInt(e.getValue().size()); - - for(Entry<String,Integer> e2 : e.getValue().entrySet()) { - - if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey()); - dos.writeUTF(e2.getKey()); - dos.writeInt(e2.getValue()); - - } - - } - } - public void read(DataInputStream din) throws IOException { - + public void writeData(DataOutputStream dos) throws IOException { + dos.writeInt(getFeatureSet().size()); + // DB.println("write"+getFeatureSet().size()); + for (Entry<String, HashMap<String, Integer>> e : getFeatureSet().entrySet()) { + dos.writeUTF(e.getKey()); + dos.writeInt(e.getValue().size()); + + for (Entry<String, Integer> e2 : e.getValue().entrySet()) { + + if (e2.getKey() == null) + DB.println("key " + e2.getKey() + " value " + e2.getValue() + " e -key " + e.getKey()); + dos.writeUTF(e2.getKey()); + dos.writeInt(e2.getValue()); + + } + + } + } + + public void read(DataInputStream din) throws IOException { + int size = din.readInt(); - for(int i=0; i<size;i++) { + for (int i = 0; i < size; i++) { String k = din.readUTF(); int size2 = din.readInt(); - - HashMap<String,Integer> h = new HashMap<String,Integer>(); - getFeatureSet().put(k,h); - for(int j = 0;j<size2;j++) { + + HashMap<String, Integer> h = new HashMap<String, Integer>(); + getFeatureSet().put(k, h); + for (int j = 0; j < size2; j++) { h.put(din.readUTF(), din.readInt()); } getFeatureCounter().put(k, size2); } - count =size; - // stop(); - calculateBits(); + count = size; + // stop(); + calculateBits(); } - - /** + /** * Clear the data */ - public void clearData() { - getFeatureSet().clear(); - m_featureBits.clear(); - getFeatureSet().clear(); - } + public void clearData() { + getFeatureSet().clear(); + m_featureBits.clear(); + getFeatureSet().clear(); + } - public HashMap<String,Integer> getFeatureCounter() { + @Override + public HashMap<String, Integer> getFeatureCounter() { return m_featureCounters; } - public HashMap<String,HashMap<String,Integer>> getFeatureSet() { + public HashMap<String, HashMap<String, Integer>> getFeatureSet() { return m_featureSets; } - - public String[] reverse(HashMap<String,Integer> v){ + + public String[] reverse(HashMap<String, Integer> v) { String[] set = new String[v.size()]; - for(Entry<String,Integer> e : v.entrySet()) { - set[e.getValue()]=e.getKey(); + for (Entry<String, Integer> e : v.entrySet()) { + set[e.getValue()] = e.getKey(); } return set; } - } diff --git a/dependencyParser/mate-tools/src/is2/data/MFO.java b/dependencyParser/mate-tools/src/is2/data/MFO.java index ff4d43e..a8bc441 100755 --- a/dependencyParser/mate-tools/src/is2/data/MFO.java +++ b/dependencyParser/mate-tools/src/is2/data/MFO.java @@ -1,63 +1,62 @@ package is2.data; - -import is2.util.DB; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.Map.Entry; +import is2.util.DB; + /** * Map Features, do not map long to integer - * + * * @author Bernd Bohnet, 20.09.2009 */ -final public class MFO implements IEncoderPlus { +final public class MFO implements IEncoderPlus { /** The features and its values */ - static private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>(); + static private final HashMap<String, HashMap<String, Integer>> m_featureSets = new HashMap<String, HashMap<String, Integer>>(); /** The feature class and the number of values */ - static private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>(); + static private final HashMap<String, Integer> m_featureCounters = new HashMap<String, Integer>(); /** The number of bits needed to encode a feature */ - static final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>(); + static final HashMap<String, Integer> m_featureBits = new HashMap<String, Integer>(); - final public static String NONE="<None>"; + final public static String NONE = "<None>"; final public static class Data4 { public int shift; - public short a0,a1,a2,a3,a4,a5,a6,a7,a8,a9; - public int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9; + public short a0, a1, a2, a3, a4, a5, a6, a7, a8, a9; + public int v0, v1, v2, v3, v4, v5, v6, v7, v8, v9; final public long calcs(int b, long v, long l) { - if (l<0) return l; - l |= v<<shift; - shift +=b; + if (l < 0) + return l; + l |= v << shift; + shift += b; return l; } } - public MFO () {} - - - - + public MFO() { + } /** * Register an attribute class, if it not exists and add a possible value + * * @param type * @param type2 */ - final public int register(String a, String v) { + @Override + final public int register(String a, String v) { - HashMap<String,Integer> fs = getFeatureSet().get(a); - if (fs==null) { - fs = new HashMap<String,Integer>(); + HashMap<String, Integer> fs = getFeatureSet().get(a); + if (fs == null) { + fs = new HashMap<String, Integer>(); getFeatureSet().put(a, fs); fs.put(NONE, 0); getFeatureCounter().put(a, 1); @@ -65,290 +64,285 @@ final public class MFO implements IEncoderPlus { Integer c = getFeatureCounter().get(a); Integer i = fs.get(v); - if (i==null) { + if (i == null) { fs.put(v, c); c++; - getFeatureCounter().put(a,c); - return c-1; - } else return i; + getFeatureCounter().put(a, c); + return c - 1; + } else + return i; } /** * Calculates the number of bits needed to encode a feature */ - public void calculateBits() { + public void calculateBits() { - int total=0; - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2))); + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + int bits = (int) Math.ceil((Math.log(e.getValue() + 1) / Math.log(2))); m_featureBits.put(e.getKey(), bits); - total+=bits; - // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1)); } - // System.out.println("total number of needed bits "+total); + // System.out.println("total number of needed bits "+total); } - - @Override - public String toString() { + public String toString() { StringBuffer content = new StringBuffer(); - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - content.append(e.getKey()+" "+e.getValue()); + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + content.append(e.getKey() + " " + e.getValue()); content.append(':'); - // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); + // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); content.append(getFeatureBits(e.getKey())); - /*if (vs.size()<120) - for(Entry<String,Integer> e2 : vs.entrySet()) { - content.append(e2.getKey()+" ("+e2.getValue()+") "); - }*/ + /* + * if (vs.size()<120) for(Entry<String,Integer> e2 : vs.entrySet()) + * { content.append(e2.getKey()+" ("+e2.getValue()+") "); } + */ content.append('\n'); } return content.toString(); } - - static final public long calcs(Data4 d,int b, long v, long l) { - if (l<0) return l; - l |= v<<d.shift; - d.shift +=b; + static final public long calcs(Data4 d, int b, long v, long l) { + if (l < 0) + return l; + l |= v << d.shift; + d.shift += b; return l; } - static final public short getFeatureBits(String a) { - return (short)m_featureBits.get(a).intValue(); + return (short) m_featureBits.get(a).intValue(); } - - /** * Get the integer place holder of the string value v of the type a - * - * @param t the type - * @param v the value + * + * @param t + * the type + * @param v + * the value * @return the integer place holder of v */ + @Override final public int getValue(String t, String v) { - if (m_featureSets.get(t)==null) return -1; + if (m_featureSets.get(t) == null) + return -1; Integer vi = m_featureSets.get(t).get(v); - if (vi==null) return -1; //stop && + if (vi == null) + return -1; // stop && return vi.intValue(); } /** * Static version of getValue + * * @see getValue */ static final public int getValueS(String a, String v) { - if (m_featureSets.get(a)==null) return -1; + if (m_featureSets.get(a) == null) + return -1; Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; //stop && + if (vi == null) + return -1; // stop && return vi.intValue(); } public int hasValue(String a, String v) { Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; + if (vi == null) + return -1; return vi.intValue(); } - - - final public long calc2(Data4 d) { - if (d.v0<0||d.v1<0) return -1; - // if (d.v1<0||d.v2<0) return -1; + if (d.v0 < 0 || d.v1 < 0) + return -1; + // if (d.v1<0||d.v2<0) return -1; long l = d.v0; - short shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - // l |= (long)d.v2<<shift; - d.shift=shift; + short shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + // l |= (long)d.v2<<shift; + d.shift = shift; - //d.shift=; + // d.shift=; return l; } - - final public long calc3(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0) return -1; - // if (d.v1<0||d.v2<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0) + return -1; + // if (d.v1<0||d.v2<0) return -1; long l = d.v0; - short shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - d.shift=shift + d.a2; + short shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + d.shift = shift + d.a2; - //d.shift=; + // d.shift=; return l; } - final public long calc4(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - d.shift= shift +d.a3; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + d.shift = shift + d.a3; return l; } - - final public long calc5(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - d.shift =shift+d.a4; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + d.shift = shift + d.a4; return l; } - static final public long calc6(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - shift +=d.a4; - l |= (long)d.v5<<shift; - d.shift =shift+d.a5; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + shift += d.a4; + l |= (long) d.v5 << shift; + d.shift = shift + d.a5; return l; } final public long calc7(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0 || d.v6 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - shift +=d.a4; - l |= (long)d.v5<<shift; - shift +=d.a5; - l |= (long)d.v6<<shift; - d.shift =shift+d.a6; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + shift += d.a4; + l |= (long) d.v5 << shift; + shift += d.a5; + l |= (long) d.v6 << shift; + d.shift = shift + d.a6; return l; } - final public long calc8(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0||d.v7<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0 || d.v6 < 0 || d.v7 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - shift +=d.a4; - l |= (long)d.v5<<shift; - shift +=d.a5; - l |= (long)d.v6<<shift; - shift +=d.a6; - l |= (long)d.v7<<shift; - d.shift =shift+d.a7; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + shift += d.a4; + l |= (long) d.v5 << shift; + shift += d.a5; + l |= (long) d.v6 << shift; + shift += d.a6; + l |= (long) d.v7 << shift; + d.shift = shift + d.a7; return l; } - - - - - - - /** - * Maps a long to a integer value. This is very useful to save memory for sparse data long values + /** + * Maps a long to a integer value. This is very useful to save memory for + * sparse data long values + * * @param node * @return the integer */ - static public int misses = 0; - static public int good = 0; - - - + static public int misses = 0; + static public int good = 0; /** * Write the data + * * @param dos * @throws IOException */ - public void writeData(DataOutputStream dos) throws IOException { + public void writeData(DataOutputStream dos) throws IOException { dos.writeInt(getFeatureSet().size()); - for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) { + for (Entry<String, HashMap<String, Integer>> e : getFeatureSet().entrySet()) { dos.writeUTF(e.getKey()); dos.writeInt(e.getValue().size()); - for(Entry<String,Integer> e2 : e.getValue().entrySet()) { + for (Entry<String, Integer> e2 : e.getValue().entrySet()) { - if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey()); - dos.writeUTF(e2.getKey()); + if (e2.getKey() == null) + DB.println("key " + e2.getKey() + " value " + e2.getValue() + " e -key " + e.getKey()); + dos.writeUTF(e2.getKey()); dos.writeInt(e2.getValue()); - } + } } } - public void read(DataInputStream din) throws IOException { + + public void read(DataInputStream din) throws IOException { int size = din.readInt(); - for(int i=0; i<size;i++) { + for (int i = 0; i < size; i++) { String k = din.readUTF(); int size2 = din.readInt(); - HashMap<String,Integer> h = new HashMap<String,Integer>(); - getFeatureSet().put(k,h); - for(int j = 0;j<size2;j++) { + HashMap<String, Integer> h = new HashMap<String, Integer>(); + getFeatureSet().put(k, h); + for (int j = 0; j < size2; j++) { h.put(din.readUTF(), din.readInt()); } getFeatureCounter().put(k, size2); @@ -357,8 +351,7 @@ final public class MFO implements IEncoderPlus { calculateBits(); } - - /** + /** * Clear the data */ static public void clearData() { @@ -367,18 +360,19 @@ final public class MFO implements IEncoderPlus { getFeatureSet().clear(); } - public HashMap<String,Integer> getFeatureCounter() { + @Override + public HashMap<String, Integer> getFeatureCounter() { return m_featureCounters; } - static public HashMap<String,HashMap<String,Integer>> getFeatureSet() { + static public HashMap<String, HashMap<String, Integer>> getFeatureSet() { return m_featureSets; } - static public String[] reverse(HashMap<String,Integer> v){ + static public String[] reverse(HashMap<String, Integer> v) { String[] set = new String[v.size()]; - for(Entry<String,Integer> e : v.entrySet()) { - set[e.getValue()]=e.getKey(); + for (Entry<String, Integer> e : v.entrySet()) { + set[e.getValue()] = e.getKey(); } return set; } diff --git a/dependencyParser/mate-tools/src/is2/data/Open.java b/dependencyParser/mate-tools/src/is2/data/Open.java index ba75fe3..d9bf0e6 100755 --- a/dependencyParser/mate-tools/src/is2/data/Open.java +++ b/dependencyParser/mate-tools/src/is2/data/Open.java @@ -1,8 +1,6 @@ package is2.data; - - -final public class Open { +final public class Open { public float p; short s, e, label; @@ -11,27 +9,30 @@ final public class Open { Closed left; Closed right; - public Open(short s, short t, short dir, short label,Closed left, Closed right, float p) { + public Open(short s, short t, short dir, short label, Closed left, Closed right, float p) { this.s = s; this.e = t; this.label = label; - this.dir = (byte)dir; - this.left =left; - this.right=right; - this.p=p; + this.dir = (byte) dir; + this.left = left; + this.right = right; + this.p = p; } - void create(Parse parse) { if (dir == 0) { parse.heads[s] = e; - if (label != -1) parse.labels[s] = label; + if (label != -1) + parse.labels[s] = label; } else { parse.heads[e] = s; - if (label != -1) parse.labels[e] = label; + if (label != -1) + parse.labels[e] = label; } - if (left != null) left.create(parse); - if (right != null) right.create(parse); + if (left != null) + left.create(parse); + if (right != null) + right.create(parse); } - + } diff --git a/dependencyParser/mate-tools/src/is2/data/PSTree.java b/dependencyParser/mate-tools/src/is2/data/PSTree.java index 30c1364..e916548 100644 --- a/dependencyParser/mate-tools/src/is2/data/PSTree.java +++ b/dependencyParser/mate-tools/src/is2/data/PSTree.java @@ -1,22 +1,21 @@ /** - * + * */ package is2.data; -import is2.util.DB; - import java.util.ArrayList; import java.util.Collections; -import java.util.Stack; + +import is2.util.DB; /** * @author Dr. Bernd Bohnet, 17.01.2011 - * - * + * + * */ public class PSTree { - int wordCount =0; + int wordCount = 0; public String entries[]; public String lemmas[]; public int head[]; @@ -25,179 +24,183 @@ public class PSTree { public int non; public int terminalCount; public String[] morph; - + public int[] forms; public int[] phrases; public int[][] psfeats; public int[] ppos; - - + /** * @param d */ public PSTree(SentenceData09 d) { - create(d.length()-1,d.length()*20); - for(int i=1;i<d.length();i++) { - entries[i-1]=d.forms[i]; - pos[i-1]=d.ppos[i]; + create(d.length() - 1, d.length() * 20); + for (int i = 1; i < d.length(); i++) { + entries[i - 1] = d.forms[i]; + pos[i - 1] = d.ppos[i]; } } - /** * Create an undefined phrase tree */ - public PSTree() { } - + public PSTree() { + } /** * @param terminals * @param nonTerminals */ public void create(int terminals, int nonTerminals) { - entries = new String[terminals+nonTerminals]; - pos = new String[terminals+nonTerminals]; - head = new int[terminals+nonTerminals]; - lemmas = new String[terminals+nonTerminals]; - morph = new String[terminals+nonTerminals]; - non=terminals; - wordCount=terminals; - - for(int i=terminals+1;i<head.length;i++) head[i]=-1; + entries = new String[terminals + nonTerminals]; + pos = new String[terminals + nonTerminals]; + head = new int[terminals + nonTerminals]; + lemmas = new String[terminals + nonTerminals]; + morph = new String[terminals + nonTerminals]; + non = terminals; + wordCount = terminals; + + for (int i = terminals + 1; i < head.length; i++) + head[i] = -1; } - + @Override public String toString() { StringBuffer s = new StringBuffer(); - for(int i=0;i<entries.length;i++) { - if (head[i]==-1&&entries[i]==null) break; + for (int i = 0; i < entries.length; i++) { + if (head[i] == -1 && entries[i] == null) + break; - s.append(i+"\t"+pos[i]+"\t"+entries[i]+"\t"+head[i]+(ok==null?"":("\t"+(ok[i]==1)))+" \n"); + s.append(i + "\t" + pos[i] + "\t" + entries[i] + "\t" + head[i] + (ok == null ? "" : ("\t" + (ok[i] == 1))) + + " \n"); } - // DB.println("entries "+entries.length); + // DB.println("entries "+entries.length); return s.toString(); } - /** * @return */ public boolean containsNull() { - for(int k=0;k<wordCount-1;k++) { - if (entries[k]==null) return true; + for (int k = 0; k < wordCount - 1; k++) { + if (entries[k] == null) + return true; } return false; } - public int equals(SentenceData09 s) { - int j=1; // starts with root - for(int i=0;i<terminalCount-1;i++){ + int j = 1; // starts with root + for (int i = 0; i < terminalCount - 1; i++) { - // if (s.forms[j].equals("erschrekkend")) s.forms[j]="erschreckend"; + // if (s.forms[j].equals("erschrekkend")) s.forms[j]="erschreckend"; - if (s.forms.length<j) { - DB.println(""+s+" "+this.toString()); + if (s.forms.length < j) { + DB.println("" + s + " " + this.toString()); return i; } - if(!entries[i].equals(s.forms[j])) { - // System.out.println("ps "+entries[i]+" != ds "+s.forms[j]); + if (!entries[i].equals(s.forms[j])) { + // System.out.println("ps "+entries[i]+" != ds "+s.forms[j]); // Rolls-Royce - if(entries[i].startsWith(s.forms[j]) && s.forms.length>i+2 && s.forms[j+1].equals("-")) { - j+=2; - if( entries[i].contains(s.forms[j-1]) && s.forms.length>i+3 && s.forms[j+1].equals("-")) { - j+=2; // && - // System.out.println("s.forms[j] "+s.forms[j]+" s.forms[j-1] "+s.forms[j-1]+" "+entries[i]); - if( entries[i].contains(s.forms[j-1]) && s.forms.length>i+3 && s.forms[j+1].equals("-")) { - j+=2; // && - // System.out.println("s.forms[j] "+s.forms[j]+" s.forms[j-1] "+s.forms[j-1]+" "+entries[i]); + if (entries[i].startsWith(s.forms[j]) && s.forms.length > i + 2 && s.forms[j + 1].equals("-")) { + j += 2; + if (entries[i].contains(s.forms[j - 1]) && s.forms.length > i + 3 && s.forms[j + 1].equals("-")) { + j += 2; // && + // System.out.println("s.forms[j] "+s.forms[j]+" + // s.forms[j-1] "+s.forms[j-1]+" "+entries[i]); + if (entries[i].contains(s.forms[j - 1]) && s.forms.length > i + 3 + && s.forms[j + 1].equals("-")) { + j += 2; // && + // System.out.println("s.forms[j] "+s.forms[j]+" + // s.forms[j-1] "+s.forms[j-1]+" "+entries[i]); } - } - //Interstate\/Johnson - } else if(entries[i].startsWith(s.forms[j]) && s.forms.length>i+2 && s.forms[j+1].equals("/")) { - j+=2; - if( entries[i].contains(s.forms[j-1]) && s.forms.length>i+3 && s.forms[j+1].equals("/")) { - j+=2; // && - // System.out.println("s.forms[j] "+s.forms[j]+" s.forms[j-1] "+s.forms[j-1]+" "+entries[i]); + } + // Interstate\/Johnson + } else if (entries[i].startsWith(s.forms[j]) && s.forms.length > i + 2 && s.forms[j + 1].equals("/")) { + j += 2; + if (entries[i].contains(s.forms[j - 1]) && s.forms.length > i + 3 && s.forms[j + 1].equals("/")) { + j += 2; // && + // System.out.println("s.forms[j] "+s.forms[j]+" + // s.forms[j-1] "+s.forms[j-1]+" "+entries[i]); } - // U.S.-Japan -> U . S . - Japan - } else if(entries[i].startsWith(s.forms[j]) && s.forms.length>i+2 && s.forms[j+1].equals(".")) { - j+=2; - if( entries[i].contains(s.forms[j-1]) && s.forms.length>i+3 && s.forms[j+1].equals(".")) { - j+=2; // && - // System.out.println("s.forms[j] "+s.forms[j]+" s.forms[j-1] "+s.forms[j-1]+" "+entries[i]); + // U.S.-Japan -> U . S . - Japan + } else if (entries[i].startsWith(s.forms[j]) && s.forms.length > i + 2 && s.forms[j + 1].equals(".")) { + j += 2; + if (entries[i].contains(s.forms[j - 1]) && s.forms.length > i + 3 && s.forms[j + 1].equals(".")) { + j += 2; // && + // System.out.println("s.forms[j] "+s.forms[j]+" + // s.forms[j-1] "+s.forms[j-1]+" "+entries[i]); } - } else if(entries[i].startsWith(s.forms[j]) && s.forms.length>i+1 && s.forms[j+1].equals("'S")) { - j+=1; + } else if (entries[i].startsWith(s.forms[j]) && s.forms.length > i + 1 && s.forms[j + 1].equals("'S")) { + j += 1; } else { // chech those !!! - // System.out.print("entry "+entries[i]+" form "+s.forms[j]+" "); + // System.out.print("entry "+entries[i]+" form + // "+s.forms[j]+" "); return j; } } j++; - } // without root return s.length(); - //return j; + // return j; } - /** * @param dn * @return */ public int getPS(int dn) { - return this.head[dn-1]; + return this.head[dn - 1]; } - /** * @param dn * @param n - * @param commonHead the common head in the phrase structure + * @param commonHead + * the common head in the phrase structure * @return */ public String getChain(int dn, int n, int commonHead) { - int pdn =dn-1,pdh=n-1; - // int phraseHead =head[pdh]; + int pdn = dn - 1; + // int phraseHead =head[pdh]; - // System.out.println("phrase head "+phraseHead+" common head "+commonHead); + // System.out.println("phrase head "+phraseHead+" common head + // "+commonHead); int[] ch = new int[20]; - int head =this.head[pdn]; - int i=0; - ch[i++]=head; - while(commonHead!=head && head!=0) { + int head = this.head[pdn]; + int i = 0; + ch[i++] = head; + while (commonHead != head && head != 0) { head = this.head[head]; - ch[i++]=head; + ch[i++] = head; } - StringBuffer chain= new StringBuffer(); + StringBuffer chain = new StringBuffer(); - for(int k=0;k<i;k++) { + for (int k = 0; k < i; k++) { chain.append(entries[ch[k]]).append(" "); } return chain.toString(); } - /** * @param dn * @param n @@ -206,43 +209,42 @@ public class PSTree { public int getCommonHead(int d, int dh) { int pdh = this.getPS(dh), pd = this.getPS(d); - ArrayList<Integer> path2root = getPath2Root(pdh); - //System.out.println("path 2 root "+path2root+" pdh "+pdh); + // System.out.println("path 2 root "+path2root+" pdh "+pdh); - for(int n : path2root) { - int candidateHead=pd; - while(candidateHead!=0&& candidateHead!=-1) { - if (n==candidateHead) return n; - candidateHead =this.head[candidateHead]; + for (int n : path2root) { + int candidateHead = pd; + while (candidateHead != 0 && candidateHead != -1) { + if (n == candidateHead) + return n; + candidateHead = this.head[candidateHead]; } } return -1; } - /** * @param pdh */ private ArrayList<Integer> getPath2Root(int pdh) { ArrayList<Integer> path = new ArrayList<Integer>(); - // restrict the number in case its a cycle which should never be - for(int k=0;k<100;k++) { - if(pdh==-1) break; + for (int k = 0; k < 100; k++) { + if (pdh == -1) + break; path.add(pdh); pdh = this.head[pdh]; - if(pdh==0) break; + if (pdh == 0) + break; } return path; } - /** - * Get operations to create root - * see operation in method getOperation + * Get operations to create root see operation in method getOperation + * * @param pr */ public String getOperationRoot(int pr) { @@ -250,48 +252,48 @@ public class PSTree { StringBuffer o = new StringBuffer(); int h = pr; int[] path = new int[10]; - // System.out.println(" start node "+pr); - int k=0; - for(;k<10;k++) { + // System.out.println(" start node "+pr); + int k = 0; + for (; k < 10; k++) { h = head[h]; - if (h==-1){ + if (h == -1) { break; } - path[k]=h; - if (h==0){ + path[k] = h; + if (h == 0) { break; } } - k-=2; + k -= 2; - boolean first=true; - for(;k>=0;k--) { + boolean first = true; + for (; k >= 0; k--) { // create phrase if (first) { o.append("c:").append(entries[path[k]]); - first =false; + first = false; } // insert and create phrase - else {o.append(":ci:").append(entries[path[k]]);} + else { + o.append(":ci:").append(entries[path[k]]); + } } - // insert dependent node - //if (o.length()>0) - o.append(":in:d"); - //else o.append("in:d"); // insert root into nothing + // if (o.length()>0) + o.append(":in:d"); + // else o.append("in:d"); // insert root into nothing return o.toString(); } - /** * Create operation to include dependency edges in phrase structure - * Operations: c - create ; i - insert ; in - insert (dependent) node ; up:X go the (phrase) X up - * ci create and insert ... - * + * Operations: c - create ; i - insert ; in - insert (dependent) node ; up:X + * go the (phrase) X up ci create and insert ... + * * @param dn * @param n * @param commonHead @@ -299,296 +301,322 @@ public class PSTree { */ public String getOperation(int dn, int n, int commonHead) { - StringBuffer o= new StringBuffer(); + StringBuffer o = new StringBuffer(); // from n move up to common head, if needed - int ph =n-1, pd = dn-1; + int ph = n - 1, pd = dn - 1; int[] path = new int[20]; - int i=0; - - int h =ph; + int h = ph; - boolean nth=false; - for(int k=0;k<10;k++) { + boolean nth = false; + for (int k = 0; k < 10; k++) { h = head[h]; - path[k]=h; - if (nth) o.append(':'); - o.append("up:"+entries[h]); - nth=true; - if (h==commonHead) break; + path[k] = h; + if (nth) + o.append(':'); + o.append("up:" + entries[h]); + nth = true; + if (h == commonHead) + break; } // from common head to the node - int k=0; - h=pd; - for(;k<10;k++) { + int k = 0; + h = pd; + for (; k < 10; k++) { h = head[h]; - path[k]=h; - if (h==commonHead){ + path[k] = h; + if (h == commonHead) { break; } } - k-=1; + k -= 1; - // boolean first=true; - for(;k>=0;k--) { + // boolean first=true; + for (; k >= 0; k--) { // create phrase if (!nth) { o.append("ci:").append(entries[path[k]]); - nth =true; + nth = true; } // insert and create phrase - else {o.append(":ci:").append(entries[path[k]]);} + else { + o.append(":ci:").append(entries[path[k]]); + } } - // insert dependent node o.append(":in:d"); - - return o.toString(); } - /** - * @param ph node in the phrase structure corresponding to the head in the dependency structure - * @param pt node in the prhase structure corresponding to the dependent in the ds. - * @param check + * @param ph + * node in the phrase structure corresponding to the head in the + * dependency structure + * @param pt + * node in the prhase structure corresponding to the dependent in + * the ds. + * @param check * @return rules was applicable */ public boolean exec(String r, int ph, int pt, boolean check) { String o[] = r.split(":"); - int last =-1, headP = -1; + int last = -1, headP = -1; // create root node - // System.out.println("operation "+r+" "+ph+" "+pt); - boolean done =true; - for(int i=0;i<o.length;i++) { + // System.out.println("operation "+r+" "+ph+" "+pt); + boolean done = true; + for (int i = 0; i < o.length; i++) { if (o[i].equals("c")) { - if (check) return true; - - if(ph<0) { - last=non++; + if (check) + return true; + + if (ph < 0) { + last = non++; } - entries[non]=o[++i]; // create - head[pt]=non; - head[non]=last; // insert into root - last=non++; + entries[non] = o[++i]; // create + head[pt] = non; + head[non] = last; // insert into root + last = non++; } else if (o[i].equals("ci")) { - if (check) return true; - entries[non]= o[++i]; // create + if (check) + return true; + entries[non] = o[++i]; // create head[non] = last; // insert - last =non; + last = non; non++; - } else if (o[i].equals("in")&&o[i+1].equals("d")) { - if (check) return true; + } else if (o[i].equals("in") && o[i + 1].equals("d")) { + if (check) + return true; head[pt] = last; // insert i++; // move forward because of 'd' } else if (o[i].equals("up")) { - if (ph==-1) { - // System.out.println("ph is -1 please check this "+ph+" there is a bug "); + if (ph == -1) { + // System.out.println("ph is -1 please check this "+ph+" + // there is a bug "); return false; } - if (headP==-1) headP=head[ph]; - else headP=head[headP]; + if (headP == -1) + headP = head[ph]; + else + headP = head[headP]; - try { - if (headP==-1 || entries[headP]==null ||!entries[headP].equals(o[i+1])) return false; + try { + if (headP == -1 || entries[headP] == null || !entries[headP].equals(o[i + 1])) + return false; - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); - System.out.println(""+entries[headP]+" o[i+1] "+o[i+1]+" "+headP+" "+this.terminalCount); - // System.out.println(""+ this.toString()); + System.out.println( + "" + entries[headP] + " o[i+1] " + o[i + 1] + " " + headP + " " + this.terminalCount); + // System.out.println(""+ this.toString()); System.exit(0); } i++; - last =headP; + last = headP; } else { done = false; } } - return done; } /** * More tolerant mapping - * - * @param ph node in the phrase structure corresponding to the head in the dependency structure - * @param pt node in the prhase structure corresponding to the dependent in the ds. - * @param check + * + * @param ph + * node in the phrase structure corresponding to the head in the + * dependency structure + * @param pt + * node in the prhase structure corresponding to the dependent in + * the ds. + * @param check * @return rules was applicable */ public boolean execT(String r, int ph, int pt, boolean check) { String o[] = r.split(":"); - int last =-1, headP = -1; + int last = -1, headP = -1; - int up=0; + int up = 0; - boolean done =true; - for(int i=0;i<o.length;i++) { + boolean done = true; + for (int i = 0; i < o.length; i++) { if (o[i].equals("c")) { - if (check) return true; - + if (check) + return true; // create root node - if(ph<0) { - last=non++; + if (ph < 0) { + last = non++; } - entries[non]= o[++i]; // create - head[pt]=non; - head[non]=last; // insert into root - last=non++; + entries[non] = o[++i]; // create + head[pt] = non; + head[non] = last; // insert into root + last = non++; } else if (o[i].equals("ci")) { - if (check) return true; - entries[non]= o[++i]; // create + if (check) + return true; + entries[non] = o[++i]; // create head[non] = last; // insert - last =non; + last = non; non++; - } else if (o[i].equals("in")&&o[i+1].equals("d")) { - if (check) return true; - - // DB.println("hallo"); - - if (last !=-1) + } else if (o[i].equals("in") && o[i + 1].equals("d")) { + if (check) + return true; + + // DB.println("hallo"); + + if (last != -1) head[pt] = last; // insert - - + // i am not sure if this does much good? - // if (last ==-1) - - // done=true; - + // if (last ==-1) + + // done=true; - i++; // move forward because of 'd' - + } else if (o[i].equals("up")) { up++; - if (ph==-1) { + if (ph == -1) { return false; } - if (headP==-1) headP=head[ph]; - else headP=head[headP]; + if (headP == -1) + headP = head[ph]; + else + headP = head[headP]; - try { + try { // tolerant mapping - if (headP==-1 || entries[headP]==null || - ((!entries[headP].equals(o[i+1]) ) && up>1 )) return false; //>1 -// && entries[headP].charAt(0)!=o[i+1].charAt(0) - } catch(Exception e) { + if (headP == -1 || entries[headP] == null || ((!entries[headP].equals(o[i + 1])) && up > 1)) + return false; // >1 + // && entries[headP].charAt(0)!=o[i+1].charAt(0) + } catch (Exception e) { e.printStackTrace(); - System.out.println(""+entries[headP]+" o[i+1] "+o[i+1]+" "+headP+" "+this.terminalCount); + System.out.println( + "" + entries[headP] + " o[i+1] " + o[i + 1] + " " + headP + " " + this.terminalCount); } i++; - last =headP; + last = headP; } else { done = false; } } - return done; } - - public final static boolean INSERT_NEWLINE =true; + public final static boolean INSERT_NEWLINE = true; /** - * Convert to bracket format + * Convert to bracket format + * * @param newLine * @return */ public String toPennBracket(boolean newLine) { - StringBuffer b = new StringBuffer(); - ArrayList<Integer> current=null;// = new ArrayList<Integer>(); - int open =0; - for(int i=0; i<terminalCount ;i++) { + ArrayList<Integer> current = null;// = new ArrayList<Integer>(); + int open = 0; + for (int i = 0; i < terminalCount; i++) { ArrayList<Integer> path = getPathToRoot(i); - + ArrayList<Integer> diff = getDiffPath(path, current); - - boolean spaces=false; + + boolean spaces = false; ArrayList<Integer> common = this.getDiffCommon(path, current); - if(current!=null && (current.size()>common.size())) { - - // close brackets - for(int bc =0;bc<current.size()-common.size();bc++) { + if (current != null && (current.size() > common.size())) { + + // close brackets + for (int bc = 0; bc < current.size() - common.size(); bc++) { b.append(")"); open--; } - if(diff.size()==0 && newLine) b.append("\n"); - spaces=true; + if (diff.size() == 0 && newLine) + b.append("\n"); + spaces = true; } - if(i!=0 && diff.size()>0 && newLine) b.append("\n").append(createSpaces(open)); + if (i != 0 && diff.size() > 0 && newLine) + b.append("\n").append(createSpaces(open)); - for(int k=diff.size()-1;k>=0;k--) { + for (int k = diff.size() - 1; k >= 0; k--) { open++; - b.append("("+(entries[path.get(k)]==null?" ":entries[path.get(k)])); - if (k!=0 &&path.size()-1!=k && newLine) + b.append("(" + (entries[path.get(k)] == null ? " " : entries[path.get(k)])); + if (k != 0 && path.size() - 1 != k && newLine) b.append("\n").append(createSpaces(open)); - spaces=false; + spaces = false; } - if(spaces) b.append(createSpaces(open)); - else b.append(" "); - - String term=entries[i]; - if(term.equals("(")) term="-LRB-"; - if(term.equals(")")) term="-RRB-"; - if(term.equals("{")) term="-LCB-"; - if(term.equals("}")) term="-RCB-"; - - String ps=pos[i]; - if(ps.equals("(")) ps="-LRB-"; - if(ps.equals("$(")) ps="-LRB-"; - - if(ps.equals(")")) ps="-RRB-"; - if(ps.equals("{")) ps="-LCB-"; - if(ps.equals("}")) ps="-RCB-"; - + if (spaces) + b.append(createSpaces(open)); + else + b.append(" "); + + String term = entries[i]; + if (term.equals("(")) + term = "-LRB-"; + if (term.equals(")")) + term = "-RRB-"; + if (term.equals("{")) + term = "-LCB-"; + if (term.equals("}")) + term = "-RCB-"; + + String ps = pos[i]; + if (ps.equals("(")) + ps = "-LRB-"; + if (ps.equals("$(")) + ps = "-LRB-"; + + if (ps.equals(")")) + ps = "-RRB-"; + if (ps.equals("{")) + ps = "-LCB-"; + if (ps.equals("}")) + ps = "-RCB-"; b.append("(").append(ps).append(" ").append(term).append(')'); current = path; - // break; + // break; } - for(;open>0;open--) { + for (; open > 0; open--) { b.append(")"); } - // b.append("\n"); - + // b.append("\n"); + return b.toString(); } - static int cnt=0; + + static int cnt = 0; /** * @param path @@ -596,22 +624,24 @@ public class PSTree { * @return */ private ArrayList<Integer> getDiffPath(ArrayList<Integer> path, ArrayList<Integer> current) { - if (current==null) return path; + if (current == null) + return path; ArrayList<Integer> common = new ArrayList<Integer>(); - - int pindex = path.size()-1; - int cindex = current.size()-1; - while(cindex>=0 && pindex>=0) { + int pindex = path.size() - 1; + int cindex = current.size() - 1; + + while (cindex >= 0 && pindex >= 0) { - if(path.get(pindex)==current.get(cindex)) { + if (path.get(pindex) == current.get(cindex)) { cindex--; pindex--; - } else break; + } else + break; } - - for(int k=0;k<=pindex;k++) { + + for (int k = 0; k <= pindex; k++) { common.add(path.get(k)); } @@ -619,72 +649,74 @@ public class PSTree { } private ArrayList<Integer> getDiffCommon(ArrayList<Integer> path, ArrayList<Integer> current) { - if (current==null) return path; + if (current == null) + return path; ArrayList<Integer> common = new ArrayList<Integer>(); - int pindex = path.size()-1; - int cindex = current.size()-1; + int pindex = path.size() - 1; + int cindex = current.size() - 1; - while(cindex>=0 && pindex>=0) { + while (cindex >= 0 && pindex >= 0) { - if(path.get(pindex)==current.get(cindex)) { + if (path.get(pindex) == current.get(cindex)) { common.add(path.get(pindex)); cindex--; pindex--; - } else break; + } else + break; } Collections.reverse(common); - // System.out.println("common "+pindex+" "+common); + // System.out.println("common "+pindex+" "+common); return common; } + /** * @param i * @return */ private StringBuffer createSpaces(int i) { StringBuffer s = new StringBuffer(); - for (int k=0;k<i;k++) s.append(" "); + for (int k = 0; k < i; k++) + s.append(" "); return s; } - /** * @param i * @return */ private ArrayList<Integer> getPathToRoot(int i) { - ArrayList<Integer> path = new ArrayList<Integer> (); + ArrayList<Integer> path = new ArrayList<Integer>(); - int h=i; - while(true) { - h=this.head[h]; - if (h<this.terminalCount || path.contains(h)) break; + int h = i; + while (true) { + h = this.head[h]; + if (h < this.terminalCount || path.contains(h)) + break; path.add(h); } - // Collections.reverse(list) - + // Collections.reverse(list) return path; } - public String conll09() { - + StringBuilder s = new StringBuilder(); - for(int i=0;i<this.terminalCount;i++) { - if (head[i]==-1&&entries[i]==null) break; + for (int i = 0; i < this.terminalCount; i++) { + if (head[i] == -1 && entries[i] == null) + break; - s.append((i+1)).append('\t').append(entries[i]).append("\t_\t_\t").append(pos[i]).append("\t_\t_\t_\t_\t_\t_\t_\t_\n"); + s.append((i + 1)).append('\t').append(entries[i]).append("\t_\t_\t").append(pos[i]) + .append("\t_\t_\t_\t_\t_\t_\t_\t_\n"); - } - - + return s.toString(); } @@ -694,15 +726,17 @@ public class PSTree { */ public int[] getChilds(int head) { - int count=0; - for(int i =0;i<this.entries.length;i++) { - if (this.head[i]==head) count++; + int count = 0; + for (int i = 0; i < this.entries.length; i++) { + if (this.head[i] == head) + count++; } int[] clds = new int[count]; - count=0; - for(int i =0;i<this.entries.length;i++) { - if (this.head[i]==head) clds[count++]=i; + count = 0; + for (int i = 0; i < this.entries.length; i++) { + if (this.head[i] == head) + clds[count++] = i; } return clds; diff --git a/dependencyParser/mate-tools/src/is2/data/Parameter.java b/dependencyParser/mate-tools/src/is2/data/Parameter.java index 35a9911..7b1f870 100644 --- a/dependencyParser/mate-tools/src/is2/data/Parameter.java +++ b/dependencyParser/mate-tools/src/is2/data/Parameter.java @@ -1,12 +1,12 @@ /** - * + * */ package is2.data; /** * @author Dr. Bernd Bohnet, 23.12.2010 - * - * + * + * */ public class Parameter { diff --git a/dependencyParser/mate-tools/src/is2/data/ParametersFloat.java b/dependencyParser/mate-tools/src/is2/data/ParametersFloat.java index 653487e..17837ef 100755 --- a/dependencyParser/mate-tools/src/is2/data/ParametersFloat.java +++ b/dependencyParser/mate-tools/src/is2/data/ParametersFloat.java @@ -1,132 +1,132 @@ package is2.data; -import is2.util.DB; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import is2.util.DB; - -final public class ParametersFloat { +final public class ParametersFloat { public float[] parameters; public float[] total; public ParametersFloat(int size) { - - parameters = new float[size]; + + parameters = new float[size]; total = new float[size]; - for(int i = 0; i < parameters.length; i++) { + for (int i = 0; i < parameters.length; i++) { parameters[i] = 0F; total[i] = 0F; } } - /** * @param parameters2 */ public ParametersFloat(float[] p) { - parameters =p; + parameters = p; } public void average(double avVal) { - for(int j = 0; j < total.length; j++) { - parameters[j] = total[j]/((float)avVal); + for (int j = 0; j < total.length; j++) { + parameters[j] = total[j] / ((float) avVal); } - total =null; + total = null; } public ParametersFloat average2(double avVal) { float[] px = new float[this.parameters.length]; - for(int j = 0; j < total.length; j++) { - px[j] = total[j]/((float)avVal); + for (int j = 0; j < total.length; j++) { + px[j] = total[j] / ((float) avVal); } ParametersFloat pf = new ParametersFloat(px); return pf; } - - public void update(FV pred, FV act, float upd, float err) { - - float lam_dist = act.getScore(parameters,false)- pred.getScore(parameters,false); - float loss =(float)err - lam_dist; + public void update(FV pred, FV act, float upd, float err) { - FV dist = act.getDistVector(pred); + float lam_dist = act.getScore(parameters, false) - pred.getScore(parameters, false); + float loss = err - lam_dist; + + FV dist = act.getDistVector(pred); float alpha; float A = dist.dotProduct(dist); - if (A<=0.0000000000000001) alpha=0.0f; - else alpha= loss/A; - - // alpha = Math.min(alpha, 0.00578125F); - - dist.update(parameters, total, alpha, upd,false); - + if (A <= 0.0000000000000001) + alpha = 0.0f; + else + alpha = loss / A; + + // alpha = Math.min(alpha, 0.00578125F); + + dist.update(parameters, total, alpha, upd, false); + } - public void update(FV pred, FV act, float upd, float err, float C) { + public void update(FV pred, FV act, float upd, float err, float C) { - - float lam_dist = act.getScore(parameters,false)- pred.getScore(parameters,false); - float loss =(float)err - lam_dist; + float lam_dist = act.getScore(parameters, false) - pred.getScore(parameters, false); + float loss = err - lam_dist; - FV dist = act.getDistVector(pred); + FV dist = act.getDistVector(pred); float alpha; float A = dist.dotProduct(dist); - if (A<=0.0000000000000001) alpha=0.0f; - else alpha= loss/A; - + if (A <= 0.0000000000000001) + alpha = 0.0f; + else + alpha = loss / A; + alpha = Math.min(alpha, C); - - dist.update(parameters, total, alpha, upd,false); - + + dist.update(parameters, total, alpha, upd, false); + } - - public double update(FV a, double b) { double A = a.dotProduct(a); - if (A<=0.0000000000000000001) return 0.0; - return b/A; + if (A <= 0.0000000000000000001) + return 0.0; + return b / A; } - public double getScore(FV fv) { - if (fv ==null) return 0.0F; - return fv.getScore(parameters,false); + if (fv == null) + return 0.0F; + return fv.getScore(parameters, false); } - - final public void write(DataOutputStream dos) throws IOException{ + final public void write(DataOutputStream dos) throws IOException { dos.writeInt(parameters.length); - for(float d : parameters) dos.writeFloat(d); + for (float d : parameters) + dos.writeFloat(d); } - public void read(DataInputStream dis) throws IOException{ + public void read(DataInputStream dis) throws IOException { parameters = new float[dis.readInt()]; - int notZero=0; - for(int i=0;i<parameters.length;i++) { - parameters[i]=dis.readFloat(); - if (parameters[i]!=0.0F) notZero++; + int notZero = 0; + for (int i = 0; i < parameters.length; i++) { + parameters[i] = dis.readFloat(); + if (parameters[i] != 0.0F) + notZero++; } - - DB.println("read parameters "+parameters.length+" not zero "+notZero); + + DB.println("read parameters " + parameters.length + " not zero " + notZero); } - + public int countNZ() { - int notZero=0; - for(int i=0;i<parameters.length;i++) { - if (parameters[i]!=0.0F) notZero++; + int notZero = 0; + for (float parameter : parameters) { + if (parameter != 0.0F) + notZero++; } return notZero; @@ -136,7 +136,6 @@ final public class ParametersFloat { return new F2SF(parameters); } - public int size() { return parameters.length; } @@ -144,40 +143,39 @@ final public class ParametersFloat { public void update(FVR act, FVR pred, Instances isd, int instc, Parse dx, double upd, double e, float lam_dist) { e++; - - - float b = (float)e-lam_dist; - + + float b = (float) e - lam_dist; + FVR dist = act.getDistVector(pred); - - dist.update(parameters, total, hildreth(dist,b), upd,false); + + dist.update(parameters, total, hildreth(dist, b), upd, false); } - - + public void update(FVR pred, FVR act, float upd, float e) { e++; - float lam_dist = act.getScore(parameters,false)- pred.getScore(parameters,false); - - float b = (float)e-lam_dist; - + float lam_dist = act.getScore(parameters, false) - pred.getScore(parameters, false); + + float b = e - lam_dist; + FVR dist = act.getDistVector(pred); - - dist.update(parameters, total, hildreth(dist,b), upd,false); + + dist.update(parameters, total, hildreth(dist, b), upd, false); } - + protected double hildreth(FVR a, double b) { double A = a.dotProduct(a); - if (A<=0.0000000000000000001) return 0.0; - return b/A; + if (A <= 0.0000000000000000001) + return 0.0; + return b / A; } - - public float getScore(FVR fv) { //xx - if (fv ==null) return 0.0F; - return fv.getScore(parameters,false); + + public float getScore(FVR fv) { // xx + if (fv == null) + return 0.0F; + return fv.getScore(parameters, false); } - } diff --git a/dependencyParser/mate-tools/src/is2/data/Parse.java b/dependencyParser/mate-tools/src/is2/data/Parse.java index 21a83d9..aa24005 100755 --- a/dependencyParser/mate-tools/src/is2/data/Parse.java +++ b/dependencyParser/mate-tools/src/is2/data/Parse.java @@ -1,23 +1,22 @@ package is2.data; - import java.util.BitSet; - public class Parse implements Comparable<Parse> { public short[] heads; public short[] labels; public double f1; - - public Parse() {} - + public Parse() { + } + public Parse(int i) { heads = new short[i]; labels = new short[i]; } + /** * @param heads2 * @param types2 @@ -26,12 +25,12 @@ public class Parse implements Comparable<Parse> { public Parse(short[] heads2, short[] types2, float p_new) { this.heads = new short[heads2.length]; this.labels = new short[types2.length]; - // this.heads=heads2; - // this.labels=types2; + // this.heads=heads2; + // this.labels=types2; System.arraycopy(heads2, 0, heads, 0, heads.length); System.arraycopy(types2, 0, labels, 0, labels.length); - f1=p_new; - + f1 = p_new; + } /** @@ -40,51 +39,51 @@ public class Parse implements Comparable<Parse> { * @param p_new */ public Parse(String parse, float p_new) { - - // this(parse.length()/2); - + + // this(parse.length()/2); + signature2parse(parse); - - f1=p_new; - + + f1 = p_new; + } public void signature2parse(String parse) { - int p=0; - heads = new short[parse.length()/2]; + int p = 0; + heads = new short[parse.length() / 2]; labels = new short[heads.length]; - // DB.println("pl "+parse.length()); - for(int k=0;k<heads.length;k++) { - heads[k]= (short)parse.charAt(p++); - labels[k] = (short)parse.charAt(p++); + // DB.println("pl "+parse.length()); + for (int k = 0; k < heads.length; k++) { + heads[k] = (short) parse.charAt(p++); + labels[k] = (short) parse.charAt(p++); } } - @Override public Parse clone() { Parse p = new Parse(); p.heads = new short[heads.length]; p.labels = new short[labels.length]; - + System.arraycopy(heads, 0, p.heads, 0, heads.length); System.arraycopy(labels, 0, p.labels, 0, labels.length); - - p.f1=f1; - + + p.f1 = f1; + return p; } /** * Check if it is a tree + * * @return */ public boolean checkTree() { - + BitSet set = new BitSet(heads.length); set.set(0); return checkTree(set, 0); - + } /** @@ -92,66 +91,67 @@ public class Parse implements Comparable<Parse> { * @return */ private boolean checkTree(BitSet set, int h) { - //System.out.print(" h "+h); - - for(int i=0;i<heads.length;i++) { - if (heads[i]==h) { - // System.out.print(" "+i); - if (!set.get(i)) checkTree(set, i); + // System.out.print(" h "+h); + + for (int i = 0; i < heads.length; i++) { + if (heads[i] == h) { + // System.out.print(" "+i); + if (!set.get(i)) + checkTree(set, i); set.set(i); - + } } - - for(int i=0;i<heads.length;i++) { - if (!set.get(i)) return false; + + for (int i = 0; i < heads.length; i++) { + if (!set.get(i)) + return false; } return true; } - + + @Override public String toString() { StringBuilder b = new StringBuilder(); - for(int k=0;k<this.heads.length;k++) { - b.append(k).append(" ").append(heads[k]+" ").append(this.labels[k]).append("\n"); + for (int k = 0; k < this.heads.length; k++) { + b.append(k).append(" ").append(heads[k] + " ").append(this.labels[k]).append("\n"); } return b.toString(); } - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see java.lang.Comparable#compareTo(java.lang.Object) */ @Override public int compareTo(Parse o) { - - if (f1==o.f1) return this.signature().compareTo(o.signature()); - return f1<o.f1?1:f1==o.f1?0:-1; + + if (f1 == o.f1) + return this.signature().compareTo(o.signature()); + return f1 < o.f1 ? 1 : f1 == o.f1 ? 0 : -1; } /** * @return the signature of a parse */ public String signature() { - StringBuilder b = new StringBuilder(heads.length*2); - for(int k=0;k<heads.length;k++) { - b.append((char)heads[k]).append((char)labels[k]); + StringBuilder b = new StringBuilder(heads.length * 2); + for (int k = 0; k < heads.length; k++) { + b.append((char) heads[k]).append((char) labels[k]); } return b.toString(); } - /** * @return the signature of a parse */ public StringBuilder signatureSB() { - StringBuilder b = new StringBuilder(heads.length*2); - for(int k=0;k<heads.length;k++) { - b.append((char)heads[k]).append((char)labels[k]); + StringBuilder b = new StringBuilder(heads.length * 2); + for (int k = 0; k < heads.length; k++) { + b.append((char) heads[k]).append((char) labels[k]); } return b; } - } - \ No newline at end of file diff --git a/dependencyParser/mate-tools/src/is2/data/ParseNBest.java b/dependencyParser/mate-tools/src/is2/data/ParseNBest.java index b66306c..cb02b71 100644 --- a/dependencyParser/mate-tools/src/is2/data/ParseNBest.java +++ b/dependencyParser/mate-tools/src/is2/data/ParseNBest.java @@ -1,66 +1,68 @@ package is2.data; +final public class ParseNBest extends Parse { + private String signature = null; + // public float[] scores; -final public class ParseNBest extends Parse { + public ParseNBest() { + } - - private String signature=null; - - //public float[] scores; - - public ParseNBest() {} - public ParseNBest(short[] heads2, short[] types2, float p_new) { super(heads2, types2, p_new); } - + public ParseNBest(int i) { - super(i); + super(i); } - + /** * @param sig * @param readFloat */ public ParseNBest(String sig, float score) { - super(sig,score); + super(sig, score); } /** * create a total order to provide replicable deterministic results + * * @param o * @return */ public int compareTo(ParseNBest o) { - if (f1<o.f1) return 1; - if (f1==o.f1) { - if (signature==null) signature=signature(); - if (o.signature==null) o.signature=o.signature(); - return o.signature.compareTo(signature); - - } + if (f1 < o.f1) + return 1; + if (f1 == o.f1) { + if (signature == null) + signature = signature(); + if (o.signature == null) + o.signature = o.signature(); + return o.signature.compareTo(signature); + + } return -1; } - + /** * @return the signature of a parse */ + @Override public String signature() { - if (signature!=null) return signature; - signature= super.signature(); + if (signature != null) + return signature; + signature = super.signature(); return signature; } - + /** * @return the signature of a parse */ public String signature(short[] heads, short[] labels) { - StringBuilder b = new StringBuilder(heads.length*2); - for(int k=0;k<heads.length;k++) { - b.append((char)heads[k]). - append((char)labels[k]); + StringBuilder b = new StringBuilder(heads.length * 2); + for (int k = 0; k < heads.length; k++) { + b.append((char) heads[k]).append((char) labels[k]); } signature = b.toString(); return signature; @@ -73,18 +75,15 @@ final public class ParseNBest extends Parse { * @param ch * @param s */ - public String signature(short[] heads, short[] types, short p, short ch,short l) { - StringBuilder b = new StringBuilder(heads.length*2); - for(int k=0;k<heads.length;k++) { - - - b.append(k==ch?(char)p: - (char)heads[k]). - append(k==ch?(char)l:(char)types[k]); + public String signature(short[] heads, short[] types, short p, short ch, short l) { + StringBuilder b = new StringBuilder(heads.length * 2); + for (int k = 0; k < heads.length; k++) { + + b.append(k == ch ? (char) p : (char) heads[k]).append(k == ch ? (char) l : (char) types[k]); } signature = b.toString(); return signature; - + } @Override @@ -92,16 +91,13 @@ final public class ParseNBest extends Parse { ParseNBest p = new ParseNBest(); p.heads = new short[heads.length]; p.labels = new short[labels.length]; - + System.arraycopy(heads, 0, p.heads, 0, heads.length); System.arraycopy(labels, 0, p.labels, 0, labels.length); - - p.f1=f1; - + + p.f1 = f1; + return p; } - } - - \ No newline at end of file diff --git a/dependencyParser/mate-tools/src/is2/data/PipeGen.java b/dependencyParser/mate-tools/src/is2/data/PipeGen.java index b63fb90..728666f 100755 --- a/dependencyParser/mate-tools/src/is2/data/PipeGen.java +++ b/dependencyParser/mate-tools/src/is2/data/PipeGen.java @@ -1,73 +1,75 @@ package is2.data; +public class PipeGen { -public class PipeGen { + public static final String SENSE = "SENSE", POS = "POS", DIST = "DIST", WORD = "WORD", PRED = "PRED", ARG = "ARG", + FEAT = "F", REL = "REL", TYPE = "TYPE", CHAR = "C", FFEATS = "FF", DIR = "DIR", LA = "LA", RA = "RA"; - public static final String SENSE = "SENSE",POS = "POS",DIST = "DIST",WORD = "WORD",PRED = "PRED",ARG = "ARG", - FEAT = "F", REL = "REL",TYPE = "TYPE" ,CHAR = "C",FFEATS="FF", DIR="DIR",LA = "LA",RA = "RA"; + public static final String GPOS = "GPOS", MID = "MID", END = "END", STR = "STR", FM = "FM", NOFEAT = "NOFEAT"; - public static final String GPOS = "GPOS", MID = "MID",END = "END",STR = "STR",FM="FM", NOFEAT = "NOFEAT"; + public static final String _0 = "0", _4 = "4", _3 = "3", _2 = "2", _1 = "1", _5 = "5", _10 = "10"; - public static final String _0 = "0",_4 = "4", _3 = "3", _2 = "2",_1 = "1", _5 = "5",_10 = "10"; - static public int outValue(int num1, int del) { - String out = ""+num1; - StringBuffer delS=new StringBuffer(); - for(int k =0;k< del;k++) delS.append('\b'); - del=out.length(); - System.out.print(delS+out); + String out = "" + num1; + StringBuffer delS = new StringBuffer(); + for (int k = 0; k < del; k++) + delS.append('\b'); + del = out.length(); + System.out.print(delS + out); return del; } - + static public int outValue(int num1, int del, long last) { - String out = ""+num1+" ("+(System.currentTimeMillis()-last)/(num1+1)+" ms/instance)"; - StringBuffer delS=new StringBuffer(); - for(int k =0;k< del;k++) delS.append('\b'); - del=out.length(); - System.out.print(delS+out); + String out = "" + num1 + " (" + (System.currentTimeMillis() - last) / (num1 + 1) + " ms/instance)"; + StringBuffer delS = new StringBuffer(); + for (int k = 0; k < del; k++) + delS.append('\b'); + del = out.length(); + System.out.print(delS + out); return del; } - + static public int outValueErr(int num1, float err, float f1, int del, long last) { - - String out = ""+num1+" ("+(System.currentTimeMillis()-last)/(num1+1)+" ms/instance "+(err/num1)+" err/instance f1="+ - f1 +") "; - StringBuffer delS=new StringBuffer(); - for(int k =0;k< del;k++) delS.append('\b'); - del=out.length(); - System.out.print(delS+out); + + String out = "" + num1 + " (" + (System.currentTimeMillis() - last) / (num1 + 1) + " ms/instance " + + (err / num1) + " err/instance f1=" + f1 + ") "; + StringBuffer delS = new StringBuffer(); + for (int k = 0; k < del; k++) + delS.append('\b'); + del = out.length(); + System.out.print(delS + out); return del; } - static public int outValueErr(int num1, float err, float f1, int del, long last, double upd) { - String out = ""+num1+" ("+(System.currentTimeMillis()-last)/(num1+1)+" ms/instance "+(err/num1)+" err/instance f1="+ - f1 +") upd "+upd; - StringBuffer delS=new StringBuffer(); - for(int k =0;k< del;k++) delS.append('\b'); - del=out.length(); - System.out.print(delS+out); + String out = "" + num1 + " (" + (System.currentTimeMillis() - last) / (num1 + 1) + " ms/instance " + + (err / num1) + " err/instance f1=" + f1 + ") upd " + upd; + StringBuffer delS = new StringBuffer(); + for (int k = 0; k < del; k++) + delS.append('\b'); + del = out.length(); + System.out.print(delS + out); return del; } static public int outValueErr(int num1, float err, float f1, int del, long last, double upd, String info) { - String out = ""+num1+" ("+(System.currentTimeMillis()-last)/(num1+1)+" ms/instance "+(err/(float)num1)+" err/instance f1="+ - f1 +") upd "+upd+" "+info; - StringBuffer delS=new StringBuffer(); - for(int k =0;k< del;k++) delS.append('\b'); - del=out.length(); - System.out.print(delS+out); + String out = "" + num1 + " (" + (System.currentTimeMillis() - last) / (num1 + 1) + " ms/instance " + + (err / num1) + " err/instance f1=" + f1 + ") upd " + upd + " " + info; + StringBuffer delS = new StringBuffer(); + for (int k = 0; k < del; k++) + delS.append('\b'); + del = out.length(); + System.out.print(delS + out); return del; } - - + /** * @param cnt * @param l * @return */ public static String getSecondsPerInstnace(int cnt, long l) { - return " "+((float)l/(cnt*1000f))+" seconds/sentnece "; + return " " + (l / (cnt * 1000f)) + " seconds/sentnece "; } /** @@ -75,9 +77,7 @@ public class PipeGen { * @return */ public static String getUsedTime(long l) { - return "Used time " + (((float)l)/1000f)+" seconds "; + return "Used time " + ((l) / 1000f) + " seconds "; } - - } diff --git a/dependencyParser/mate-tools/src/is2/data/PrimeFinder.java b/dependencyParser/mate-tools/src/is2/data/PrimeFinder.java index 38c614b..fab0901 100644 --- a/dependencyParser/mate-tools/src/is2/data/PrimeFinder.java +++ b/dependencyParser/mate-tools/src/is2/data/PrimeFinder.java @@ -1,5 +1,5 @@ /** - * + * */ package is2.data; @@ -7,60 +7,45 @@ import java.util.Arrays; /** * @author Dr. Bernd Bohnet, 13.05.2010 - * - * + * + * */ public class PrimeFinder { - + public PrimeFinder() { + } + + public static final int nextPrime(int desiredCapacity) { + int i = Arrays.binarySearch(primeCapacities, desiredCapacity); + if (i < 0) + i = -i - 1; + return primeCapacities[i]; + } + + public static final int largestPrime = 2147483647; + private static final int primeCapacities[] = { 2147483647, 5, 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, + 12853, 25717, 51437, 102877, 205759, 411527, 823117, 1646237, 3292489, 6584983, 13169977, 26339969, + 52679969, 105359939, 210719881, 421439783, 842879579, 1685759167, 433, 877, 1759, 3527, 7057, 14143, 28289, + 56591, 113189, 226379, 452759, 905551, 1811107, 3622219, 7244441, 14488931, 28977863, 57955739, 115911563, + 231823147, 463646329, 927292699, 1854585413, 953, 1907, 3821, 7643, 15287, 30577, 61169, 122347, 244703, + 489407, 978821, 1957651, 3915341, 7830701, 15661423, 31322867, 62645741, 125291483, 250582987, 501165979, + 1002331963, 2004663929, 1039, 2081, 4177, 8363, 16729, 33461, 66923, 133853, 267713, 535481, 1070981, + 2141977, 4283963, 8567929, 17135863, 34271747, 68543509, 137087021, 274174111, 548348231, 1096696463, 31, + 67, 137, 277, 557, 1117, 2237, 4481, 8963, 17929, 35863, 71741, 143483, 286973, 573953, 1147921, 2295859, + 4591721, 9183457, 18366923, 36733847, 73467739, 146935499, 293871013, 587742049, 1175484103, 599, 1201, + 2411, 4831, 9677, 19373, 38747, 77509, 155027, 310081, 620171, 1240361, 2480729, 4961459, 9922933, 19845871, + 39691759, 79383533, 158767069, 317534141, 635068283, 1270136683, 311, 631, 1277, 2557, 5119, 10243, 20507, + 41017, 82037, 164089, 328213, 656429, 1312867, 2625761, 5251529, 10503061, 21006137, 42012281, 84024581, + 168049163, 336098327, 672196673, 1344393353, 3, 7, 17, 37, 79, 163, 331, 673, 1361, 2729, 5471, 10949, + 21911, 43853, 87719, 175447, 350899, 701819, 1403641, 2807303, 5614657, 11229331, 22458671, 44917381, + 89834777, 179669557, 359339171, 718678369, 1437356741, 43, 89, 179, 359, 719, 1439, 2879, 5779, 11579, + 23159, 46327, 92657, 185323, 370661, 741337, 1482707, 2965421, 5930887, 11861791, 23723597, 47447201, + 94894427, 189788857, 379577741, 759155483, 1518310967, 379, 761, 1523, 3049, 6101, 12203, 24407, 48817, + 97649, 195311, 390647, 781301, 1562611, 3125257, 6250537, 12501169, 25002389, 50004791, 100009607, + 200019221, 400038451, 800076929, 1600153859 }; + + static { + Arrays.sort(primeCapacities); + } - public PrimeFinder() - { - } - - public static final int nextPrime(int desiredCapacity) - { - int i = Arrays.binarySearch(primeCapacities, desiredCapacity); - if(i < 0) - i = -i - 1; - return primeCapacities[i]; - } - - public static final int largestPrime = 2147483647; - private static final int primeCapacities[] = { - 2147483647, 5, 11, 23, 47, 97, 197, 397, 797, 1597, - 3203, 6421, 12853, 25717, 51437, 102877, 205759, 411527, 823117, 1646237, - 3292489, 6584983, 13169977, 26339969, 52679969, 105359939, 210719881, 421439783, 842879579, 1685759167, - 433, 877, 1759, 3527, 7057, 14143, 28289, 56591, 113189, 226379, - 452759, 905551, 1811107, 3622219, 7244441, 14488931, 28977863, 57955739, 115911563, 231823147, - 463646329, 927292699, 1854585413, 953, 1907, 3821, 7643, 15287, 30577, 61169, - 122347, 244703, 489407, 978821, 1957651, 3915341, 7830701, 15661423, 31322867, 62645741, - 125291483, 250582987, 501165979, 1002331963, 2004663929, 1039, 2081, 4177, 8363, 16729, - 33461, 66923, 133853, 267713, 535481, 1070981, 2141977, 4283963, 8567929, 17135863, - 34271747, 68543509, 137087021, 274174111, 548348231, 1096696463, 31, 67, 137, 277, - 557, 1117, 2237, 4481, 8963, 17929, 35863, 71741, 143483, 286973, - 573953, 1147921, 2295859, 4591721, 9183457, 18366923, 36733847, 73467739, 146935499, 293871013, - 587742049, 1175484103, 599, 1201, 2411, 4831, 9677, 19373, 38747, 77509, - 155027, 310081, 620171, 1240361, 2480729, 4961459, 9922933, 19845871, 39691759, 79383533, - 158767069, 317534141, 635068283, 1270136683, 311, 631, 1277, 2557, 5119, 10243, - 20507, 41017, 82037, 164089, 328213, 656429, 1312867, 2625761, 5251529, 10503061, - 21006137, 42012281, 84024581, 168049163, 336098327, 672196673, 1344393353, 3, 7, 17, - 37, 79, 163, 331, 673, 1361, 2729, 5471, 10949, 21911, - 43853, 87719, 175447, 350899, 701819, 1403641, 2807303, 5614657, 11229331, 22458671, - 44917381, 89834777, 179669557, 359339171, 718678369, 1437356741, 43, 89, 179, 359, - 719, 1439, 2879, 5779, 11579, 23159, 46327, 92657, 185323, 370661, - 741337, 1482707, 2965421, 5930887, 11861791, 23723597, 47447201, 94894427, 189788857, 379577741, - 759155483, 1518310967, 379, 761, 1523, 3049, 6101, 12203, 24407, 48817, - 97649, 195311, 390647, 781301, 1562611, 3125257, 6250537, 12501169, 25002389, 50004791, - 100009607, 200019221, 400038451, 800076929, 1600153859 - }; - - static - { - Arrays.sort(primeCapacities); - } - - - - } diff --git a/dependencyParser/mate-tools/src/is2/data/RandomIndex.java b/dependencyParser/mate-tools/src/is2/data/RandomIndex.java index 7fc67b3..8ab61e9 100644 --- a/dependencyParser/mate-tools/src/is2/data/RandomIndex.java +++ b/dependencyParser/mate-tools/src/is2/data/RandomIndex.java @@ -1,145 +1,134 @@ /** - * + * */ package is2.data; -import java.util.BitSet; - import is2.util.DB; - /** * @author Dr. Bernd Bohnet, 20.05.2011 - * - * + * + * */ public class RandomIndex implements Long2IntInterface { - - - final int[] prims = {52349171,199951347,89990,5001,32891,17,19,23,29,31,37,47,53,59,61,67,71}; -// final int[] prims = {1,3,5,7,11,17,19,23,29,31,37,47,53,59,61,67,71}; - - final long hashFunctionModifiers[]; - - final int kbit,lbit; - final int hsize ; // maximal size of hash - - final int bits; // available bits - final int moves; // needed moves to put a number into - - + + final int[] prims = { 52349171, 199951347, 89990, 5001, 32891, 17, 19, 23, 29, 31, 37, 47, 53, 59, 61, 67, 71 }; + // final int[] prims = {1,3,5,7,11,17,19,23,29,31,37,47,53,59,61,67,71}; + + final long hashFunctionModifiers[]; + + final int kbit, lbit; + final int hsize; // maximal size of hash + + final int bits; // available bits + final int moves; // needed moves to put a number into + /** * Creates the random functions. - * - * @param kbit The bits to be mapped - * @param lbit The left shift of the bits - * @param hsize The size of the featurs space (not included in the original algorithm) - * @param numberFunctions The number of the hash functions + * + * @param kbit + * The bits to be mapped + * @param lbit + * The left shift of the bits + * @param hsize + * The size of the featurs space (not included in the original + * algorithm) + * @param numberFunctions + * The number of the hash functions */ public RandomIndex(int kbit, int lbit, int hsize, int numberFunctions) { - - - this.kbit =kbit; - this.lbit =lbit; - - - if (hsize<=0) this.hsize = 67000001; // default value - else this.hsize = hsize; - - bits = (int) Math.ceil(Math.log(this.hsize)/Math.log(2)); - - moves = (int) Math.ceil(64f/(float)bits); - - - - DB.println("moves "+moves+" bits "+bits+" hsize "+hsize); - + + this.kbit = kbit; + this.lbit = lbit; + + if (hsize <= 0) + this.hsize = 67000001; // default value + else + this.hsize = hsize; + + bits = (int) Math.ceil(Math.log(this.hsize) / Math.log(2)); + + moves = (int) Math.ceil(64f / bits); + + DB.println("moves " + moves + " bits " + bits + " hsize " + hsize); + hashFunctionModifiers = new long[numberFunctions]; - - for (int f = 0;f<numberFunctions;f++) hashFunctionModifiers[f] = prims[f]; + + for (int f = 0; f < numberFunctions; f++) + hashFunctionModifiers[f] = prims[f]; } - - - - public int[] hash(long x) - { - int[] hvals = new int[hashFunctionModifiers.length]; - - for(int k=0;k<hashFunctionModifiers.length;k++) { - - // the original function: value = ((x+1) * hashFunctionModifiers[k] & m ) >> n; - - // the first part of the original function - long value = (x+1) * hashFunctionModifiers[k]; - - // do the above >> n with a maximal size of the available hash values - // Shift all bits until they have been each xor-ed (^) in the range of the hash - // in order the have all information potentially represented there. - - for(int j=1;j<= moves;j++) value = value ^ (value >> (bits*j)); - - // Map the value to the range of the available space should be the same as (value & m) . - hvals[k] = Math.abs((int)value % hsize); - } - return hvals; - } - - public int[] hashU(long x) - { - int[] hvals = new int[hashFunctionModifiers.length]; - - long y = Long.reverse(x); - for(int k=0;k<hashFunctionModifiers.length;k++) { - - // the original function: value = ((x+1) * hashFunctionModifiers[k] & m ) >> n; - - // the first part of the original function - long value1 = (((y+1) * hashFunctionModifiers[k]) /* % 2 pow 64 */ ) >> (kbit-lbit); - - // I get probably only the first part lets get the second part too - // long value2 = (((y+1>>20) * hashFunctionModifiers[k]) /* % 2 pow 64 */ ) >> (kbit-lbit); - - - // the modulo (%) 2 pow 64 is done since the long number can not be larger than 2 pow 64. - // System.out.println("value "+value+" shift "+(lbit-kbit)); - hvals[k] = Math.abs((int)value1); - } - return hvals; - } - - /* - (defun generate-hash-fn (&key (k-bit 32) - (l-bit 8) - verbosep constants (count 4)) - - (labels ((random-constant () - (let ((a (+ (random (- (expt 2 k-bit) 1)) 1))) - (logior a 1)))) ;; inclusive OR ensures odd number. - (let ((pdiff (- (- k-bit l-bit)));; neg. sign to do a rightshift, see ash() - (sub1 (- (expt 2 k-bit) 1)) - (constants (copy-list constants))) - (unless constants - (loop ;; a = odd number a where 0 < a < u. - until (= count (length constants)) - do (pushnew (random-constant) constants))) - (when verbosep - (format t "~&generate-hash-fn(): using random constants: ~a~%" - constants)) - (values - #'(lambda (x) - (loop - for a in constants - ;;; always add 1 to x to avoid f(0)=0. - collect (ash (logand (* (+ 1 x) a) sub1) pdiff))) - constants)))) - - */ - - - - - - /* (non-Javadoc) + + public int[] hash(long x) { + int[] hvals = new int[hashFunctionModifiers.length]; + + for (int k = 0; k < hashFunctionModifiers.length; k++) { + + // the original function: value = ((x+1) * hashFunctionModifiers[k] + // & m ) >> n; + + // the first part of the original function + long value = (x + 1) * hashFunctionModifiers[k]; + + // do the above >> n with a maximal size of the available hash + // values + // Shift all bits until they have been each xor-ed (^) in the range + // of the hash + // in order the have all information potentially represented there. + + for (int j = 1; j <= moves; j++) + value = value ^ (value >> (bits * j)); + + // Map the value to the range of the available space should be the + // same as (value & m) . + hvals[k] = Math.abs((int) value % hsize); + } + return hvals; + } + + public int[] hashU(long x) { + int[] hvals = new int[hashFunctionModifiers.length]; + + long y = Long.reverse(x); + for (int k = 0; k < hashFunctionModifiers.length; k++) { + + // the original function: value = ((x+1) * hashFunctionModifiers[k] + // & m ) >> n; + + // the first part of the original function + long value1 = (((y + 1) * hashFunctionModifiers[k]) /* % 2 pow 64 */ ) >> (kbit - lbit); + + // I get probably only the first part lets get the second part too + // long value2 = (((y+1>>20) * hashFunctionModifiers[k]) /* % 2 pow + // 64 */ ) >> (kbit-lbit); + + // the modulo (%) 2 pow 64 is done since the long number can not be + // larger than 2 pow 64. + // System.out.println("value "+value+" shift "+(lbit-kbit)); + hvals[k] = Math.abs((int) value1); + } + return hvals; + } + + /* + * (defun generate-hash-fn (&key (k-bit 32) (l-bit 8) verbosep constants + * (count 4)) + * + * (labels ((random-constant () (let ((a (+ (random (- (expt 2 k-bit) 1)) + * 1))) (logior a 1)))) ;; inclusive OR ensures odd number. (let ((pdiff (- + * (- k-bit l-bit)));; neg. sign to do a rightshift, see ash() (sub1 (- + * (expt 2 k-bit) 1)) (constants (copy-list constants))) (unless constants + * (loop ;; a = odd number a where 0 < a < u. until (= count (length + * constants)) do (pushnew (random-constant) constants))) (when verbosep + * (format t "~&generate-hash-fn(): using random constants: ~a~%" + * constants)) (values #'(lambda (x) (loop for a in constants ;;; always add + * 1 to x to avoid f(0)=0. collect (ash (logand (* (+ 1 x) a) sub1) pdiff))) + * constants)))) + * + */ + + /* + * (non-Javadoc) + * * @see is2.data.Long2IntInterface#l2i(long) */ @Override @@ -148,7 +137,9 @@ public class RandomIndex implements Long2IntInterface { return 0; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.data.Long2IntInterface#size() */ @Override @@ -157,5 +148,3 @@ public class RandomIndex implements Long2IntInterface { } } - - diff --git a/dependencyParser/mate-tools/src/is2/data/SentenceData09.java b/dependencyParser/mate-tools/src/is2/data/SentenceData09.java index 46cabc0..386fa39 100755 --- a/dependencyParser/mate-tools/src/is2/data/SentenceData09.java +++ b/dependencyParser/mate-tools/src/is2/data/SentenceData09.java @@ -1,9 +1,5 @@ package is2.data; - -import is2.io.CONLLReader09; -import is2.io.CONLLWriter09; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -11,9 +7,11 @@ import java.io.ObjectInputStream; import java.io.StringWriter; import java.util.ArrayList; +import is2.io.CONLLWriter09; +import is2.io.IOGenerals; + public class SentenceData09 { - public String[] id; public String[] forms; @@ -24,17 +22,17 @@ public class SentenceData09 { public int[] pheads; public String[] labels; - public String[] plabels; + public String[] plabels; - public String[] gpos; // gold pos + public String[] gpos; // gold pos public String[] ppos; public String feats[][]; -// public String[] split_lemma; + // public String[] split_lemma; public String[] sem; public int[] semposition; - + // predicate number, argument number -> argument string public String[][] arg; public int[][] argposition; @@ -44,7 +42,8 @@ public class SentenceData09 { public String[] ofeats; public String[] pfeats; - public SentenceData09() {} + public SentenceData09() { + } public SentenceData09(String[] forms, String[] postags, String[] labs, int[] heads) { this.forms = forms; @@ -56,12 +55,13 @@ public class SentenceData09 { public SentenceData09(String[] forms, String[] lemmas, String[] postags, String[] labs, int[] heads) { this.forms = forms; gpos = postags; - //ppos = postags; + // ppos = postags; labels = labs; this.heads = heads; this.plemmas = lemmas; } + public SentenceData09(String[] forms, String[] lemmas, String[] gpos, String[] ppos, String[] labs, int[] heads) { this.forms = forms; this.gpos = gpos; @@ -70,10 +70,11 @@ public class SentenceData09 { labels = labs; this.heads = heads; this.plemmas = lemmas; - - + } - public SentenceData09(String[] forms, String[] lemmas, String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) { + + public SentenceData09(String[] forms, String[] lemmas, String[] gpos, String[] ppos, String[] labs, int[] heads, + String[] fillpred) { this.forms = forms; this.gpos = gpos; this.ppos = ppos; @@ -81,11 +82,12 @@ public class SentenceData09 { labels = labs; this.heads = heads; this.plemmas = lemmas; - - fillp =fillpred; + + fillp = fillpred; } - public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) { + public SentenceData09(String[] forms, String[] lemmas, String[] olemmas, String[] gpos, String[] ppos, + String[] labs, int[] heads, String[] fillpred) { this.forms = forms; this.gpos = gpos; this.ppos = ppos; @@ -93,39 +95,37 @@ public class SentenceData09 { labels = labs; this.heads = heads; this.plemmas = lemmas; - this.lemmas =olemmas; - fillp =fillpred; + this.lemmas = olemmas; + fillp = fillpred; } - public SentenceData09(String[] forms, String[] olemmas, String[] lemmas,String[] gpos, - String[] ppos, String[] labs, int[] heads, String[] fillpred, String[] of, String[] pf) { + public SentenceData09(String[] forms, String[] olemmas, String[] lemmas, String[] gpos, String[] ppos, + String[] labs, int[] heads, String[] fillpred, String[] of, String[] pf) { this.forms = forms; this.gpos = gpos; this.ppos = ppos; labels = labs; this.heads = heads; - this.pheads =heads; - this.plabels=labs; + this.pheads = heads; + this.plabels = labs; this.plemmas = lemmas; - this.lemmas =olemmas; - - this.ofeats =of; - this.pfeats =pf; - fillp =fillpred; + this.lemmas = olemmas; + + this.ofeats = of; + this.pfeats = pf; + fillp = fillpred; } - - - /** * Create an instance without root of the input instance + * * @param instance */ public SentenceData09(SentenceData09 i) { - - int length = i.length()-1; - + + int length = i.length() - 1; + forms = new String[length]; gpos = new String[length]; ppos = new String[length]; @@ -139,80 +139,83 @@ public class SentenceData09 { labels = new String[length]; fillp = new String[length]; id = new String[length]; - - - for(int j = 0; j < length; j++) { - forms[j] = i.forms[j+1]; - ppos[j] = i.ppos[j+1]; - gpos[j] = i.gpos[j+1]; - - labels[j] = i.labels[j+1]; - heads[j] = i.heads[j+1]; - - - - if (i.pheads!=null) pheads[j] = i.pheads[j+1]; - if (i.plabels!=null) plabels[j] = i.plabels[j+1]; - - - if (i.lemmas!=null) lemmas[j] = i.lemmas[j+1]; - - plemmas[j] = i.plemmas[j+1]; - - - if (i.ofeats!=null) ofeats[j] = i.ofeats[j+1]; - if (i.pfeats!=null) pfeats[j] = i.pfeats[j+1]; - - if (i.fillp!=null) fillp[j] = i.fillp[j+1]; - if (i.id!=null) id[j] = i.id[j+1]; + + for (int j = 0; j < length; j++) { + forms[j] = i.forms[j + 1]; + ppos[j] = i.ppos[j + 1]; + gpos[j] = i.gpos[j + 1]; + + labels[j] = i.labels[j + 1]; + heads[j] = i.heads[j + 1]; + + if (i.pheads != null) + pheads[j] = i.pheads[j + 1]; + if (i.plabels != null) + plabels[j] = i.plabels[j + 1]; + + if (i.lemmas != null) + lemmas[j] = i.lemmas[j + 1]; + + plemmas[j] = i.plemmas[j + 1]; + + if (i.ofeats != null) + ofeats[j] = i.ofeats[j + 1]; + if (i.pfeats != null) + pfeats[j] = i.pfeats[j + 1]; + + if (i.fillp != null) + fillp[j] = i.fillp[j + 1]; + if (i.id != null) + id[j] = i.id[j + 1]; } - - + } + public void setPPos(String[] pos) { - ppos=pos; + ppos = pos; } - + public void setLemmas(String[] lemmas) { - this.plemmas=lemmas; + this.plemmas = lemmas; } public void setFeats(String[] fts) { feats = new String[fts.length][]; - for(int i=0;i<fts.length;i++) { + for (int i = 0; i < fts.length; i++) { feats[i] = fts[i].split("\\|"); } - pfeats =fts; + pfeats = fts; } - public int length () { + public int length() { return forms.length; } @Override - public String toString () { + public String toString() { // prepare the output StringWriter sw = new StringWriter(); CONLLWriter09 snt2str = new is2.io.CONLLWriter09(sw); - try{ + try { snt2str.write(this, CONLLWriter09.NO_ROOT); snt2str.finishWriting(); return sw.toString(); - }catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); } // backup StringBuffer sb = new StringBuffer(); - for(int k=0;k<forms.length;k++) sb.append(k+1).append('\t').append(forms[k]).append('\t').append(heads[k]).append('\t').append(labels[k]).append('\n'); + for (int k = 0; k < forms.length; k++) + sb.append(k + 1).append('\t').append(forms[k]).append('\t').append(heads[k]).append('\t').append(labels[k]) + .append('\n'); return sw.toString(); } - - final public void write (DataOutputStream out) throws IOException { - + final public void write(DataOutputStream out) throws IOException { + out.writeInt(forms.length); - for(int k=0;k<forms.length;k++) { + for (int k = 0; k < forms.length; k++) { out.writeUTF(forms[k]); out.writeUTF(ppos[k]); out.writeUTF(gpos[k]); @@ -220,18 +223,18 @@ public class SentenceData09 { out.writeUTF(labels[k]); out.writeUTF(lemmas[k]); out.writeUTF(plemmas[k]); - out.writeUTF(ofeats[k]); // needed for mtag + out.writeUTF(ofeats[k]); // needed for mtag out.writeUTF(fillp[k]); } - - // out.writeUTF(actParseTree); - + + // out.writeUTF(actParseTree); + } - final public void read (DataInputStream dis) throws IOException { - + final public void read(DataInputStream dis) throws IOException { + int l = dis.readInt(); - + forms = new String[l]; lemmas = new String[l]; plemmas = new String[l]; @@ -240,100 +243,100 @@ public class SentenceData09 { labels = new String[l]; heads = new int[l]; fillp = new String[l]; - ofeats=new String[l]; - - for(int k=0;k<l;k++) { + ofeats = new String[l]; + + for (int k = 0; k < l; k++) { forms[k] = dis.readUTF(); - ppos[k]=dis.readUTF(); - gpos[k]=dis.readUTF(); - heads[k]=dis.readInt(); - labels[k]=dis.readUTF(); - lemmas[k]=dis.readUTF(); - plemmas[k]=dis.readUTF(); - ofeats[k]=dis.readUTF(); - fillp[k]=dis.readUTF(); - + ppos[k] = dis.readUTF(); + gpos[k] = dis.readUTF(); + heads[k] = dis.readInt(); + labels[k] = dis.readUTF(); + lemmas[k] = dis.readUTF(); + plemmas[k] = dis.readUTF(); + ofeats[k] = dis.readUTF(); + fillp[k] = dis.readUTF(); + } } - - - private void readObject (ObjectInputStream in) throws IOException, ClassNotFoundException { - forms = (String[])in.readObject(); - plemmas = (String[])in.readObject(); - ppos = (String[])in.readObject(); - heads = (int[])in.readObject(); - labels = (String[])in.readObject(); + + private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { + forms = (String[]) in.readObject(); + plemmas = (String[]) in.readObject(); + ppos = (String[]) in.readObject(); + heads = (int[]) in.readObject(); + labels = (String[]) in.readObject(); } public void addPredicate(int i, String s) { - + int predId; if (sem == null) { - predId=0; + predId = 0; sem = new String[1]; semposition = new int[1]; - } - else { - predId=sem.length; - String p[] = new String[sem.length+1]; + } else { + predId = sem.length; + String p[] = new String[sem.length + 1]; System.arraycopy(sem, 0, p, 0, sem.length); - int id[] = new int[sem.length+1]; + int id[] = new int[sem.length + 1]; System.arraycopy(semposition, 0, id, 0, semposition.length); - sem =p; - semposition=id; + sem = p; + semposition = id; } - sem[predId]=s; - semposition[predId]=i; + sem[predId] = s; + semposition[predId] = i; } - /** - * Add an argument - * @param i the instance (the child) - * @param predId the id of the predicate (the head) - * @param a the label of the argument + * Add an argument + * + * @param i + * the instance (the child) + * @param predId + * the id of the predicate (the head) + * @param a + * the label of the argument */ public void addArgument(int i, int predId, String a) { - - if (a ==null || a.equals("_")) return; - + + if (a == null || a.equals("_")) + return; + // ensure the space for the argument in the data structure if (arg == null) { - arg = new String[predId+1][]; - argposition = new int[predId+1][]; - } else if (arg.length<=predId) { - String p[][] = new String[predId+1][]; + arg = new String[predId + 1][]; + argposition = new int[predId + 1][]; + } else if (arg.length <= predId) { + String p[][] = new String[predId + 1][]; System.arraycopy(arg, 0, p, 0, arg.length); - arg =p; + arg = p; - int id[][] = new int[predId+1][]; + int id[][] = new int[predId + 1][]; System.arraycopy(argposition, 0, id, 0, argposition.length); argposition = id; } - - - + int aId; - if (arg[predId]==null) { - aId=0; + if (arg[predId] == null) { + aId = 0; arg[predId] = new String[1]; argposition[predId] = new int[1]; } else { - aId =arg[predId].length; - String args[] = new String[arg[predId].length+1]; + aId = arg[predId].length; + String args[] = new String[arg[predId].length + 1]; System.arraycopy(arg[predId], 0, args, 0, arg[predId].length); - arg[predId]=args; - - int argsId[] = new int[argposition[predId].length+1]; + arg[predId] = args; + + int argsId[] = new int[argposition[predId].length + 1]; System.arraycopy(argposition[predId], 0, argsId, 0, argposition[predId].length); - argposition[predId]=argsId; + argposition[predId] = argsId; } - - arg[predId][aId]=a; - argposition[predId][aId]=i; - + + arg[predId][aId] = a; + argposition[predId][aId] = i; + } - + public int[] getParents() { return heads; } @@ -343,22 +346,22 @@ public class SentenceData09 { } public String printSem() { - - if (sem==null) return ""; + + if (sem == null) + return ""; StringBuilder s = new StringBuilder(); - - for(int k=0;k<sem.length;k++) { + + for (int k = 0; k < sem.length; k++) { s.append(sem[k]).append("\n"); - - if (arg==null) { + + if (arg == null) { s.append("arg == null"); - }else - if (arg.length<=k) { - s.append("args.length <=k arg.length:"+arg.length+" k:"+k); - } else if (arg[k]!=null) { - for(int a=0;a< arg[k].length;a++) { - s.append(" ").append(arg[k][a]); - } + } else if (arg.length <= k) { + s.append("args.length <=k arg.length:" + arg.length + " k:" + k); + } else if (arg[k] != null) { + for (int a = 0; a < arg[k].length; a++) { + s.append(" ").append(arg[k][a]); + } } else { s.append("args == null "); } @@ -367,9 +370,9 @@ public class SentenceData09 { return s.toString(); } - /** * Initialize a instance so that a tagger, parser, etc. could be applied + * * @param forms */ public void init(String[] forms) { @@ -388,40 +391,38 @@ public class SentenceData09 { * @param i09 */ public void createSemantic(SentenceData09 instance) { - - this.sem = instance.sem; - this.semposition = instance.semposition; - if (instance.semposition!=null) - for (int k= 0;k< instance.semposition.length;k++) { - this.semposition[k]=instance.semposition[k]-1; - } + this.sem = instance.sem; + this.semposition = instance.semposition; - this.arg = instance.arg; + if (instance.semposition != null) + for (int k = 0; k < instance.semposition.length; k++) { + this.semposition[k] = instance.semposition[k] - 1; + } + this.arg = instance.arg; - this.argposition = instance.argposition; + this.argposition = instance.argposition; + + if (this.argposition != null) + for (int p = 0; p < instance.argposition.length; p++) { + if (this.argposition[p] != null) + for (int a = 0; a < instance.argposition[p].length; a++) + this.argposition[p][a] = instance.argposition[p][a] - 1; + } - if (this.argposition!=null) - for (int p= 0;p< instance.argposition.length;p++) { - if (this.argposition[p]!=null) - for(int a=0;a<instance.argposition[p].length;a++) - this.argposition[p][a]=instance.argposition[p][a]-1; - } - - } /** - * + * */ public String oneLine() { - - + StringBuffer o = new StringBuffer(); - for(int i=1;i<this.length();i++) { - - if (i!=1)o.append(" "); + for (int i = 1; i < this.length(); i++) { + + if (i != 1) + o.append(" "); o.append(this.forms[i]); } return o.toString(); @@ -429,29 +430,29 @@ public class SentenceData09 { /** * Get the children of this instance - * @param head - * @return children of the head + * + * @param head + * @return children of the head */ - public ArrayList<Integer> getChildren(int head) { + public ArrayList<Integer> getChildren(int head) { ArrayList<Integer> children = new ArrayList<Integer>(); - for(int i=0;i<length();i++) { - if (heads[i]==head) children.add(i); + for (int i = 0; i < length(); i++) { + if (heads[i] == head) + children.add(i); } return children; } public void createWithRoot(SentenceData09 i) { - + int length = i.length(); int offset = 0; - if (! i.forms[0].equals(CONLLReader09.ROOT)) { + if (!i.forms[0].equals(IOGenerals.ROOT)) { length++; offset = -1; } - - - + forms = new String[length]; gpos = new String[length]; ppos = new String[length]; @@ -466,65 +467,64 @@ public class SentenceData09 { fillp = new String[length]; id = new String[length]; feats = new String[forms.length][]; - - for(int j = 1; j < length; j++) { - forms[j] = i.forms[j+offset]; - ppos[j] = i.ppos[j+offset]; - gpos[j] = i.gpos[j+offset]; - - labels[j] = i.labels[j+offset]; - heads[j] = i.heads[j+offset]; - - - - if (i.pheads!=null) pheads[j] = i.pheads[j+offset]; - if (i.plabels!=null) plabels[j] = i.plabels[j+offset]; - - - if (i.lemmas!=null) lemmas[j] = i.lemmas[j+offset]; - - plemmas[j] = i.plemmas[j+offset]; - - - // if (i.ofeats!=null) ofeats[j] = i.ofeats[j+offset]; - - ofeats[j]= i.ofeats[j+offset].equals(CONLLWriter09.DASH)? "_" : i.ofeats[j+offset]; - - // if (i.pfeats!=null) pfeats[j] = i.pfeats[j+offset]; - - if (i.pfeats!=null && i.pfeats[j+offset]!=null) { - if (i.pfeats[j+offset].equals(CONLLWriter09.DASH)) feats[j]=null; + + for (int j = 1; j < length; j++) { + forms[j] = i.forms[j + offset]; + ppos[j] = i.ppos[j + offset]; + gpos[j] = i.gpos[j + offset]; + + labels[j] = i.labels[j + offset]; + heads[j] = i.heads[j + offset]; + + if (i.pheads != null) + pheads[j] = i.pheads[j + offset]; + if (i.plabels != null) + plabels[j] = i.plabels[j + offset]; + + if (i.lemmas != null) + lemmas[j] = i.lemmas[j + offset]; + + plemmas[j] = i.plemmas[j + offset]; + + // if (i.ofeats!=null) ofeats[j] = i.ofeats[j+offset]; + + ofeats[j] = i.ofeats[j + offset].equals(CONLLWriter09.DASH) ? "_" : i.ofeats[j + offset]; + + // if (i.pfeats!=null) pfeats[j] = i.pfeats[j+offset]; + + if (i.pfeats != null && i.pfeats[j + offset] != null) { + if (i.pfeats[j + offset].equals(CONLLWriter09.DASH)) + feats[j] = null; else { - feats[j] =i.pfeats[j+offset].split(CONLLReader09.PIPE); - - // if (info[7].equals(CONLLWriter09.DASH)) it.feats[i]=null; - // else { - // it.feats[i] =info[7].split(PIPE); - pfeats[j] = i.pfeats[j+offset]; - // } + feats[j] = i.pfeats[j + offset].split(IOGenerals.PIPE); + + // if (info[7].equals(CONLLWriter09.DASH)) it.feats[i]=null; + // else { + // it.feats[i] =info[7].split(PIPE); + pfeats[j] = i.pfeats[j + offset]; + // } } } - - if (i.fillp!=null) fillp[j] = i.fillp[j+offset]; - if (i.id!=null) id[j] = i.id[j+offset]; + + if (i.fillp != null) + fillp[j] = i.fillp[j + offset]; + if (i.id != null) + id[j] = i.id[j + offset]; } - - - - forms[0] = CONLLReader09.ROOT; - plemmas[0] = CONLLReader09.ROOT_LEMMA; + + forms[0] = IOGenerals.ROOT; + plemmas[0] = IOGenerals.ROOT_LEMMA; fillp[0] = "N"; - lemmas[0] = CONLLReader09.ROOT_LEMMA; + lemmas[0] = IOGenerals.ROOT_LEMMA; - gpos[0] = CONLLReader09.ROOT_POS; - ppos[0] = CONLLReader09.ROOT_POS; - labels[0] = CONLLReader09.NO_TYPE; + gpos[0] = IOGenerals.ROOT_POS; + ppos[0] = IOGenerals.ROOT_POS; + labels[0] = IOGenerals.NO_TYPE; heads[0] = -1; - plabels[0] = CONLLReader09.NO_TYPE; + plabels[0] = IOGenerals.NO_TYPE; pheads[0] = -1; - ofeats[0] = CONLLReader09.NO_TYPE; - id[0] ="0"; + ofeats[0] = IOGenerals.NO_TYPE; + id[0] = "0"; } - } diff --git a/dependencyParser/mate-tools/src/is2/data/Thesaurus.java b/dependencyParser/mate-tools/src/is2/data/Thesaurus.java index 2d3677a..bafc3b9 100644 --- a/dependencyParser/mate-tools/src/is2/data/Thesaurus.java +++ b/dependencyParser/mate-tools/src/is2/data/Thesaurus.java @@ -1,10 +1,8 @@ /** - * + * */ package is2.data; -import is2.util.DB; - import java.io.BufferedReader; import java.io.DataInputStream; import java.io.DataOutputStream; @@ -13,27 +11,29 @@ import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; +import is2.util.DB; + /** * @author Dr. Bernd Bohnet, 28.10.2010 - * - * + * + * */ final public class Thesaurus { public static final String LPATH = "LP"; public static final String SPATH = "SP"; - // [word][p] p = [0:long-path | 1:short-path] - final private int[][] word2path; - + // [word][p] p = [0:long-path | 1:short-path] + final private int[][] word2path; + public Thesaurus() { - word2path =new int[0][]; - } - + word2path = new int[0][]; + } + /** * @param clusterFile * @param mf - * + * */ public Thesaurus(String clusterFile, IEncoderPlus mf, int ls) { @@ -41,85 +41,88 @@ final public class Thesaurus { // register words try { - BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768); + BufferedReader inputReader = new BufferedReader( + new InputStreamReader(new FileInputStream(clusterFile), "UTF-8"), 32768); - int cnt=0; + int cnt = 0; String line; - while ((line =inputReader.readLine())!=null) { + while ((line = inputReader.readLine()) != null) { cnt++; try { - String[] split = line.split(REGEX); - // mf.register(LPATH, split[0].length()<ls?split[0]:split[0].substring(0,ls)); - mf.register(PipeGen.WORD, split[0]); - mf.register(PipeGen.WORD, split[1]); - } catch(Exception e) { - System.out.println("Error in cluster line "+cnt+" error: "+e.getMessage()); + String[] split = line.split(REGEX); + // mf.register(LPATH, + // split[0].length()<ls?split[0]:split[0].substring(0,ls)); + mf.register(PipeGen.WORD, split[0]); + mf.register(PipeGen.WORD, split[1]); + } catch (Exception e) { + System.out.println("Error in cluster line " + cnt + " error: " + e.getMessage()); } } - System.out.println("read number of thesaury entries "+cnt); + System.out.println("read number of thesaury entries " + cnt); inputReader.close(); - + } catch (Exception e) { e.printStackTrace(); } - + word2path = new int[mf.getFeatureCounter().get(PipeGen.WORD)][]; - // insert words try { String line; - BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768); + BufferedReader inputReader = new BufferedReader( + new InputStreamReader(new FileInputStream(clusterFile), "UTF-8"), 32768); - int startWd =-1; + int startWd = -1; ArrayList<Integer> wrds = new ArrayList<Integer>(); - while ((line =inputReader.readLine())!=null) { + while ((line = inputReader.readLine()) != null) { String[] split = line.split(REGEX); int wd = mf.getValue(PipeGen.WORD, split[0]); - // DB.println("wd "+wd+" "+startWd); + // DB.println("wd "+wd+" "+startWd); if (startWd == wd) { int thesaurusWrd = mf.getValue(PipeGen.WORD, split[1]); - if (thesaurusWrd!=wd) wrds.add(thesaurusWrd); - } else if (startWd!=-1) { + if (thesaurusWrd != wd) + wrds.add(thesaurusWrd); + } else if (startWd != -1) { int[] ths = new int[wrds.size()]; - for(int k=0;k<ths.length;k++) ths[k]=wrds.get(k); + for (int k = 0; k < ths.length; k++) + ths[k] = wrds.get(k); word2path[startWd] = ths; - // DB.println(""+wrds+" size "+ths.length); + // DB.println(""+wrds+" size "+ths.length); wrds.clear(); int thesaurusWrd = mf.getValue(PipeGen.WORD, split[1]); - if (thesaurusWrd!=wd) wrds.add(thesaurusWrd); + if (thesaurusWrd != wd) + wrds.add(thesaurusWrd); } - startWd=wd; + startWd = wd; } - - if (wrds.size()!=0) { + + if (wrds.size() != 0) { // put rest of the words int[] ths = new int[wrds.size()]; - for(int k=0;k<ths.length;k++) ths[k]=wrds.get(k); + for (int k = 0; k < ths.length; k++) + ths[k] = wrds.get(k); word2path[startWd] = ths; - // DB.println(""+wrds+" size "+ths.length); + // DB.println(""+wrds+" size "+ths.length); wrds.clear(); - - - } - + inputReader.close(); - int fill=0; - for(int l = 0; l<word2path.length; l++ ){ - if (word2path[l]!=null) fill++; + int fill = 0; + for (int[] element : word2path) { + if (element != null) + fill++; } /* - for(int l = 0; l<word2path.length; l++ ){ - if (word2path[l][1]!=0) fillL++; - if (word2path[l][1]<-1) System.out.println("lower "+word2path[l][1]); - } - */ - System.out.println("filled "+fill+" of "+word2path.length); - + * for(int l = 0; l<word2path.length; l++ ){ if (word2path[l][1]!=0) + * fillL++; if (word2path[l][1]<-1) + * System.out.println("lower "+word2path[l][1]); } + */ + System.out.println("filled " + fill + " of " + word2path.length); + } catch (Exception e) { e.printStackTrace(); } @@ -127,68 +130,71 @@ final public class Thesaurus { /** * Read the cluster + * * @param dos - * @throws IOException + * @throws IOException */ public Thesaurus(DataInputStream dis) throws IOException { word2path = new int[dis.readInt()][]; - for(int i =0;i<word2path.length;i++) { + for (int i = 0; i < word2path.length; i++) { int len = dis.readInt(); - if (len>0) { + if (len > 0) { word2path[i] = new int[len]; - for(int j =0;j<len;j++) { + for (int j = 0; j < len; j++) { word2path[i][j] = dis.readInt(); - - } + + } } - - word2path[i][0]=dis.readShort(); + + word2path[i][0] = dis.readShort(); } - DB.println("Read cluster with "+word2path.length+" words "); + DB.println("Read cluster with " + word2path.length + " words "); } - + /** * Write the cluster + * * @param dos - * @throws IOException + * @throws IOException */ public void write(DataOutputStream dos) throws IOException { dos.writeInt(word2path.length); - for(int[] i : word2path) { - dos.writeInt(i==null?0:i.length); - - if (i!=null) { - for(int j=0;j<i.length;j++) { - - dos.writeInt(i[j]); - + for (int[] i : word2path) { + dos.writeInt(i == null ? 0 : i.length); + + if (i != null) { + for (int element : i) { + + dos.writeInt(element); + } - + } } - + } /** - * @param form the id of a word form + * @param form + * the id of a word form * @return the short path to the word form in the cluster - - final public int getSP(int form) { - if (word2path.length<form) return -1; - return word2path[form][0]; - } - */ + * + * final public int getSP(int form) { if (word2path.length<form) + * return -1; return word2path[form][0]; } + */ /** * get the long path to a word form in the cluster - * @param form the id of a word form + * + * @param form + * the id of a word form * @return the long path to the word */ final public int get(int form, int k) { - if (word2path.length<form || word2path[form]==null) return -1; + if (word2path.length < form || word2path[form] == null) + return -1; return word2path[form][k]; } - } diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLReader04.java b/dependencyParser/mate-tools/src/is2/io/CONLLReader04.java index 4ca5254..695f10d 100644 --- a/dependencyParser/mate-tools/src/is2/io/CONLLReader04.java +++ b/dependencyParser/mate-tools/src/is2/io/CONLLReader04.java @@ -1,25 +1,22 @@ - package is2.io; -import is2.data.Instances; -import is2.data.SentenceData09; -import is2.util.DB; - import java.io.BufferedReader; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; - +import is2.data.Instances; +import is2.data.SentenceData09; +import is2.util.DB; /** * This class reads files in the CONLL-08 and CONLL-09 format. * * @author Bernd Bohnet */ -public class CONLLReader04 { +public class CONLLReader04 { private static final String US = "_"; private static final String REGEX = "\t"; @@ -36,46 +33,45 @@ public class CONLLReader04 { private BufferedReader inputReader; - public static final int TASK08=8; - public static final int TASK09=9; - - public static boolean normalizeOn =true; + public static final int TASK08 = 8; + public static final int TASK09 = 9; + public static boolean normalizeOn = true; private int lineNumber = 0; - public CONLLReader04(){} + public CONLLReader04() { + } - public CONLLReader04(String file){ - lineNumber=0; + public CONLLReader04(String file) { + lineNumber = 0; try { - inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768); //,"UTF-8" - } catch (Exception e) { + inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), 32768); // ,"UTF-8" + } catch (Exception e) { e.printStackTrace(); } } - public CONLLReader04(String file, int task){ + public CONLLReader04(String file, int task) { this(file); } - - - public void startReading(String file ){ - lineNumber=0; + public void startReading(String file) { + lineNumber = 0; try { - inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768); + inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), 32768); } catch (Exception e) { e.printStackTrace(); } } - /**i.forms[heads[l]-1]+" "+rel+" "+ - * Read a instance + /** + * i.forms[heads[l]-1]+" "+rel+" "+ Read a instance + * * @return a instance - * @throws Exception + * @throws Exception */ - public SentenceData09 getNext() throws Exception { + public SentenceData09 getNext() throws Exception { try { @@ -83,56 +79,52 @@ public class CONLLReader04 { String line = inputReader.readLine(); - while(line !=null && line.length()<2) { + while (line != null && line.length() < 2) { line = inputReader.readLine(); lineNumber++; - System.out.println("skip empty line at line "+lineNumber); + System.out.println("skip empty line at line " + lineNumber); } - while (line != null && line.length()!=0 && !line.startsWith(STRING) &&!line.startsWith(REGEX)) { + while (line != null && line.length() != 0 && !line.startsWith(STRING) && !line.startsWith(REGEX)) { lineList.add(line.split(REGEX)); line = inputReader.readLine(); lineNumber++; } - - int length = lineList.size(); - if(length == 0) { + if (length == 0) { inputReader.close(); return null; } SentenceData09 it = new SentenceData09(); - // column content - // 1 id - // 2 form - // 3 lemma - // 4 cpos-tag - // 5 pos-tog - // 6 feats - // 7 head - // 8 deprel - - - it.forms = new String[length+1]; - - it.plemmas = new String[length+1]; - it.gpos = new String[length+1]; - it.labels = new String[length+1]; - it.heads = new int[length+1]; - it.pheads = new int[length+1]; - it.plabels = new String[length+1]; - - it.ppos = new String[length+1]; - it.lemmas = new String[length+1]; - it.fillp = new String[length+1]; - it.feats = new String[length+1][]; - it.ofeats = new String[length+1]; - it.pfeats = new String[length+1]; - + // column content + // 1 id + // 2 form + // 3 lemma + // 4 cpos-tag + // 5 pos-tog + // 6 feats + // 7 head + // 8 deprel + + it.forms = new String[length + 1]; + + it.plemmas = new String[length + 1]; + it.gpos = new String[length + 1]; + it.labels = new String[length + 1]; + it.heads = new int[length + 1]; + it.pheads = new int[length + 1]; + it.plabels = new String[length + 1]; + + it.ppos = new String[length + 1]; + it.lemmas = new String[length + 1]; + it.fillp = new String[length + 1]; + it.feats = new String[length + 1][]; + it.ofeats = new String[length + 1]; + it.pfeats = new String[length + 1]; it.forms[0] = ROOT; it.plemmas[0] = ROOT_LEMMA; @@ -149,51 +141,48 @@ public class CONLLReader04 { // root is 0 therefore start with 1 - for(int i = 1; i <= length; i++) { - - String[] info = lineList.get(i-1); + for (int i = 1; i <= length; i++) { + + String[] info = lineList.get(i - 1); - it.forms[i] = info[0]; //normalize( + it.forms[i] = info[0]; // normalize( it.lemmas[i] = "_"; - it.plemmas[i] ="_"; - + it.plemmas[i] = "_"; + // 3 cpos - - it.gpos[i] = info[1]; + + it.gpos[i] = info[1]; it.ppos[i] = info[1]; - - it.ofeats[i]="_"; - + it.ofeats[i] = "_"; - it.feats[i]=null; - // it.feats[i] =info[5].split(PIPE); + it.feats[i] = null; + // it.feats[i] =info[5].split(PIPE); it.pfeats[i] = "_"; - - if (info[2].equals(US)) it.heads[i]=-1; - else it.heads[i] = Integer.parseInt(info[2]);// head - - - - it.labels[i] = info[3]; + if (info[2].equals(US)) + it.heads[i] = -1; + else + it.heads[i] = Integer.parseInt(info[2]);// head + it.labels[i] = info[3]; } return it; - } catch(Exception e) { - System.out.println("\n!!! Error in input file at line : "+lineNumber+" "+e.toString()); + } catch (Exception e) { + System.out.println("\n!!! Error in input file at line : " + lineNumber + " " + e.toString()); e.printStackTrace(); throw new Exception(); - // return null; + // return null; } } /** * Read a instance an store it in a compressed format + * * @param is * @return * @throws IOException @@ -202,71 +191,74 @@ public class CONLLReader04 { SentenceData09 it = getNext(); - if (is !=null) insert(is,it); + if (is != null) + insert(is, it); return it; } - - - final public boolean insert(Instances is, SentenceData09 it) throws IOException { try { - if(it == null) { + if (it == null) { inputReader.close(); return false; } - int i= is.createInstance09(it.length()); + int i = is.createInstance09(it.length()); - for(int p = 0; p < it.length(); p++) { + for (int p = 0; p < it.length(); p++) { is.setForm(i, p, normalize(it.forms[p])); - is.setGPos(i, p, it.gpos[p]); + is.setGPos(i, p, it.gpos[p]); - if (it.ppos[p]==null||it.ppos[p].equals(US)) { + if (it.ppos[p] == null || it.ppos[p].equals(US)) { is.setPPoss(i, p, it.gpos[p]); - } else is.setPPoss(i, p, it.ppos[p]); - + } else + is.setPPoss(i, p, it.ppos[p]); - if (it.plemmas[p]==null ||it.plemmas[p].equals(US)) { + if (it.plemmas[p] == null || it.plemmas[p].equals(US)) { is.setLemma(i, p, normalize(it.forms[p])); - } else is.setLemma(i, p, normalize(it.plemmas[p])); - - - is.setFeats(i,p,it.feats[p]); + } else + is.setLemma(i, p, normalize(it.plemmas[p])); + is.setFeats(i, p, it.feats[p]); - is.setFeature(i,p,it.ofeats[p]); + is.setFeature(i, p, it.ofeats[p]); + is.setRel(i, p, it.labels[p]); + if (it.plabels != null) + is.setPRel(i, p, it.plabels[p]); + is.setHead(i, p, it.heads[p]); + if (it.pheads != null) + is.setPHead(i, p, it.pheads[p]); - is.setRel(i,p,it.labels[p]); - if (it.plabels!=null) is.setPRel(i,p,it.plabels[p]); - is.setHead(i,p,it.heads[p]); - if (it.pheads!=null) is.setPHead(i,p,it.pheads[p]); - - if (it.fillp!=null && it.fillp[p]!=null && it.fillp[p].startsWith("Y")) is.pfill[i].set(p); - else is.pfill[i].clear(p); + if (it.fillp != null && it.fillp[p] != null && it.fillp[p].startsWith("Y")) + is.pfill[i].set(p); + else + is.pfill[i].clear(p); } - if (is.createSem(i,it)) { - DB.println("count "+i+" len "+it.length()); + if (is.createSem(i, it)) { + DB.println("count " + i + " len " + it.length()); DB.println(it.printSem()); } - } catch(Exception e ){ - DB.println("head "+it); + } catch (Exception e) { + DB.println("head " + it); e.printStackTrace(); } return true; } - public static String normalize (String s) { - if (!normalizeOn) return s; - if(s.matches(NUMBER)) return NUM; + + public static String normalize(String s) { + if (!normalizeOn) + return s; + if (s.matches(NUMBER)) + return NUM; return s; - } + } } diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLReader06.java b/dependencyParser/mate-tools/src/is2/io/CONLLReader06.java index 351fa04..10d1d2d 100755 --- a/dependencyParser/mate-tools/src/is2/io/CONLLReader06.java +++ b/dependencyParser/mate-tools/src/is2/io/CONLLReader06.java @@ -1,25 +1,22 @@ - package is2.io; -import is2.data.Instances; -import is2.data.SentenceData09; -import is2.util.DB; - import java.io.BufferedReader; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; - +import is2.data.Instances; +import is2.data.SentenceData09; +import is2.util.DB; /** * This class reads files in the CONLL-08 and CONLL-09 format. * * @author Bernd Bohnet */ -public class CONLLReader06 { +public class CONLLReader06 { private static final String US = "_"; private static final String REGEX = "\t"; @@ -36,46 +33,45 @@ public class CONLLReader06 { private BufferedReader inputReader; - public static final int TASK08=8; - public static final int TASK09=9; - - public static boolean normalizeOn =true; + public static final int TASK08 = 8; + public static final int TASK09 = 9; + public static boolean normalizeOn = true; private int lineNumber = 0; - public CONLLReader06(){} + public CONLLReader06() { + } - public CONLLReader06(String file){ - lineNumber=0; + public CONLLReader06(String file) { + lineNumber = 0; try { - inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768); //,"UTF-8" - } catch (Exception e) { + inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), 32768); // ,"UTF-8" + } catch (Exception e) { e.printStackTrace(); } } - public CONLLReader06(String file, int task){ + public CONLLReader06(String file, int task) { this(file); } - - - public void startReading(String file ){ - lineNumber=0; + public void startReading(String file) { + lineNumber = 0; try { - inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768); + inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), 32768); } catch (Exception e) { e.printStackTrace(); } } - /**i.forms[heads[l]-1]+" "+rel+" "+ - * Read a instance + /** + * i.forms[heads[l]-1]+" "+rel+" "+ Read a instance + * * @return a instance - * @throws Exception + * @throws Exception */ - public SentenceData09 getNext() throws Exception { + public SentenceData09 getNext() throws Exception { try { @@ -83,56 +79,52 @@ public class CONLLReader06 { String line = inputReader.readLine(); - while(line !=null && line.length()==0) { + while (line != null && line.length() == 0) { line = inputReader.readLine(); lineNumber++; - System.out.println("skip empty line at line "+lineNumber); + System.out.println("skip empty line at line " + lineNumber); } - while (line != null && line.length()!=0 && !line.startsWith(STRING) &&!line.startsWith(REGEX)) { + while (line != null && line.length() != 0 && !line.startsWith(STRING) && !line.startsWith(REGEX)) { lineList.add(line.split(REGEX)); line = inputReader.readLine(); lineNumber++; } - - int length = lineList.size(); - if(length == 0) { + if (length == 0) { inputReader.close(); return null; } SentenceData09 it = new SentenceData09(); - // column content - // 1 id - // 2 form - // 3 lemma - // 4 cpos-tag - // 5 pos-tog - // 6 feats - // 7 head - // 8 deprel - - - it.forms = new String[length+1]; - - it.plemmas = new String[length+1]; - it.gpos = new String[length+1]; - it.labels = new String[length+1]; - it.heads = new int[length+1]; - it.pheads = new int[length+1]; - it.plabels = new String[length+1]; - - it.ppos = new String[length+1]; - it.lemmas = new String[length+1]; - it.fillp = new String[length+1]; - it.feats = new String[length+1][]; - it.ofeats = new String[length+1]; - it.pfeats = new String[length+1]; - + // column content + // 1 id + // 2 form + // 3 lemma + // 4 cpos-tag + // 5 pos-tog + // 6 feats + // 7 head + // 8 deprel + + it.forms = new String[length + 1]; + + it.plemmas = new String[length + 1]; + it.gpos = new String[length + 1]; + it.labels = new String[length + 1]; + it.heads = new int[length + 1]; + it.pheads = new int[length + 1]; + it.plabels = new String[length + 1]; + + it.ppos = new String[length + 1]; + it.lemmas = new String[length + 1]; + it.fillp = new String[length + 1]; + it.feats = new String[length + 1][]; + it.ofeats = new String[length + 1]; + it.pfeats = new String[length + 1]; it.forms[0] = ROOT; it.plemmas[0] = ROOT_LEMMA; @@ -149,54 +141,55 @@ public class CONLLReader06 { // root is 0 therefore start with 1 - for(int i = 1; i <= length; i++) { - - String[] info = lineList.get(i-1); + for (int i = 1; i <= length; i++) { + + String[] info = lineList.get(i - 1); - it.forms[i] = info[1]; //normalize( + it.forms[i] = info[1]; // normalize( it.lemmas[i] = info[2]; - it.plemmas[i] =info[2]; - + it.plemmas[i] = info[2]; + // 3 cpos - - it.gpos[i] = info[3]; - it.ppos[i] = info[4]; - - it.ofeats[i]=info[5].equals(CONLLWriter09.DASH)? "": info[5]; + it.gpos[i] = info[3]; + it.ppos[i] = info[4]; + it.ofeats[i] = info[5].equals(CONLLWriter09.DASH) ? "" : info[5]; - if (info[5].equals(CONLLWriter09.DASH)) it.feats[i]=null; + if (info[5].equals(CONLLWriter09.DASH)) + it.feats[i] = null; else { - it.feats[i] =info[5].split(PIPE); + it.feats[i] = info[5].split(PIPE); it.pfeats[i] = info[5]; } - if (info[6].equals(US)) it.heads[i]=-1; - else it.heads[i] = Integer.parseInt(info[6]);// head - - -// it.phead[i]=info[9].equals(US) ? it.phead[i]=-1: Integer.parseInt(info[9]);// head + if (info[6].equals(US)) + it.heads[i] = -1; + else + it.heads[i] = Integer.parseInt(info[6]);// head - it.labels[i] = info[7]; -// it.pedge[i] = info[11]; + // it.phead[i]=info[9].equals(US) ? it.phead[i]=-1: + // Integer.parseInt(info[9]);// head + it.labels[i] = info[7]; + // it.pedge[i] = info[11]; } return it; - } catch(Exception e) { - System.out.println("\n!!! Error in input file at line : "+lineNumber+" "+e.toString()); + } catch (Exception e) { + System.out.println("\n!!! Error in input file at line : " + lineNumber + " " + e.toString()); e.printStackTrace(); throw new Exception(); - // return null; + // return null; } } /** * Read a instance an store it in a compressed format + * * @param is * @return * @throws IOException @@ -205,71 +198,74 @@ public class CONLLReader06 { SentenceData09 it = getNext(); - if (is !=null) insert(is,it); + if (is != null) + insert(is, it); return it; } - - - final public boolean insert(Instances is, SentenceData09 it) throws IOException { try { - if(it == null) { + if (it == null) { inputReader.close(); return false; } - int i= is.createInstance09(it.length()); + int i = is.createInstance09(it.length()); - for(int p = 0; p < it.length(); p++) { + for (int p = 0; p < it.length(); p++) { is.setForm(i, p, normalize(it.forms[p])); - is.setGPos(i, p, it.gpos[p]); + is.setGPos(i, p, it.gpos[p]); - if (it.ppos[p]==null||it.ppos[p].equals(US)) { + if (it.ppos[p] == null || it.ppos[p].equals(US)) { is.setPPoss(i, p, it.gpos[p]); - } else is.setPPoss(i, p, it.ppos[p]); - + } else + is.setPPoss(i, p, it.ppos[p]); - if (it.plemmas[p]==null ||it.plemmas[p].equals(US)) { + if (it.plemmas[p] == null || it.plemmas[p].equals(US)) { is.setLemma(i, p, normalize(it.forms[p])); - } else is.setLemma(i, p, normalize(it.plemmas[p])); - - - is.setFeats(i,p,it.feats[p]); + } else + is.setLemma(i, p, normalize(it.plemmas[p])); + is.setFeats(i, p, it.feats[p]); - is.setFeature(i,p,it.ofeats[p]); + is.setFeature(i, p, it.ofeats[p]); + is.setRel(i, p, it.labels[p]); + if (it.plabels != null) + is.setPRel(i, p, it.plabels[p]); + is.setHead(i, p, it.heads[p]); + if (it.pheads != null) + is.setPHead(i, p, it.pheads[p]); - is.setRel(i,p,it.labels[p]); - if (it.plabels!=null) is.setPRel(i,p,it.plabels[p]); - is.setHead(i,p,it.heads[p]); - if (it.pheads!=null) is.setPHead(i,p,it.pheads[p]); - - if (it.fillp!=null && it.fillp[p]!=null && it.fillp[p].startsWith("Y")) is.pfill[i].set(p); - else is.pfill[i].clear(p); + if (it.fillp != null && it.fillp[p] != null && it.fillp[p].startsWith("Y")) + is.pfill[i].set(p); + else + is.pfill[i].clear(p); } - if (is.createSem(i,it)) { - DB.println("count "+i+" len "+it.length()); + if (is.createSem(i, it)) { + DB.println("count " + i + " len " + it.length()); DB.println(it.printSem()); } - } catch(Exception e ){ - DB.println("head "+it); + } catch (Exception e) { + DB.println("head " + it); e.printStackTrace(); } return true; } - public static String normalize (String s) { - if (!normalizeOn) return s; - if(s.matches(NUMBER)) return NUM; + + public static String normalize(String s) { + if (!normalizeOn) + return s; + if (s.matches(NUMBER)) + return NUM; return s; - } + } } diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLReader08.java b/dependencyParser/mate-tools/src/is2/io/CONLLReader08.java index a6194a3..467e853 100644 --- a/dependencyParser/mate-tools/src/is2/io/CONLLReader08.java +++ b/dependencyParser/mate-tools/src/is2/io/CONLLReader08.java @@ -1,79 +1,71 @@ - package is2.io; -import is2.data.Instances; -import is2.data.SentenceData09; -import is2.util.DB; - import java.io.BufferedReader; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; - +import is2.data.Instances; +import is2.data.SentenceData09; +import is2.util.DB; /** * This class reads files in the CONLL-09 format. - * + * * @author Bernd Bohnet */ public class CONLLReader08 extends IOGenerals { - private BufferedReader inputReader; public static final boolean NORMALIZE = true; public static final boolean NO_NORMALIZE = false; - public boolean normalizeOn =true; + public boolean normalizeOn = true; - - - private int format = 0; + private int format = 0; private int lineNumber = 0; + public CONLLReader08(boolean normalize) { - public CONLLReader08(boolean normalize){ - - normalizeOn=normalize; + normalizeOn = normalize; } - public CONLLReader08(String file){ - lineNumber=0; + public CONLLReader08(String file) { + lineNumber = 0; try { - inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768); + inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), 32768); } catch (Exception e) { e.printStackTrace(); } } - public CONLLReader08(String file, boolean normalize){ + public CONLLReader08(String file, boolean normalize) { this(file); - normalizeOn=normalize; + normalizeOn = normalize; } /** - * Sets the input format: - * - * CONLL09 is standard, - * ONE_LINE - * - * @param format the fomrat (see the constants starting with F_). + * Sets the input format: + * + * CONLL09 is standard, ONE_LINE + * + * @param format + * the fomrat (see the constants starting with F_). */ public void setInputFormat(int format) { - this.format=format; + this.format = format; } - - /** - * + * */ - public CONLLReader08() {} + public CONLLReader08() { + } /** * @param testfile @@ -83,62 +75,64 @@ public class CONLLReader08 extends IOGenerals { this(testfile); } - public void startReading(String file ){ - lineNumber=0; + public void startReading(String file) { + lineNumber = 0; try { - inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768); + inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), 32768); } catch (Exception e) { e.printStackTrace(); } } - public SentenceData09 getNext() { + public SentenceData09 getNext() { - if (F_ONE_LINE == format) return getNextOneLine(); - else return getNextCoNLL09(); + if (F_ONE_LINE == format) + return getNextOneLine(); + else + return getNextCoNLL09(); } - + /** * @return */ private SentenceData09 getNextOneLine() { - - String line=null; - int i=0; - try { + String line = null; + int i = 0; + try { line = inputReader.readLine(); lineNumber++; - if (line==null ) { + if (line == null) { inputReader.close(); return null; } - + String[] tokens = line.split(" "); int length = tokens.length; - if (line.isEmpty()) length=0; + if (line.isEmpty()) + length = 0; - SentenceData09 it = new SentenceData09(); + SentenceData09 it = new SentenceData09(); - it.forms = new String[length+1]; + it.forms = new String[length + 1]; - it.plemmas = new String[length+1]; - // it.ppos = new String[length+1]; - it.gpos = new String[length+1]; - it.labels = new String[length+1]; - it.heads = new int[length+1]; - it.pheads = new int[length+1]; - it.plabels = new String[length+1]; + it.plemmas = new String[length + 1]; + // it.ppos = new String[length+1]; + it.gpos = new String[length + 1]; + it.labels = new String[length + 1]; + it.heads = new int[length + 1]; + it.pheads = new int[length + 1]; + it.plabels = new String[length + 1]; - it.ppos = new String[length+1]; - it.lemmas = new String[length+1]; - it.fillp = new String[length+1]; - it.feats = new String[length+1][]; - it.ofeats = new String[length+1]; - it.pfeats = new String[length+1]; - it.id = new String[length+1]; + it.ppos = new String[length + 1]; + it.lemmas = new String[length + 1]; + it.fillp = new String[length + 1]; + it.feats = new String[length + 1][]; + it.ofeats = new String[length + 1]; + it.pfeats = new String[length + 1]; + it.id = new String[length + 1]; it.forms[0] = ROOT; it.plemmas[0] = ROOT_LEMMA; @@ -152,47 +146,43 @@ public class CONLLReader08 extends IOGenerals { it.plabels[0] = NO_TYPE; it.pheads[0] = -1; it.ofeats[0] = NO_TYPE; - it.id[0] ="0"; + it.id[0] = "0"; // root is 0 therefore start with 1 - for(i = 1; i <= length; i++) { - - it.id[i] = ""+i; - - it.forms[i] = this.normalizeOn?normalize(tokens[i-1]):tokens[i-1]; //normalize( + for (i = 1; i <= length; i++) { + + it.id[i] = "" + i; + it.forms[i] = this.normalizeOn ? normalize(tokens[i - 1]) : tokens[i - 1]; // normalize( } - + return it; - } catch(Exception e) { - System.out.println("\n!!! Error in input file sentence before line: "+lineNumber+" (in sentence line "+i+" ) "+e.toString()); + } catch (Exception e) { + System.out.println("\n!!! Error in input file sentence before line: " + lineNumber + " (in sentence line " + + i + " ) " + e.toString()); e.printStackTrace(); System.exit(0); - - - - //throw new Exception(); + // throw new Exception(); return null; } - - } - /**i.forms[heads[l]-1]+" "+rel+" "+ - * Read a instance + /** + * i.forms[heads[l]-1]+" "+rel+" "+ Read a instance + * * @return a instance - * @throws Exception + * @throws Exception */ - - public SentenceData09 getNextCoNLL09() { - String line=null; - int i=0; + public SentenceData09 getNextCoNLL09() { + + String line = null; + int i = 0; try { ArrayList<String[]> lineList = new ArrayList<String[]>(); @@ -200,46 +190,44 @@ public class CONLLReader08 extends IOGenerals { line = inputReader.readLine(); lineNumber++; - while(line !=null && line.length()==0) { + while (line != null && line.length() == 0) { line = inputReader.readLine(); lineNumber++; - System.out.println("skip empty line at line "+lineNumber); - } + System.out.println("skip empty line at line " + lineNumber); + } - while (line != null && line.length()!=0 && !line.startsWith(STRING) &&!line.startsWith(REGEX)) { + while (line != null && line.length() != 0 && !line.startsWith(STRING) && !line.startsWith(REGEX)) { lineList.add(line.split(REGEX)); line = inputReader.readLine(); lineNumber++; } - - int length = lineList.size(); - if(length == 0) { + if (length == 0) { inputReader.close(); return null; } SentenceData09 it = new SentenceData09(); - it.forms = new String[length+1]; + it.forms = new String[length + 1]; - it.plemmas = new String[length+1]; - // it.ppos = new String[length+1]; - it.gpos = new String[length+1]; - it.labels = new String[length+1]; - it.heads = new int[length+1]; - it.pheads = new int[length+1]; - it.plabels = new String[length+1]; + it.plemmas = new String[length + 1]; + // it.ppos = new String[length+1]; + it.gpos = new String[length + 1]; + it.labels = new String[length + 1]; + it.heads = new int[length + 1]; + it.pheads = new int[length + 1]; + it.plabels = new String[length + 1]; - it.ppos = new String[length+1]; - it.lemmas = new String[length+1]; - it.fillp = new String[length+1]; - it.feats = new String[length+1][]; - it.ofeats = new String[length+1]; - it.pfeats = new String[length+1]; - it.id = new String[length+1]; + it.ppos = new String[length + 1]; + it.lemmas = new String[length + 1]; + it.fillp = new String[length + 1]; + it.feats = new String[length + 1][]; + it.ofeats = new String[length + 1]; + it.pfeats = new String[length + 1]; + it.id = new String[length + 1]; it.forms[0] = ROOT; it.plemmas[0] = ROOT_LEMMA; @@ -253,73 +241,69 @@ public class CONLLReader08 extends IOGenerals { it.plabels[0] = NO_TYPE; it.pheads[0] = -1; it.ofeats[0] = NO_TYPE; - it.id[0] ="0"; + it.id[0] = "0"; // root is 0 therefore start with 1 - for(i = 1; i <= length; i++) { - - - - String[] info = lineList.get(i-1); + for (i = 1; i <= length; i++) { + + String[] info = lineList.get(i - 1); it.id[i] = info[0]; - it.forms[i] = info[5]; //normalize( - if (info.length<3) continue; + it.forms[i] = info[5]; // normalize( + if (info.length < 3) + continue; + + // it.lemmas[i] = info[2]; + it.plemmas[i] = info[6]; + it.gpos[i] = info[3]; - //it.lemmas[i] = info[2]; - it.plemmas[i] =info[6]; - it.gpos[i] = info[3]; + if (info.length < 5) + continue; + it.ppos[i] = info[7];// .split("\\|")[0]; - if (info.length<5) continue; - it.ppos[i] = info[7];//.split("\\|")[0]; - // feat 6 // pfeat 7 // this causes trouble in the perl eval09 scirpt - //it.ofeats[i]=info[6].equals(CONLLWriter09.DASH)? "" : info[6]; + // it.ofeats[i]=info[6].equals(CONLLWriter09.DASH)? "" : + // info[6]; // now we try underscore - it.ofeats[i]="_"; + it.ofeats[i] = "_"; - - // it.feats[i] ="_"; - it.pfeats[i] = "_"; - + // it.feats[i] ="_"; + it.pfeats[i] = "_"; - - - if (info[8].equals(US)) it.heads[i]=-1; - else it.heads[i] = Integer.parseInt(info[8]);// head - - it.pheads[i]=-1;// head + if (info[8].equals(US)) + it.heads[i] = -1; + else + it.heads[i] = Integer.parseInt(info[8]);// head - it.labels[i] = info[9]; - it.plabels[i] = "_"; - - it.fillp[i]=info[10]; - - if (info.length>11) { - if (!info[10].equals(US)) it.addPredicate(i,info[10]); - for(int k=11;k<info.length;k++) it.addArgument(i,k-11,info[k]); - } + it.pheads[i] = -1;// head + it.labels[i] = info[9]; + it.plabels[i] = "_"; + it.fillp[i] = info[10]; + if (info.length > 11) { + if (!info[10].equals(US)) + it.addPredicate(i, info[10]); + for (int k = 11; k < info.length; k++) + it.addArgument(i, k - 11, info[k]); + } } return it; - } catch(Exception e) { - System.out.println("\n!!! Error in input file sentence before line: "+lineNumber+" (in sentence line "+i+" ) "+e.toString()); + } catch (Exception e) { + System.out.println("\n!!! Error in input file sentence before line: " + lineNumber + " (in sentence line " + + i + " ) " + e.toString()); e.printStackTrace(); System.exit(0); - - - - //throw new Exception(); + // throw new Exception(); return null; } @@ -327,87 +311,97 @@ public class CONLLReader08 extends IOGenerals { /** * Read a instance an store it in a compressed format + * * @param is * @return * @throws IOException */ - final public SentenceData09 getNext(Instances is) { + final public SentenceData09 getNext(Instances is) { SentenceData09 it = getNext(); - if (is !=null) insert(is,it); + if (is != null) + insert(is, it); return it; } - - - final public boolean insert(Instances is, SentenceData09 it) { try { - if(it == null) { + if (it == null) { inputReader.close(); return false; } - int i= is.createInstance09(it.length()); + int i = is.createInstance09(it.length()); - for(int p = 0; p < it.length(); p++) { + for (int p = 0; p < it.length(); p++) { is.setForm(i, p, normalize(it.forms[p])); - is.setGPos(i, p, it.gpos[p]); + is.setGPos(i, p, it.gpos[p]); - // System.out.println(""+is.gpos[i][p]); + // System.out.println(""+is.gpos[i][p]); - if (it.ppos[p]==null||it.ppos[p].equals(US)) { - - is.setPPoss(i, p, it.gpos[p]); - } else is.setPPoss(i, p, it.ppos[p]); + if (it.ppos[p] == null || it.ppos[p].equals(US)) { + is.setPPoss(i, p, it.gpos[p]); + } else + is.setPPoss(i, p, it.ppos[p]); - if (it.plemmas[p]==null ||it.plemmas[p].equals(US)) { + if (it.plemmas[p] == null || it.plemmas[p].equals(US)) { is.setLemma(i, p, normalize(it.forms[p])); - } else is.setLemma(i, p, normalize(it.plemmas[p])); + } else + is.setLemma(i, p, normalize(it.plemmas[p])); - if (it.lemmas!=null) - if (it.lemmas[p]==null ) { // ||it.org_lemmas[p].equals(US) that harms a lot the lemmatizer + if (it.lemmas != null) + if (it.lemmas[p] == null) { // ||it.org_lemmas[p].equals(US) + // that harms a lot the + // lemmatizer is.setGLemma(i, p, it.plemmas[p]); - } else is.setGLemma(i, p, it.lemmas[p]); - - - if (it.feats!=null && it.feats[p]!=null) is.setFeats(i,p,it.feats[p]); + } else + is.setGLemma(i, p, it.lemmas[p]); - if (it.ofeats!=null) is.setFeature(i,p,it.ofeats[p]); + if (it.feats != null && it.feats[p] != null) + is.setFeats(i, p, it.feats[p]); + if (it.ofeats != null) + is.setFeature(i, p, it.ofeats[p]); - is.setRel(i,p,it.labels[p]); - if (it.plabels!=null) is.setPRel(i,p,it.plabels[p]); + is.setRel(i, p, it.labels[p]); + if (it.plabels != null) + is.setPRel(i, p, it.plabels[p]); - is.setHead(i,p,it.heads[p]); - if (it.pheads!=null) is.setPHead(i,p,it.pheads[p]); + is.setHead(i, p, it.heads[p]); + if (it.pheads != null) + is.setPHead(i, p, it.pheads[p]); - if (it.fillp!=null && it.fillp[p]!=null && it.fillp[p].startsWith("Y")) is.pfill[i].set(p); - else is.pfill[i].clear(p); + if (it.fillp != null && it.fillp[p] != null && it.fillp[p].startsWith("Y")) + is.pfill[i].set(p); + else + is.pfill[i].clear(p); } - if (is.createSem(i,it)) { - DB.println("count "+i+" len "+it.length()); + if (is.createSem(i, it)) { + DB.println("count " + i + " len " + it.length()); DB.println(it.printSem()); } - } catch(Exception e ){ - DB.println("head "+it); + } catch (Exception e) { + DB.println("head " + it); e.printStackTrace(); } return true; } - public String normalize (String s) { - if (!normalizeOn) return s; - if(s.matches(NUMBER)) return NUM; + + public String normalize(String s) { + if (!normalizeOn) + return s; + if (s.matches(NUMBER)) + return NUM; return s; - } + } } diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLReader09.java b/dependencyParser/mate-tools/src/is2/io/CONLLReader09.java index c020579..cee897a 100755 --- a/dependencyParser/mate-tools/src/is2/io/CONLLReader09.java +++ b/dependencyParser/mate-tools/src/is2/io/CONLLReader09.java @@ -1,79 +1,73 @@ - package is2.io; -import is2.data.Instances; -import is2.data.SentenceData09; -import is2.util.DB; - import java.io.BufferedReader; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; - +import is2.data.Instances; +import is2.data.SentenceData09; +import is2.util.DB; /** * This class reads files in the CONLL-09 format. - * + * * @author Bernd Bohnet */ public class CONLLReader09 extends IOGenerals { - private BufferedReader inputReader; public static final boolean NORMALIZE = true; public static final boolean NO_NORMALIZE = false; - public boolean normalizeOn =true; + public boolean normalizeOn = true; - static public String joint =""; - - private int format = 0; + static public String joint = ""; - private int lineNumber = 0; + private int format = 0; + private int lineNumber = 0; - public CONLLReader09(boolean normalize){ + public CONLLReader09(boolean normalize) { - normalizeOn=normalize; + normalizeOn = normalize; } - public CONLLReader09(String file){ - lineNumber=0; + public CONLLReader09(String file) { + lineNumber = 0; try { - inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768); + inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), 32768); } catch (Exception e) { e.printStackTrace(); } } - public CONLLReader09(String file, boolean normalize){ + public CONLLReader09(String file, boolean normalize) { this(file); - normalizeOn=normalize; + normalizeOn = normalize; } /** - * Sets the input format: - * - * CONLL09 is standard, - * ONE_LINE - * - * @param format the fomrat (see the constants starting with F_). + * Sets the input format: + * + * CONLL09 is standard, ONE_LINE + * + * @param format + * the fomrat (see the constants starting with F_). */ public void setInputFormat(int format) { - this.format=format; + this.format = format; } - - /** - * + * */ - public CONLLReader09() {} + public CONLLReader09() { + } /** * @param testfile @@ -83,62 +77,64 @@ public class CONLLReader09 extends IOGenerals { this(testfile); } - public void startReading(String file ){ - lineNumber=0; + public void startReading(String file) { + lineNumber = 0; try { - inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768); + inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), 32768); } catch (Exception e) { e.printStackTrace(); } } - public SentenceData09 getNext() { + public SentenceData09 getNext() { - if (F_ONE_LINE == format) return getNextOneLine(); - else return getNextCoNLL09(); + if (F_ONE_LINE == format) + return getNextOneLine(); + else + return getNextCoNLL09(); } - + /** * @return */ private SentenceData09 getNextOneLine() { - - String line=null; - int i=0; - try { + String line = null; + int i = 0; + try { line = inputReader.readLine(); lineNumber++; - if (line==null ) { + if (line == null) { inputReader.close(); return null; } - + String[] tokens = line.split(" "); int length = tokens.length; - if (line.isEmpty()) length=0; + if (line.isEmpty()) + length = 0; - SentenceData09 it = new SentenceData09(); + SentenceData09 it = new SentenceData09(); - it.forms = new String[length+1]; + it.forms = new String[length + 1]; - it.plemmas = new String[length+1]; - // it.ppos = new String[length+1]; - it.gpos = new String[length+1]; - it.labels = new String[length+1]; - it.heads = new int[length+1]; - it.pheads = new int[length+1]; - it.plabels = new String[length+1]; + it.plemmas = new String[length + 1]; + // it.ppos = new String[length+1]; + it.gpos = new String[length + 1]; + it.labels = new String[length + 1]; + it.heads = new int[length + 1]; + it.pheads = new int[length + 1]; + it.plabels = new String[length + 1]; - it.ppos = new String[length+1]; - it.lemmas = new String[length+1]; - it.fillp = new String[length+1]; - it.feats = new String[length+1][]; - it.ofeats = new String[length+1]; - it.pfeats = new String[length+1]; - it.id = new String[length+1]; + it.ppos = new String[length + 1]; + it.lemmas = new String[length + 1]; + it.fillp = new String[length + 1]; + it.feats = new String[length + 1][]; + it.ofeats = new String[length + 1]; + it.pfeats = new String[length + 1]; + it.id = new String[length + 1]; it.forms[0] = ROOT; it.plemmas[0] = ROOT_LEMMA; @@ -152,47 +148,42 @@ public class CONLLReader09 extends IOGenerals { it.plabels[0] = NO_TYPE; it.pheads[0] = -1; it.ofeats[0] = NO_TYPE; - it.id[0] ="0"; + it.id[0] = "0"; // root is 0 therefore start with 1 - for(i = 1; i <= length; i++) { - - it.id[i] = ""+i; - - it.forms[i] = this.normalizeOn?normalize(tokens[i-1]):tokens[i-1]; //normalize( + for (i = 1; i <= length; i++) { + + it.id[i] = "" + i; + it.forms[i] = this.normalizeOn ? normalize(tokens[i - 1]) : tokens[i - 1]; // normalize( } - + return it; - } catch(Exception e) { - System.out.println("\n!!! Error in input file sentence before line: "+lineNumber+" (in sentence line "+i+" ) "+e.toString()); + } catch (Exception e) { + System.out.println("\n!!! Error in input file sentence before line: " + lineNumber + " (in sentence line " + + i + " ) " + e.toString()); e.printStackTrace(); - - - - - //throw new Exception(); + // throw new Exception(); return null; } - - } - /**i.forms[heads[l]-1]+" "+rel+" "+ - * Read a instance + /** + * i.forms[heads[l]-1]+" "+rel+" "+ Read a instance + * * @return a instance - * @throws Exception + * @throws Exception */ - - public SentenceData09 getNextCoNLL09() { - String line=null; - int i=0; + public SentenceData09 getNextCoNLL09() { + + String line = null; + int i = 0; try { ArrayList<String[]> lineList = new ArrayList<String[]>(); @@ -200,46 +191,44 @@ public class CONLLReader09 extends IOGenerals { line = inputReader.readLine(); lineNumber++; - while(line !=null && line.length()==0) { + while (line != null && line.length() == 0) { line = inputReader.readLine(); lineNumber++; - System.out.println("skip empty line at line "+lineNumber); - } + System.out.println("skip empty line at line " + lineNumber); + } - while (line != null && line.length()!=0 && !line.startsWith(STRING) &&!line.startsWith(REGEX)) { + while (line != null && line.length() != 0 && !line.startsWith(STRING) && !line.startsWith(REGEX)) { lineList.add(line.split(REGEX)); line = inputReader.readLine(); lineNumber++; } - - int length = lineList.size(); - if(length == 0) { + if (length == 0) { inputReader.close(); return null; } SentenceData09 it = new SentenceData09(); - it.forms = new String[length+1]; + it.forms = new String[length + 1]; - it.plemmas = new String[length+1]; - // it.ppos = new String[length+1]; - it.gpos = new String[length+1]; - it.labels = new String[length+1]; - it.heads = new int[length+1]; - it.pheads = new int[length+1]; - it.plabels = new String[length+1]; + it.plemmas = new String[length + 1]; + // it.ppos = new String[length+1]; + it.gpos = new String[length + 1]; + it.labels = new String[length + 1]; + it.heads = new int[length + 1]; + it.pheads = new int[length + 1]; + it.plabels = new String[length + 1]; - it.ppos = new String[length+1]; - it.lemmas = new String[length+1]; - it.fillp = new String[length+1]; - it.feats = new String[length+1][]; - it.ofeats = new String[length+1]; - it.pfeats = new String[length+1]; - it.id = new String[length+1]; + it.ppos = new String[length + 1]; + it.lemmas = new String[length + 1]; + it.fillp = new String[length + 1]; + it.feats = new String[length + 1][]; + it.ofeats = new String[length + 1]; + it.pfeats = new String[length + 1]; + it.id = new String[length + 1]; it.forms[0] = ROOT; it.plemmas[0] = ROOT_LEMMA; @@ -253,69 +242,67 @@ public class CONLLReader09 extends IOGenerals { it.plabels[0] = NO_TYPE; it.pheads[0] = -1; it.ofeats[0] = NO_TYPE; - it.id[0] ="0"; + it.id[0] = "0"; // root is 0 therefore start with 1 - for(i = 1; i <= length; i++) { - - - - String[] info = lineList.get(i-1); + for (i = 1; i <= length; i++) { + + String[] info = lineList.get(i - 1); it.id[i] = info[0]; - it.forms[i] = info[1]; //normalize( - if (info.length<3) continue; + it.forms[i] = info[1]; // normalize( + if (info.length < 3) + continue; it.lemmas[i] = info[2]; - it.plemmas[i] =info[3]; - it.gpos[i] = info[4]; + it.plemmas[i] = info[3]; + it.gpos[i] = info[4]; - if (info.length<5) continue; - it.ppos[i] = info[5];//.split("\\|")[0]; + if (info.length < 5) + continue; + it.ppos[i] = info[5];// .split("\\|")[0]; // feat 6 - + // now we try underscore - it.ofeats[i]=info[6].equals(CONLLWriter09.DASH)? "_" : info[6]; - - if (info[7].equals(CONLLWriter09.DASH)) it.feats[i]=null; + it.ofeats[i] = info[6].equals(CONLLWriter09.DASH) ? "_" : info[6]; + + if (info[7].equals(CONLLWriter09.DASH)) + it.feats[i] = null; else { - it.feats[i] =info[7].split(PIPE); + it.feats[i] = info[7].split(PIPE); it.pfeats[i] = info[7]; } - - - if (info[8].equals(US))it.heads[i]=-1; - else it.heads[i] = Integer.parseInt(info[8]);// head - - it.pheads[i]=info[9].equals(US) ? it.pheads[i]=-1: Integer.parseInt(info[9]);// head + if (info[8].equals(US)) + it.heads[i] = -1; + else + it.heads[i] = Integer.parseInt(info[8]);// head + + it.pheads[i] = info[9].equals(US) ? it.pheads[i] = -1 : Integer.parseInt(info[9]);// head - it.labels[i] = info[10]; + it.labels[i] = info[10]; it.plabels[i] = info[11]; - it.fillp[i]=info[12]; + it.fillp[i] = info[12]; - if (info.length>13) { - if (!info[13].equals(US)) it.addPredicate(i,info[13]); - for(int k=14;k<info.length;k++) it.addArgument(i,k-14,info[k]); + if (info.length > 13) { + if (!info[13].equals(US)) + it.addPredicate(i, info[13]); + for (int k = 14; k < info.length; k++) + it.addArgument(i, k - 14, info[k]); } - - - } return it; - } catch(Exception e) { - System.out.println("\n!!! Error in input file sentence before line: "+lineNumber+" (in sentence line "+i+" ) "+e.toString()); + } catch (Exception e) { + System.out.println("\n!!! Error in input file sentence before line: " + lineNumber + " (in sentence line " + + i + " ) " + e.toString()); e.printStackTrace(); System.exit(0); - - - - //throw new Exception(); + // throw new Exception(); return null; } @@ -323,89 +310,100 @@ public class CONLLReader09 extends IOGenerals { /** * Read a instance an store it in a compressed format + * * @param is * @return * @throws IOException */ - final public SentenceData09 getNext(Instances is) { + final public SentenceData09 getNext(Instances is) { SentenceData09 it = getNext(); - if (is !=null) insert(is,it); + if (is != null) + insert(is, it); return it; } - - - final public boolean insert(Instances is, SentenceData09 it) { try { - if(it == null) { + if (it == null) { inputReader.close(); return false; } - int i= is.createInstance09(it.length()); + int i = is.createInstance09(it.length()); - for(int p = 0; p < it.length(); p++) { + for (int p = 0; p < it.length(); p++) { is.setForm(i, p, normalize(it.forms[p])); - // is.setFormOrg(i, p, it.forms[p]); - is.setGPos(i, p, it.gpos[p]); + // is.setFormOrg(i, p, it.forms[p]); + is.setGPos(i, p, it.gpos[p]); - // System.out.println(""+is.gpos[i][p]); + // System.out.println(""+is.gpos[i][p]); - if (it.ppos[p]==null||it.ppos[p].equals(US)) { - - is.setPPoss(i, p, it.gpos[p]); - } else is.setPPoss(i, p, it.ppos[p]); + if (it.ppos[p] == null || it.ppos[p].equals(US)) { + is.setPPoss(i, p, it.gpos[p]); + } else + is.setPPoss(i, p, it.ppos[p]); - if (it.plemmas[p]==null ||it.plemmas[p].equals(US)) { + if (it.plemmas[p] == null || it.plemmas[p].equals(US)) { is.setLemma(i, p, normalize(it.forms[p])); - } else is.setLemma(i, p, normalize(it.plemmas[p])); + } else + is.setLemma(i, p, normalize(it.plemmas[p])); - if (it.lemmas!=null) - if (it.lemmas[p]==null ) { // ||it.org_lemmas[p].equals(US) that harms a lot the lemmatizer + if (it.lemmas != null) + if (it.lemmas[p] == null) { // ||it.org_lemmas[p].equals(US) + // that harms a lot the + // lemmatizer is.setGLemma(i, p, it.plemmas[p]); - } else is.setGLemma(i, p, it.lemmas[p]); - - - if (it.feats!=null && it.feats[p]!=null) is.setFeats(i,p,it.feats[p]); + } else + is.setGLemma(i, p, it.lemmas[p]); - if (it.ofeats!=null) is.setFeature(i,p,it.ofeats[p]); - if (it.pfeats!=null) is.setPFeature(i,p,it.pfeats[p]); + if (it.feats != null && it.feats[p] != null) + is.setFeats(i, p, it.feats[p]); + if (it.ofeats != null) + is.setFeature(i, p, it.ofeats[p]); + if (it.pfeats != null) + is.setPFeature(i, p, it.pfeats[p]); - is.setRel(i,p,it.labels[p]); - if (it.plabels!=null) is.setPRel(i,p,it.plabels[p]); + is.setRel(i, p, it.labels[p]); + if (it.plabels != null) + is.setPRel(i, p, it.plabels[p]); - is.setHead(i,p,it.heads[p]); - if (it.pheads!=null) is.setPHead(i,p,it.pheads[p]); + is.setHead(i, p, it.heads[p]); + if (it.pheads != null) + is.setPHead(i, p, it.pheads[p]); - if (it.fillp!=null && it.fillp[p]!=null && it.fillp[p].startsWith("Y")) is.pfill[i].set(p); - else is.pfill[i].clear(p); + if (it.fillp != null && it.fillp[p] != null && it.fillp[p].startsWith("Y")) + is.pfill[i].set(p); + else + is.pfill[i].clear(p); } - if (is.createSem(i,it)) { - DB.println("count "+i+" len "+it.length()); + if (is.createSem(i, it)) { + DB.println("count " + i + " len " + it.length()); DB.println(it.printSem()); } - } catch(Exception e ){ - DB.println("head "+it); + } catch (Exception e) { + DB.println("head " + it); e.printStackTrace(); } return true; } - public String normalize (String s) { - if (!normalizeOn) return s; - if(s.matches(NUMBER)) return NUM; + + public String normalize(String s) { + if (!normalizeOn) + return s; + if (s.matches(NUMBER)) + return NUM; return s; - } + } } diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLWriter06.java b/dependencyParser/mate-tools/src/is2/io/CONLLWriter06.java index 26762bc..24446b5 100755 --- a/dependencyParser/mate-tools/src/is2/io/CONLLWriter06.java +++ b/dependencyParser/mate-tools/src/is2/io/CONLLWriter06.java @@ -1,8 +1,5 @@ package is2.io; -import is2.data.SentenceData09; -import is2.util.DB; - import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -13,181 +10,190 @@ import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.util.StringTokenizer; +import is2.data.SentenceData09; +import is2.util.DB; -public class CONLLWriter06 { +public class CONLLWriter06 { public static final String DASH = "_"; - + protected BufferedWriter writer; - public CONLLWriter06 () { } - - - + public CONLLWriter06() { + } + public static void main(String args[]) throws IOException { - - - if (args.length==2) { - File f = new File(args[0]); - File f2 = new File(args[1]); - // BufferedReader bf = new BufferedReader(new FileInputStream(new File(args[0]),"UTF-8"),32768); - BufferedReader ir = new BufferedReader(new InputStreamReader(new FileInputStream(f),"ISO-8859"),32768); - BufferedWriter br = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f2),"UTF-8"));; - boolean found =false; - boolean tab =false; - while(true) { - String l = ir.readLine(); - if (l==null) break; - String x =l.trim(); - if (x.endsWith("\t")) tab=true; - br.write(x); - br.newLine(); - if (!l.equals(x)) found =true; - - } - ir.close(); - br.flush(); - br.close(); - if (found) DB.println("found diff. found tab? "+tab); - } else if (args.length==3) { + if (args.length == 2) { + File f = new File(args[0]); + File f2 = new File(args[1]); + // BufferedReader bf = new BufferedReader(new FileInputStream(new + // File(args[0]),"UTF-8"),32768); + BufferedReader ir = new BufferedReader(new InputStreamReader(new FileInputStream(f), "ISO-8859"), 32768); + BufferedWriter br = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f2), "UTF-8")); + ; + boolean found = false; + boolean tab = false; + while (true) { + String l = ir.readLine(); + if (l == null) + break; + String x = l.trim(); + if (x.endsWith("\t")) + tab = true; + br.write(x); + br.newLine(); + if (!l.equals(x)) + found = true; + + } + ir.close(); + br.flush(); + br.close(); + + if (found) + DB.println("found diff. found tab? " + tab); + } else if (args.length == 3) { File f1 = new File(args[1]); File f2 = new File(args[2]); - - BufferedReader ir1 = new BufferedReader(new InputStreamReader(new FileInputStream(f1),"ISO-8859"),32768); - BufferedReader ir2 = new BufferedReader(new InputStreamReader(new FileInputStream(f2),"UTF-8"),32768); - int line =0, alltabs1=0,alltabs2=0; - while(true) { + BufferedReader ir1 = new BufferedReader(new InputStreamReader(new FileInputStream(f1), "ISO-8859"), 32768); + BufferedReader ir2 = new BufferedReader(new InputStreamReader(new FileInputStream(f2), "UTF-8"), 32768); + + int line = 0, alltabs1 = 0, alltabs2 = 0; + while (true) { String l1 = ir1.readLine(); String l2 = ir2.readLine(); - if (l1==null && l2!=null) DB.println("files do not end at the same line "); - if (l1!=null && l2==null) DB.println("files do not end at the same line "); - if (l1==null ) break; - StringTokenizer t1 = new StringTokenizer(l1,"\t"); - StringTokenizer t2 = new StringTokenizer(l2,"\t"); - int tabs1=0; - while(t1.hasMoreTokens()) { + if (l1 == null && l2 != null) + DB.println("files do not end at the same line "); + if (l1 != null && l2 == null) + DB.println("files do not end at the same line "); + if (l1 == null) + break; + StringTokenizer t1 = new StringTokenizer(l1, "\t"); + StringTokenizer t2 = new StringTokenizer(l2, "\t"); + int tabs1 = 0; + while (t1.hasMoreTokens()) { t1.nextElement(); tabs1++; alltabs1++; } - - int tabs2=0; - while(t2.hasMoreTokens()) { + + int tabs2 = 0; + while (t2.hasMoreTokens()) { t2.nextElement(); tabs2++; alltabs2++; } - line ++; - if (tabs1!=tabs2) { - DB.println("number of tabs different in line "+line+" file1-tabs "+tabs1+" file2-tabs "+tabs2); + line++; + if (tabs1 != tabs2) { + DB.println("number of tabs different in line " + line + " file1-tabs " + tabs1 + " file2-tabs " + + tabs2); System.exit(0); } - - + } - DB.println("checked lines "+line+" with tabs in file 1 "+alltabs1+" in file2 "+alltabs2); - + ir1.close(); + ir2.close(); + DB.println("checked lines " + line + " with tabs in file 1 " + alltabs1 + " in file2 " + alltabs2); + } else { File f = new File(args[0]); - String[] dir =f.list(); - for(String fx :dir) { - BufferedReader ir = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]+File.separatorChar+fx),"UTF-8"),32768); - System.out.println("check file "+fx); - while(true) { + String[] dir = f.list(); + for (String fx : dir) { + BufferedReader ir = new BufferedReader( + new InputStreamReader(new FileInputStream(args[0] + File.separatorChar + fx), "UTF-8"), 32768); + System.out.println("check file " + fx); + while (true) { String l = ir.readLine(); - if (l==null) break; + if (l == null) + break; if (l.endsWith("\t")) { - DB.println("found tab in file "+fx); + DB.println("found tab in file " + fx); break; } - } + } ir.close(); } } - + } - - -// public int version = CONLLReader09.TASK08; - - public CONLLWriter06 (String file) { - + + // public int version = CONLLReader09.TASK08; + + public CONLLWriter06(String file) { + try { - writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF-8")); + writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")); } catch (Exception e) { e.printStackTrace(); } } - + public CONLLWriter06(String outfile, int formatTask) { this(outfile); - // version = formatTask; + // version = formatTask; } public void write(SentenceData09 inst) throws IOException { - for (int i=0; i<inst.length(); i++) { - - - writer.write(Integer.toString(i+1)); writer.write('\t'); // id - writer.write(inst.forms[i]); writer.write('\t'); // form - - if (inst.lemmas!=null && inst.lemmas[i]!=null) { - writer.write(inst.lemmas[i]); - } - else writer.write(DASH); // lemma - writer.write('\t'); - -// writer.write(DASH); // cpos -// writer.write('\t'); - - - writer.write(inst.gpos[i]); // cpos has to be included - writer.write('\t'); - - writer.write(inst.gpos[i]); // gpos - writer.write('\t'); - - - if (inst.ofeats[i].isEmpty()||inst.ofeats[i].equals(" ")) writer.write(DASH); - else writer.write(inst.ofeats[i]); - writer.write('\t'); - - - //writer.write(DASH); writer.write('\t'); // pfeat - - writer.write(Integer.toString(inst.heads[i])); writer.write('\t'); // head - - if (inst.labels[i]!=null) writer.write(inst.labels[i]); // rel - else writer.write(DASH); - writer.write('\t'); - - writer.write(DASH); - writer.write('\t'); - - writer.write(DASH); - writer.write('\t'); - - - writer.newLine(); + for (int i = 0; i < inst.length(); i++) { + + writer.write(Integer.toString(i + 1)); + writer.write('\t'); // id + writer.write(inst.forms[i]); + writer.write('\t'); // form + + if (inst.lemmas != null && inst.lemmas[i] != null) { + writer.write(inst.lemmas[i]); + } else + writer.write(DASH); // lemma + writer.write('\t'); + + // writer.write(DASH); // cpos + // writer.write('\t'); + + writer.write(inst.gpos[i]); // cpos has to be included + writer.write('\t'); + + writer.write(inst.gpos[i]); // gpos + writer.write('\t'); + + if (inst.ofeats[i].isEmpty() || inst.ofeats[i].equals(" ")) + writer.write(DASH); + else + writer.write(inst.ofeats[i]); + writer.write('\t'); + + // writer.write(DASH); writer.write('\t'); // pfeat + + writer.write(Integer.toString(inst.heads[i])); + writer.write('\t'); // head + + if (inst.labels[i] != null) + writer.write(inst.labels[i]); // rel + else + writer.write(DASH); + writer.write('\t'); + + writer.write(DASH); + writer.write('\t'); + + writer.write(DASH); + writer.write('\t'); + + writer.newLine(); } writer.newLine(); } - - - public void finishWriting () throws IOException { + public void finishWriting() throws IOException { writer.flush(); writer.close(); } - - - } diff --git a/dependencyParser/mate-tools/src/is2/io/CONLLWriter09.java b/dependencyParser/mate-tools/src/is2/io/CONLLWriter09.java index e7a92a5..c28ab24 100755 --- a/dependencyParser/mate-tools/src/is2/io/CONLLWriter09.java +++ b/dependencyParser/mate-tools/src/is2/io/CONLLWriter09.java @@ -1,8 +1,5 @@ package is2.io; -import is2.data.SentenceData09; -import is2.util.DB; - import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -14,123 +11,133 @@ import java.io.OutputStreamWriter; import java.io.Writer; import java.util.StringTokenizer; +import is2.data.SentenceData09; +import is2.util.DB; public class CONLLWriter09 extends IOGenerals { - - int format =0; - + int format = 0; + public static final String DASH = "_"; public static final boolean NO_ROOT = true, ROOT = false; - + protected BufferedWriter writer; - public CONLLWriter09 () { } - + public CONLLWriter09() { + } + public static void main(String args[]) throws IOException { - - - if (args.length==2) { - File f = new File(args[0]); - File f2 = new File(args[1]); - BufferedReader ir = new BufferedReader(new InputStreamReader(new FileInputStream(f),"UTF-8"),32768); - BufferedWriter br = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f2),"UTF8"));; - boolean found =false; - boolean tab =false; - while(true) { - String l = ir.readLine(); - if (l==null) break; - String x =l.trim(); - if (x.endsWith("\t")) tab=true; - br.write(x); - br.newLine(); - if (!l.equals(x)) found =true; - - } - ir.close(); - br.flush(); - br.close(); - if (found) DB.println("found diff. found tab? "+tab); - } else if (args.length==3) { + if (args.length == 2) { + File f = new File(args[0]); + File f2 = new File(args[1]); + BufferedReader ir = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"), 32768); + BufferedWriter br = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f2), "UTF8")); + ; + boolean found = false; + boolean tab = false; + while (true) { + String l = ir.readLine(); + if (l == null) + break; + String x = l.trim(); + if (x.endsWith("\t")) + tab = true; + br.write(x); + br.newLine(); + if (!l.equals(x)) + found = true; + + } + ir.close(); + br.flush(); + br.close(); + + if (found) + DB.println("found diff. found tab? " + tab); + } else if (args.length == 3) { File f1 = new File(args[1]); File f2 = new File(args[2]); - - BufferedReader ir1 = new BufferedReader(new InputStreamReader(new FileInputStream(f1),"UTF-8"),32768); - BufferedReader ir2 = new BufferedReader(new InputStreamReader(new FileInputStream(f2),"UTF-8"),32768); - int line =0, alltabs1=0,alltabs2=0; - while(true) { + BufferedReader ir1 = new BufferedReader(new InputStreamReader(new FileInputStream(f1), "UTF-8"), 32768); + BufferedReader ir2 = new BufferedReader(new InputStreamReader(new FileInputStream(f2), "UTF-8"), 32768); + + int line = 0, alltabs1 = 0, alltabs2 = 0; + while (true) { String l1 = ir1.readLine(); String l2 = ir2.readLine(); - if (l1==null && l2!=null) DB.println("files do not end at the same line "); - if (l1!=null && l2==null) DB.println("files do not end at the same line "); - if (l1==null ) break; - StringTokenizer t1 = new StringTokenizer(l1,"\t"); - StringTokenizer t2 = new StringTokenizer(l2,"\t"); - int tabs1=0; - while(t1.hasMoreTokens()) { + if (l1 == null && l2 != null) + DB.println("files do not end at the same line "); + if (l1 != null && l2 == null) + DB.println("files do not end at the same line "); + if (l1 == null) + break; + StringTokenizer t1 = new StringTokenizer(l1, "\t"); + StringTokenizer t2 = new StringTokenizer(l2, "\t"); + int tabs1 = 0; + while (t1.hasMoreTokens()) { t1.nextElement(); tabs1++; alltabs1++; } - - int tabs2=0; - while(t2.hasMoreTokens()) { + + int tabs2 = 0; + while (t2.hasMoreTokens()) { t2.nextElement(); tabs2++; alltabs2++; } - line ++; - if (tabs1!=tabs2) { - DB.println("number of tabs different in line "+line+" file1-tabs "+tabs1+" file2-tabs "+tabs2); + line++; + if (tabs1 != tabs2) { + DB.println("number of tabs different in line " + line + " file1-tabs " + tabs1 + " file2-tabs " + + tabs2); System.exit(0); } - - + } - DB.println("checked lines "+line+" with tabs in file 1 "+alltabs1+" in file2 "+alltabs2); - + ir1.close(); + ir2.close(); + DB.println("checked lines " + line + " with tabs in file 1 " + alltabs1 + " in file2 " + alltabs2); + } else { File f = new File(args[0]); - String[] dir =f.list(); - for(String fx :dir) { - BufferedReader ir = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]+File.separatorChar+fx),"UTF-8"),32768); - System.out.println("check file "+fx); - while(true) { + String[] dir = f.list(); + for (String fx : dir) { + BufferedReader ir = new BufferedReader( + new InputStreamReader(new FileInputStream(args[0] + File.separatorChar + fx), "UTF-8"), 32768); + System.out.println("check file " + fx); + while (true) { String l = ir.readLine(); - if (l==null) break; + if (l == null) + break; if (l.endsWith("\t")) { - DB.println("found tab in file "+fx); + DB.println("found tab in file " + fx); break; } - } + } ir.close(); } } - + } - - - public CONLLWriter09 (String file) { - + + public CONLLWriter09(String file) { + try { - writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file),"UTF8")); + writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF8")); } catch (Exception e) { e.printStackTrace(); } } - - public CONLLWriter09 (Writer writer) { + + public CONLLWriter09(Writer writer) { this.writer = new BufferedWriter(writer); } - - - + public CONLLWriter09(String outfile, int formatTask) { this(outfile); } @@ -138,170 +145,190 @@ public class CONLLWriter09 extends IOGenerals { public void write(SentenceData09 inst) throws IOException { write(inst, NO_ROOT); } - + /** - * - * @param inst - * @param root true: remove root node + * + * @param inst + * @param root + * true: remove root node * @throws IOException */ public void write(SentenceData09 inst, boolean root) throws IOException { int i, mod; - if(root&&(inst.forms[0].startsWith("<root")||(inst.lemmas[0]!=null&&inst.lemmas[0].startsWith("<root")))){ - i=1; mod=0; + if (root && (inst.forms[0].startsWith("<root") + || (inst.lemmas[0] != null && inst.lemmas[0].startsWith("<root")))) { + i = 1; + mod = 0; } else { - i=0; mod=1; + i = 0; + mod = 1; } - //=()?1:0; - - if (format == this.F_ONE_LINE) { - boolean first =true; - for (; i<inst.length(); i++) { - if (first ){ - first=false; - } else writer.write(" "); + // =()?1:0; + + if (format == IOGenerals.F_ONE_LINE) { + boolean first = true; + for (; i < inst.length(); i++) { + if (first) { + first = false; + } else + writer.write(" "); writer.write(inst.plemmas[i]); - } + } writer.newLine(); - - return ; + + return; } - - - for (; i<inst.length(); i++) { - - if (inst.id==null|| inst.id[i]==null) {writer.write(Integer.toString(i+mod)); writer.write('\t');} // id - else { writer.write(inst.id[i]); writer.write('\t');} - - writer.write(inst.forms[i]); writer.write('\t'); // form - - if (inst.lemmas!=null && inst.lemmas[i]!=null) { - writer.write(inst.lemmas[i]); - } - else writer.write(DASH); // lemma - writer.write('\t'); - - if (inst.plemmas!=null && inst.plemmas[i]!=null) writer.write(inst.plemmas[i]); - else writer.write(DASH); // plemma - writer.write('\t'); - - if (inst.gpos[i]!=null) writer.write(inst.gpos[i]); // gpos - else writer.write(DASH); - writer.write('\t'); - - if (inst.ppos!=null && inst.ppos[i]!=null) writer.write(inst.ppos[i]); - else writer.write(DASH); // ppos - writer.write('\t'); - - if (inst.ofeats!=null&& inst.ofeats[i]!=null) writer.write(inst.ofeats[i]); - else writer.write(DASH); - writer.write('\t'); - - //writer.write(DASH); writer.write('\t'); // feat - if (inst.pfeats!=null&&inst.pfeats[i]!=null) { - //System.out.println(""+inst.pfeats[i]); - writer.write(inst.pfeats[i]); - } - else writer.write(DASH); + + for (; i < inst.length(); i++) { + + if (inst.id == null || inst.id[i] == null) { + writer.write(Integer.toString(i + mod)); writer.write('\t'); - - - writer.write(Integer.toString(inst.heads[i])); writer.write('\t'); // head - - if (inst.pheads!=null ) writer.write(Integer.toString(inst.pheads[i])); - else writer.write(DASH); - writer.write('\t'); // phead - - if (inst.labels[i]!=null) writer.write(inst.labels[i]); // rel - else writer.write(DASH); + } // id + else { + writer.write(inst.id[i]); writer.write('\t'); - - if (inst.plabels!=null &&inst.plabels[i]!=null) writer.write(inst.plabels[i]); // rel - else writer.write(DASH); + } + + writer.write(inst.forms[i]); + writer.write('\t'); // form + + if (inst.lemmas != null && inst.lemmas[i] != null) { + writer.write(inst.lemmas[i]); + } else + writer.write(DASH); // lemma + writer.write('\t'); + + if (inst.plemmas != null && inst.plemmas[i] != null) + writer.write(inst.plemmas[i]); + else + writer.write(DASH); // plemma + writer.write('\t'); + + if (inst.gpos[i] != null) + writer.write(inst.gpos[i]); // gpos + else + writer.write(DASH); + writer.write('\t'); + + if (inst.ppos != null && inst.ppos[i] != null) + writer.write(inst.ppos[i]); + else + writer.write(DASH); // ppos + writer.write('\t'); + + if (inst.ofeats != null && inst.ofeats[i] != null) + writer.write(inst.ofeats[i]); + else + writer.write(DASH); + writer.write('\t'); + + // writer.write(DASH); writer.write('\t'); // feat + if (inst.pfeats != null && inst.pfeats[i] != null) { + // System.out.println(""+inst.pfeats[i]); + writer.write(inst.pfeats[i]); + } else + writer.write(DASH); + writer.write('\t'); + + writer.write(Integer.toString(inst.heads[i])); + writer.write('\t'); // head + + if (inst.pheads != null) + writer.write(Integer.toString(inst.pheads[i])); + else + writer.write(DASH); + writer.write('\t'); // phead + + if (inst.labels[i] != null) + writer.write(inst.labels[i]); // rel + else + writer.write(DASH); + writer.write('\t'); + + if (inst.plabels != null && inst.plabels[i] != null) + writer.write(inst.plabels[i]); // rel + else + writer.write(DASH); + writer.write('\t'); + + if (inst.fillp != null && inst.fillp[i] != null) + writer.write(inst.fillp[i]); // fill p + else { + writer.write(DASH); + } + + // writer.write('\t'); + + if (inst.sem == null) { writer.write('\t'); - - if (inst.fillp!=null && inst.fillp[i]!=null) writer.write(inst.fillp[i]); // fill p - else { - writer.write(DASH); + writer.write(DASH); + + } else { + + boolean foundPred = false; + // print the predicate + for (int p = 0; p < inst.sem.length; p++) { + if (inst.semposition[p] == i) { + foundPred = true; + // System.out.println("write pred "+inst.sem[p] ); + writer.write('\t'); + writer.write(inst.sem[p]); + + // if (inst.sem[p].startsWith(".")) DB.println("error + // "+inst.sem[p]); + } } - - -// writer.write('\t'); - - - if (inst.sem==null) { + + if (!foundPred) { writer.write('\t'); - writer.write(DASH); - - } else { - - - - boolean foundPred =false; - // print the predicate - for (int p =0;p< inst.sem.length;p++) { - if (inst.semposition[p]==i) { - foundPred=true; - // System.out.println("write pred "+inst.sem[p] ); - writer.write('\t'); writer.write(inst.sem[p]); - - // if (inst.sem[p].startsWith(".")) DB.println("error "+inst.sem[p]); - } - } - - if (!foundPred ) { - writer.write('\t'); - writer.write(DASH); -// writer.write('\t'); -// writer.write(DASH); - } - - // print the arguments - for (int p =0;p< inst.sem.length;p++) { - - boolean found =false; - if (inst.arg!=null &&inst.arg.length>p&&inst.arg[p]!=null) - for(int a = 0; a<inst.arg[p].length;a++) { - - if (i==inst.argposition[p][a]) { - writer.write('\t'); writer.write(inst.arg[p][a]); + writer.write(DASH); + // writer.write('\t'); + // writer.write(DASH); + } + + // print the arguments + for (int p = 0; p < inst.sem.length; p++) { + + boolean found = false; + if (inst.arg != null && inst.arg.length > p && inst.arg[p] != null) + for (int a = 0; a < inst.arg[p].length; a++) { + + if (i == inst.argposition[p][a]) { + writer.write('\t'); + writer.write(inst.arg[p][a]); found = true; break; - } - - } - if (!found) { - writer.write('\t'); - writer.write(DASH); + } + } - - + if (!found) { + writer.write('\t'); + writer.write(DASH); } - - - - + + } + } writer.newLine(); } writer.newLine(); } - public void finishWriting () throws IOException { + public void finishWriting() throws IOException { writer.flush(); writer.close(); } /** - * Sets the output format such as CoNLL or one line for the lemmata of the sentence (see F_xxxx constants). + * Sets the output format such as CoNLL or one line for the lemmata of the + * sentence (see F_xxxx constants). + * * @param formatTask */ public void setOutputFormat(int formatTask) { - format =formatTask; + format = formatTask; } - - - } diff --git a/dependencyParser/mate-tools/src/is2/io/IOGenerals.java b/dependencyParser/mate-tools/src/is2/io/IOGenerals.java index 456a17f..030bedd 100644 --- a/dependencyParser/mate-tools/src/is2/io/IOGenerals.java +++ b/dependencyParser/mate-tools/src/is2/io/IOGenerals.java @@ -1,12 +1,12 @@ /** - * + * */ package is2.io; /** * @author Dr. Bernd Bohnet, 18.08.2011 - * - * + * + * */ public class IOGenerals { @@ -21,13 +21,12 @@ public class IOGenerals { public static final String ROOT = "<root>"; public static final String EMPTY_FEAT = "<ef>"; - - // the different readers + // the different readers public static final int F_CONLL09 = 0; public static final int F_ONE_LINE = 1; - + // normalization of the input public static final String NUMBER = "[0-9]+|[0-9]+\\.[0-9]+|[0-9]+[0-9,]+"; public static final String NUM = "<num>"; - + } diff --git a/dependencyParser/mate-tools/src/is2/io/PSReader.java b/dependencyParser/mate-tools/src/is2/io/PSReader.java index 3598b3d..5e8b1ad 100644 --- a/dependencyParser/mate-tools/src/is2/io/PSReader.java +++ b/dependencyParser/mate-tools/src/is2/io/PSReader.java @@ -1,5 +1,5 @@ /** - * + * */ package is2.io; @@ -7,8 +7,8 @@ import is2.data.PSTree; /** * @author Dr. Bernd Bohnet, 07.02.2011 - * - * + * + * */ public interface PSReader { @@ -19,5 +19,5 @@ public interface PSReader { * @param filter */ public void startReading(String ps, String[] filter); - + } diff --git a/dependencyParser/mate-tools/src/is2/io/TigerReader.java b/dependencyParser/mate-tools/src/is2/io/TigerReader.java index 2a98b72..10fa0ea 100644 --- a/dependencyParser/mate-tools/src/is2/io/TigerReader.java +++ b/dependencyParser/mate-tools/src/is2/io/TigerReader.java @@ -1,41 +1,40 @@ /** - * + * */ package is2.io; -import is2.data.PSTree; -import is2.util.DB; - import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.InputStreamReader; import java.util.ArrayList; -import java.util.Stack; import java.util.StringTokenizer; +import is2.data.PSTree; + /** * @author Dr. Bernd Bohnet, 17.01.2011 - * - * Reads a sentences in Penn Tree Bank bracket style and return sentences. + * + * Reads a sentences in Penn Tree Bank bracket style and return + * sentences. */ public class TigerReader implements PSReader { BufferedReader inputReader; ArrayList<File> psFiles = new ArrayList<File>(); ArrayList<PSTree> psCache = new ArrayList<PSTree>(); - + String filter[] = null; - int startFilter =-1; - int endFilter =-1; + int startFilter = -1; + int endFilter = -1; - public TigerReader() {} + public TigerReader() { + } - public TigerReader(String file ) { + public TigerReader(String file) { try { - inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"ISO-8859-1"),32768); + inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "ISO-8859-1"), 32768); } catch (Exception e) { e.printStackTrace(); } @@ -46,14 +45,13 @@ public class TigerReader implements PSReader { */ @Override public void startReading(String file, String[] filter) { - - + try { - this.filter =filter; - startFilter =filter==null?-1:1; - endFilter =filter==null?-1:1; + this.filter = filter; + startFilter = filter == null ? -1 : 1; + endFilter = filter == null ? -1 : 1; - inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"ISO-8859-1"),32768); + inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "ISO-8859-1"), 32768); } catch (Exception e) { e.printStackTrace(); } @@ -67,337 +65,144 @@ public class TigerReader implements PSReader { String pos; int parent; String edge; - - + } - - static int stop=0; - + + static int stop = 0; + /** * @return */ + @Override public PSTree getNext() { PSTree ps = null; - String l =null; + String l = null; ArrayList<Line> lines = new ArrayList<Line>(); try { - int state=1, terminals=0, nonterminals=0; - while((l = inputReader.readLine())!=null) { - - if (startFilter==1 && l.startsWith("#BOS "+filter[0]) ) { - System.out.println("found start "+l); - startFilter=2; + int state = 1, terminals = 0, nonterminals = 0; + while ((l = inputReader.readLine()) != null) { + + if (startFilter == 1 && l.startsWith("#BOS " + filter[0])) { + System.out.println("found start " + l); + startFilter = 2; } - if (endFilter==1 && l.startsWith("#EOS "+filter[1]) ){ - System.out.println("found end "+l); - - endFilter=2; + if (endFilter == 1 && l.startsWith("#EOS " + filter[1])) { + System.out.println("found end " + l); + + endFilter = 2; } - - - if (startFilter==1||endFilter==2) continue; - + + if (startFilter == 1 || endFilter == 2) + continue; + if (l.startsWith("#BOS")) { - - state=2; + + state = 2; continue; } - if (l.startsWith("#500")) state=3; - if (l.startsWith("#EOS")) state=4; - if (state<2) continue; - - if ( state==4) { - + if (l.startsWith("#500")) + state = 3; + if (l.startsWith("#EOS")) + state = 4; + if (state < 2) + continue; + + if (state == 4) { + ps = new PSTree(); ps.create(terminals, nonterminals); - // System.out.println("terminals "+terminals); - //build ps tree - - int cnt=0; - // ps.entries[0] =CONLLReader09.ROOT; - // ps.head[0]=-1; - int root=-1; - for(Line line : lines) { - - /* if (cnt==terminals) { - // insert root - root =cnt; - cnt++; - } - */ + // System.out.println("terminals "+terminals); + // build ps tree + + int cnt = 0; + // ps.entries[0] =CONLLReader09.ROOT; + // ps.head[0]=-1; + int root = -1; + for (Line line : lines) { + + /* + * if (cnt==terminals) { // insert root root =cnt; + * cnt++; } + */ ps.entries[cnt] = line.form; - if (cnt<terminals) ps.pos[cnt] = line.pos; - else ps.entries[cnt] =line.pos; + if (cnt < terminals) + ps.pos[cnt] = line.pos; + else + ps.entries[cnt] = line.pos; ps.lemmas[cnt] = line.lemma; - ps.head[cnt] = line.parent==0?lines.size()-1:line.parent>=500?line.parent-500+terminals:line.parent; - // ps.head[cnt] = line.parent==0?lines.size()-1:line.parent>=500?line.parent-500+terminals:line.parent; - ps.morph[cnt]=line.morph; + ps.head[cnt] = line.parent == 0 ? lines.size() - 1 + : line.parent >= 500 ? line.parent - 500 + terminals : line.parent; + // ps.head[cnt] = + // line.parent==0?lines.size()-1:line.parent>=500?line.parent-500+terminals:line.parent; + ps.morph[cnt] = line.morph; cnt++; - + } - - if (root==-1) root= terminals; - ps.head[cnt-1]=0; // root - ps.terminalCount=terminals; + + if (root == -1) + root = terminals; + ps.head[cnt - 1] = 0; // root + ps.terminalCount = terminals; lines.clear(); - state=1; - + state = 1; + /* - for(int k=0;k<ps.head.length;k++) { - if (ps.head[k]<terminals && k!=root) { - ps.head[k]=root; - // DB.println("error "+k+" "+ps.head[k]); - } - } - */ - // System.out.println(""+ps.toString()); - // if (stop++ == 4)System.exit(0); + * for(int k=0;k<ps.head.length;k++) { if + * (ps.head[k]<terminals && k!=root) { ps.head[k]=root; // + * DB.println("error "+k+" "+ps.head[k]); } } + */ + // System.out.println(""+ps.toString()); + // if (stop++ == 4)System.exit(0); return ps; } - - - - StringTokenizer t = new StringTokenizer(l,"\t"); - int tc=0; + + StringTokenizer t = new StringTokenizer(l, "\t"); + int tc = 0; Line line = new Line(); lines.add(line); - while(t.hasMoreTokens()) { + while (t.hasMoreTokens()) { String token = t.nextToken(); - if (token.equals("\t"))continue; - if (tc==0) { - if (token.startsWith("#5")||token.startsWith("#6") ) { + if (token.equals("\t")) + continue; + if (tc == 0) { + if (token.startsWith("#5") || token.startsWith("#6")) { nonterminals++; - - } - else { + + } else { terminals++; - - //change it back to the wrong format since the conll stuff was derived from this. - // if (token.equals("durchblicken")) token="durchblikken"; + + // change it back to the wrong format since the + // conll stuff was derived from this. + // if (token.equals("durchblicken")) + // token="durchblikken"; line.form = token; } - - } else if (tc==1) { - line.lemma=token; - } else if (tc==2) { - line.pos=token; - } else if (tc==3) { - line.morph=token; - } else if (tc==4) { - line.edge=token; - } else if (tc==5) { - line.parent=Integer.parseInt(token); - } - - - if (token.length()>0)tc++; - } - - // read till #EOS - - - } - } catch(Exception e) { - e.printStackTrace(); - } - return ps; - - } - /** - * @param tree - */ - private void removeTraces(ArrayList<Object> tree) { - - Stack<ArrayList<Object>> s = new Stack<ArrayList<Object>>(); - - s.push(tree); - ArrayList<Object> list =null; - while (!s.isEmpty()) { - - ArrayList<Object> last =list; - list = s.pop(); - for(int k=0;k<list.size();k++) { - Object o = list.get(k); - if(o instanceof String) { - String t = (String)o; - if ((t.endsWith("-1")||t.endsWith("-2")||t.endsWith("-3")||t.endsWith("-4")) && list.size()>(k+1)) { - t = t.substring(0, t.length()-2); - list.set(k, t); - } - - if (t.startsWith("-NONE-")) { - - // remove the bigger surrounding phrase, e.g. (NP (-NONE- *)) - if (last.size()==2 && last.get(0) instanceof String && last.contains(list)) { - ArrayList<Object> rest = remove(tree, last); - if (rest!=null && rest.size()==1){ - rest = remove(tree, rest); - } - } - // remove the phrase only, e.g. (NP (AP nice small) (-NONE- *)) - else { - // there might a phrase with two empty elements (VP (-NONE- *) (-NONE- ...)) -// System.out.println("last "+last+" list "+list ); - ArrayList<Object> rest = remove(tree, list); - removeTraces(rest); - if (rest.size()==1) { - rest = remove(tree, rest); - if (rest!=null && rest.size()==1){ - System.out.println("rest "+rest); - System.exit(0); - } - } - } - continue; + } else if (tc == 1) { + line.lemma = token; + } else if (tc == 2) { + line.pos = token; + } else if (tc == 3) { + line.morph = token; + } else if (tc == 4) { + line.edge = token; + } else if (tc == 5) { + line.parent = Integer.parseInt(token); } - } - if (o instanceof ArrayList) { - s.push((ArrayList<Object>)o); - } - } - } - } - - - - /** - * Remove from tree p - * @param tree phrase structure tree - * @param p elment to remove - */ - private ArrayList<Object> remove(ArrayList<Object> tree, Object p) { - Stack<ArrayList<Object>> s = new Stack<ArrayList<Object>>(); - - s.push(tree); - - while (!s.isEmpty()) { - - ArrayList<Object> list = s.pop(); - for(int k=0;k<list.size();k++) { - Object o = list.get(k); - if (o == p) { - list.remove(p); - return list ; - } - if (o instanceof ArrayList) { - s.push((ArrayList<Object>)o); + if (token.length() > 0) + tc++; } - } - } - return null; - } - - /** - * Count the terminals - * @param current - * @return - */ - private int countTerminals(ArrayList<Object> current) { - - int count =0; - boolean found =false, all =true ; - for(Object o : current) { - if (o instanceof String) found =true; - else { - all =false; - if (o instanceof ArrayList) count +=countTerminals((ArrayList<Object>)o); - } - } - - if (found && all) { - // System.out.println(""+current); - count++; - } - - return count; - } - - /** - * Count the terminals - * @param current - * @return - */ - private int insert(PSTree ps, ArrayList<Object> current, Integer terminal, Integer xxx, int head) { - - boolean found =false, all =true; - String term =null; - String pos =null; - for(Object o : current) { - if (o instanceof String) { - if (found) term =(String)o; - if (!found) pos =(String)o; - found =true; - } else { - all =false; - // if (o instanceof ArrayList) count +=countTerminals((ArrayList<Object>)o); - } - } - - if (found && all) { - - if(term.equals("-LRB-")) term="("; - if(term.equals("-RRB-")) term=")"; - if(term.equals("-LCB-")) term="{"; - if(term.equals("-RCB-")) term="}"; - if(term.contains("1\\/2-year")) term=term.replace("\\/", "/"); - if(term.contains("1\\/2-foot-tall")) term=term.replace("\\/", "/"); - - - ps.entries[ps.terminalCount] =term; - ps.pos[ps.terminalCount]=pos; - ps.head[ps.terminalCount]=head; - // System.out.println("terminal "+term+" "+ps.terminal+" head "+head); - ps.terminalCount ++; - } else if (found && ! all) { - if(pos.startsWith("NP-SBJ")) pos="NP-SBJ"; - if(pos.startsWith("WHNP")) pos="WHNP"; - - ps.entries[ps.non] =pos; - ps.head[ps.non]=head; - // System.out.println("non terminal "+pos+" "+ps.non+" head "+ head); - int non =ps.non ++; - - for (Object o : current) { - if (o instanceof ArrayList) { - insert(ps,(ArrayList<Object>)o,terminal,ps.non, non); - } - } - } - if(!all && !found)for (Object o : current) { - if (o instanceof ArrayList) { - insert(ps,(ArrayList<Object>)o,terminal,0, ps.non-1); - } - } - return terminal; - } + // read till #EOS - /** - * Count the terminals - * @param current - * @return - */ - private int countNonTerminals(ArrayList<Object> current) { - - int count =0; - boolean found =false, all =true ; - for(Object o : current) { - if (o instanceof String) found =true; - else { - all =false; - if (o instanceof ArrayList) count +=countNonTerminals((ArrayList<Object>)o); } + } catch (Exception e) { + e.printStackTrace(); } + return ps; - if (found && !all) count++; - - return count; } - - - } diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/Evaluator.java b/dependencyParser/mate-tools/src/is2/lemmatizer/Evaluator.java index b333c62..cc1b423 100755 --- a/dependencyParser/mate-tools/src/is2/lemmatizer/Evaluator.java +++ b/dependencyParser/mate-tools/src/is2/lemmatizer/Evaluator.java @@ -1,100 +1,103 @@ package is2.lemmatizer; -import is2.data.SentenceData09; -import is2.io.CONLLReader09; - import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.Hashtable; import java.util.Map.Entry; +import is2.data.SentenceData09; +import is2.io.CONLLReader09; public class Evaluator { - public static void evaluate (String act_file, String pred_file, String format) throws Exception { + public static void evaluate(String act_file, String pred_file, String format) throws Exception { CONLLReader09 goldReader = new CONLLReader09(act_file, CONLLReader09.NO_NORMALIZE); - CONLLReader09 predictedReader = new CONLLReader09(pred_file,CONLLReader09.NO_NORMALIZE); - // predictedReader.startReading(pred_file); + CONLLReader09 predictedReader = new CONLLReader09(pred_file, CONLLReader09.NO_NORMALIZE); + // predictedReader.startReading(pred_file); + Hashtable<String, Integer> errors = new Hashtable<String, Integer>(); - Hashtable<String,Integer> errors = new Hashtable<String,Integer>(); - - - int total = 0, corr = 0, corrL = 0, corrT=0; - int numsent = 0, corrsent = 0, corrsentL = 0; + int total = 0, corrL = 0, corrT = 0; + int numsent = 0; SentenceData09 goldInstance = goldReader.getNext(); SentenceData09 predInstance = predictedReader.getNext(); - while(goldInstance != null) { + while (goldInstance != null) { int instanceLength = goldInstance.length(); if (instanceLength != predInstance.length()) - System.out.println("Lengths do not match on sentence "+numsent); - + System.out.println("Lengths do not match on sentence " + numsent); String gold[] = goldInstance.lemmas; String pred[] = predInstance.plemmas; - boolean whole = true; boolean wholeL = true; - // NOTE: the first item is the root info added during nextInstance(), so we skip it. + // NOTE: the first item is the root info added during + // nextInstance(), so we skip it. for (int i = 1; i < instanceLength; i++) { - if (gold[i].toLowerCase().equals(pred[i].toLowerCase())) corrT++; - - if (gold[i].equals(pred[i])) corrL++; + if (gold[i].toLowerCase().equals(pred[i].toLowerCase())) + corrT++; + + if (gold[i].equals(pred[i])) + corrL++; else { - - // System.out.println("error gold:"+goldPos[i]+" pred:"+predPos[i]+" "+goldInstance.forms[i]+" snt "+numsent+" i:"+i); - String key = "gold: '"+gold[i]+"' pred: '"+pred[i]+"'"; - Integer cnt = errors.get(key); - if (cnt==null) { - errors.put(key,1); - } else { - errors.put(key,cnt+1); - } + + // System.out.println("error gold:"+goldPos[i]+" + // pred:"+predPos[i]+" "+goldInstance.forms[i]+" snt + // "+numsent+" i:"+i); + String key = "gold: '" + gold[i] + "' pred: '" + pred[i] + "'"; + Integer cnt = errors.get(key); + if (cnt == null) { + errors.put(key, 1); + } else { + errors.put(key, cnt + 1); } - + } + } - total += instanceLength - 1; // Subtract one to not score fake root token + total += instanceLength - 1; // Subtract one to not score fake root + // token - if(whole) corrsent++; - if(wholeL) corrsentL++; + if (whole) { + } + if (wholeL) { + } numsent++; goldInstance = goldReader.getNext(); predInstance = predictedReader.getNext(); } ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>(); - for(Entry<String, Integer> e : errors.entrySet()) { + for (Entry<String, Integer> e : errors.entrySet()) { opsl.add(e); } - - Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){ + + Collections.sort(opsl, new Comparator<Entry<String, Integer>>() { @Override - public int compare(Entry<String, Integer> o1, - Entry<String, Integer> o2) { - - return o1.getValue()==o2.getValue()?0:o1.getValue()>o2.getValue()?1:-1; + public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) { + + return o1.getValue() == o2.getValue() ? 0 : o1.getValue() > o2.getValue() ? 1 : -1; } - - + }); - - for(Entry<String, Integer> e : opsl) { - // System.out.println(e.getKey()+" "+e.getValue()); - } - System.out.println("Tokens: " + total+" Correct: " + corrT+" "+(float)corrT/total+" correct uppercase "+(float)corrL/total); + /* + * for(Entry<String, Integer> e : opsl) { + * System.out.println(e.getKey()+" "+e.getValue()); } + */ + + System.out.println("Tokens: " + total + " Correct: " + corrT + " " + (float) corrT / total + + " correct uppercase " + (float) corrL / total); } - public static void main (String[] args) throws Exception { + public static void main(String[] args) throws Exception { String format = "CONLL"; if (args.length > 2) format = args[2]; diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/Lemmatizer.java b/dependencyParser/mate-tools/src/is2/lemmatizer/Lemmatizer.java index 33756dd..b15aaa7 100755 --- a/dependencyParser/mate-tools/src/is2/lemmatizer/Lemmatizer.java +++ b/dependencyParser/mate-tools/src/is2/lemmatizer/Lemmatizer.java @@ -1,23 +1,5 @@ package is2.lemmatizer; - -import is2.data.Cluster; -import is2.data.F2SF; -import is2.data.FV; -import is2.data.Instances; -import is2.data.InstancesTagger; -import is2.data.Long2Int; -import is2.data.ParametersFloat; -import is2.data.PipeGen; -import is2.data.SentenceData09; -import is2.io.CONLLReader09; -import is2.io.CONLLWriter09; -import is2.tools.IPipe; -import is2.tools.Tool; -import is2.tools.Train; -import is2.util.DB; -import is2.util.OptionsSuper; - import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.DataInputStream; @@ -35,29 +17,44 @@ import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import java.util.zip.ZipOutputStream; - +import is2.data.Cluster; +import is2.data.F2SF; +import is2.data.FV; +import is2.data.Instances; +import is2.data.InstancesTagger; +import is2.data.Long2Int; +import is2.data.ParametersFloat; +import is2.data.PipeGen; +import is2.data.SentenceData09; +import is2.io.CONLLReader09; +import is2.io.CONLLWriter09; +import is2.tools.IPipe; +import is2.tools.Tool; +import is2.tools.Train; +import is2.util.DB; +import is2.util.OptionsSuper; public class Lemmatizer implements Tool, Train { public Pipe pipe; public ParametersFloat params; private Long2Int li; - - private boolean doUppercase=false; - - private long[] vs= new long[40]; + private boolean doUppercase = false; + private long[] vs = new long[40]; /** * Creates a lemmatizer due to the model stored in modelFileName - * @param modelFileName the path and file name to a lemmatizer model + * + * @param modelFileName + * the path and file name to a lemmatizer model */ - public Lemmatizer(String modelFileName) { - + public Lemmatizer(String modelFileName) { + // tell the lemmatizer the location of the model try { - Options m_options = new Options(new String[] {"-model", modelFileName}); + Options m_options = new Options(new String[] { "-model", modelFileName }); li = new Long2Int(m_options.hsize); // initialize the lemmatizer @@ -67,37 +64,31 @@ public class Lemmatizer implements Tool, Train { e.printStackTrace(); } } - - - - - - public Lemmatizer(boolean doUppercase) {this.doUppercase=doUppercase; } - + public Lemmatizer(boolean doUppercase) { + this.doUppercase = doUppercase; + } - public static void main (String[] args) throws FileNotFoundException, Exception - { + public static void main(String[] args) throws FileNotFoundException, Exception { Options options = new Options(args); Lemmatizer lemmatizer = new Lemmatizer(options.upper); long start = System.currentTimeMillis(); - if (options.train) { - lemmatizer.li = new Long2Int(options.hsize); - lemmatizer.pipe = new Pipe (options,lemmatizer.li); + lemmatizer.pipe = new Pipe(options, lemmatizer.li); InstancesTagger is = lemmatizer.pipe.createInstances(options.trainfile); - DB.println("Features: " + lemmatizer.pipe.mf.size()+" Operations "+lemmatizer.pipe.mf.getFeatureCounter().get(Pipe.OPERATION)); + DB.println("Features: " + lemmatizer.pipe.mf.size() + " Operations " + + lemmatizer.pipe.mf.getFeatureCounter().get(Pipe.OPERATION)); ParametersFloat params = new ParametersFloat(lemmatizer.li.size()); - lemmatizer.train(options,lemmatizer.pipe,params,is); + lemmatizer.train(options, lemmatizer.pipe, params, is); lemmatizer.writeModel(options, lemmatizer.pipe, params); } @@ -106,48 +97,51 @@ public class Lemmatizer implements Tool, Train { lemmatizer.readModel(options); - lemmatizer.out(options,lemmatizer.pipe, lemmatizer.params); + lemmatizer.out(options, lemmatizer.pipe, lemmatizer.params); } System.out.println(); if (options.eval) { System.out.println("\nEVALUATION PERFORMANCE:"); - Evaluator.evaluate(options.goldfile, options.outfile,options.format); + Evaluator.evaluate(options.goldfile, options.outfile, options.format); } long end = System.currentTimeMillis(); - System.out.println("used time "+((float)((end-start)/100)/10)); + System.out.println("used time " + ((float) ((end - start) / 100) / 10)); } - /* (non-Javadoc) - * @see is2.tools.Train#writeModel(is2.util.OptionsSuper, is2.tools.IPipe, is2.data.ParametersFloat) + /* + * (non-Javadoc) + * + * @see is2.tools.Train#writeModel(is2.util.OptionsSuper, is2.tools.IPipe, + * is2.data.ParametersFloat) */ @Override - public void writeModel(OptionsSuper options, IPipe pipe, - ParametersFloat params) { + public void writeModel(OptionsSuper options, IPipe pipe, ParametersFloat params) { try { // store the model - ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(options.modelName))); - zos.putNextEntry(new ZipEntry("data")); + ZipOutputStream zos = new ZipOutputStream( + new BufferedOutputStream(new FileOutputStream(options.modelName))); + zos.putNextEntry(new ZipEntry("data")); DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos)); - - this.pipe.mf.writeData(dos); + + MFO.writeData(dos); dos.flush(); params.write(dos); pipe.write(dos); - + dos.writeBoolean(this.doUppercase); - + dos.flush(); - dos.close(); - } catch(Exception e) { + dos.close(); + } catch (Exception e) { e.printStackTrace(); } } - + @Override public void readModel(OptionsSuper options) { try { @@ -161,159 +155,160 @@ public class Lemmatizer implements Tool, Train { mf.read(dis); params = new ParametersFloat(0); params.read(dis); - li =new Long2Int(params.size()); + li = new Long2Int(params.size()); pipe = new Pipe(options, li); - pipe.mf =mf; + pipe.mf = mf; pipe.initFeatures(); pipe.initValues(); pipe.readMap(dis); - for(Entry<String,Integer> e : mf.getFeatureSet().get(Pipe.OPERATION).entrySet()) { + for (Entry<String, Integer> e : MFO.getFeatureSet().get(Pipe.OPERATION).entrySet()) { this.pipe.types[e.getValue()] = e.getKey(); - // System.out.println("set pos "+e.getKey()); + // System.out.println("set pos "+e.getKey()); } - pipe.cl = new Cluster(dis); - if (dis.available()>0) this.doUppercase = dis.readBoolean(); - - + if (dis.available() > 0) + this.doUppercase = dis.readBoolean(); + dis.close(); DB.println("Loading data finished. "); - DB.println("number of params "+params.parameters.length); - DB.println("number of classes "+pipe.types.length); + DB.println("number of params " + params.parameters.length); + DB.println("number of classes " + pipe.types.length); - } catch (Exception e ) { + } catch (Exception e) { e.printStackTrace(); } } - - /** * Do the training + * * @param instanceLengths * @param options * @param pipe * @param params - * @param li + * @param li * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ + @Override public void train(OptionsSuper options, IPipe p, ParametersFloat params, Instances ist) { - InstancesTagger is = (InstancesTagger)ist; - - int i = 0,del=0; + InstancesTagger is = (InstancesTagger) ist; + + int i = 0, del = 0; FV g = new FV(), f = new FV(); - - int LC = this.pipe.types.length+1, UC = LC+1; - String wds[] = MFO.reverse(pipe.mf.getFeatureSet().get(Pipe.WORD)); - + int LC = this.pipe.types.length + 1, UC = LC + 1; + + String wds[] = MFO.reverse(MFO.getFeatureSet().get(PipeGen.WORD)); + F2SF fs = params.getFV(); - double upd=0; + double upd = 0; - for(i = 0; i < options.numIters; i++) { + for (i = 0; i < options.numIters; i++) { - System.out.print("Iteration "+i+": "); + System.out.print("Iteration " + i + ": "); long start = System.currentTimeMillis(); int numInstances = is.size(); - int correct =0,count=0; + int correct = 0, count = 0; - long last= System.currentTimeMillis(); - int wrongOp=0,correctOp=0, correctUC=0, wrongUC=0; + long last = System.currentTimeMillis(); + int wrongOp = 0, correctOp = 0, correctUC = 0, wrongUC = 0; - HashMap<String,Integer> map = new HashMap<String,Integer>(); + HashMap<String, Integer> map = new HashMap<String, Integer>(); - for(int n = 0; n < numInstances; n++) { + for (int n = 0; n < numInstances; n++) { - if((n+1) % 500 == 0) del= Pipe.outValueErr(n+1, (float)(count-correct),(float)correct/(float)count,del,last,upd); + if ((n + 1) % 500 == 0) + del = PipeGen.outValueErr(n + 1, count - correct, (float) correct / (float) count, del, last, upd); - upd = (double)(options.numIters*numInstances - (numInstances*i+(n+1))+ 1); + upd = options.numIters * numInstances - (numInstances * i + (n + 1)) + 1; - for(int k = 0; k < is.length(n); k++) { + for (int k = 0; k < is.length(n); k++) { double best = -1000; - String bestOp=""; - - + String bestOp = ""; count++; - pipe.addCoreFeatures(is, n, k, 0,wds[is.forms[n][k]], vs); + pipe.addCoreFeatures(is, n, k, 0, wds[is.forms[n][k]], vs); String lemma = pipe.opse.get(wds[is.forms[n][k]].toLowerCase()); - // predict - if (lemma==null) - for(int t = 0; t < pipe.types.length; t++) { + if (lemma == null) + for (int t = 0; t < pipe.types.length; t++) { fs.clear(); - for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) fs.add(li.l2i(vs[l]+(t*Pipe.s_type))); + for (int l = vs.length - 1; l >= 0; l--) + if (vs[l] > 0) + fs.add(li.l2i(vs[l] + (t * Pipe.s_type))); float score = (float) fs.getScore(); - if (score >best) { + if (score > best) { bestOp = pipe.types[t]; - best =score; + best = score; } } if (doUppercase) { fs.clear(); - for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) fs.add(li.l2i(vs[l]+(LC*Pipe.s_type))); + for (int l = vs.length - 1; l >= 0; l--) + if (vs[l] > 0) + fs.add(li.l2i(vs[l] + (LC * Pipe.s_type))); - int correctOP =-1, selectedOP =-1; - if (wds[is.glemmas[n][k]].length()>0 && - Character.isUpperCase(wds[is.glemmas[n][k]].charAt(0)) && - fs.score > 0) { + int correctOP = -1, selectedOP = -1; + if (wds[is.glemmas[n][k]].length() > 0 && Character.isUpperCase(wds[is.glemmas[n][k]].charAt(0)) + && fs.score > 0) { correctOP = UC; - selectedOP =LC; - } else if (wds[is.glemmas[n][k]].length()>0 - &&Character.isLowerCase(wds[is.glemmas[n][k]].charAt(0)) && - fs.score <= 0) { - + selectedOP = LC; + } else if (wds[is.glemmas[n][k]].length() > 0 + && Character.isLowerCase(wds[is.glemmas[n][k]].charAt(0)) && fs.score <= 0) { correctOP = LC; - selectedOP =UC; + selectedOP = UC; } - if (correctOP!=-1 && wds[is.glemmas[n][k]].length()>0) { + if (correctOP != -1 && wds[is.glemmas[n][k]].length() > 0) { wrongUC++; f.clear(); - for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) f.add(li.l2i(vs[l]+(selectedOP*Pipe.s_type))); + for (int l = vs.length - 1; l >= 0; l--) + if (vs[l] > 0) + f.add(li.l2i(vs[l] + (selectedOP * Pipe.s_type))); - g.clear(); - for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) g.add(li.l2i(vs[l]+(correctOP*Pipe.s_type))); + g.clear(); + for (int l = vs.length - 1; l >= 0; l--) + if (vs[l] > 0) + g.add(li.l2i(vs[l] + (correctOP * Pipe.s_type))); - double lam_dist = params.getScore(g) - params.getScore(f);//f + double lam_dist = params.getScore(g) - params.getScore(f);// f double loss = 1 - lam_dist; - FV dist = g.getDistVector(f); - dist.update(params.parameters, params.total, params.update(dist,loss), upd,false); + FV dist = g.getDistVector(f); + dist.update(params.parameters, params.total, params.update(dist, loss), upd, false); } else { correctUC++; } } - if (lemma!=null) { + if (lemma != null) { correct++; correctOp++; continue; } - - String op = Pipe.getOperation(is,n, k,wds); - if (op.equals(bestOp) ) { + String op = Pipe.getOperation(is, n, k, wds); + if (op.equals(bestOp)) { correct++; correctOp++; continue; @@ -321,65 +316,70 @@ public class Lemmatizer implements Tool, Train { wrongOp++; f.clear(); - int bop =pipe.mf.getValue(Pipe.OPERATION, bestOp); - for(int r=vs.length-1;r>=0;r--) if (vs[r]>0)f.add(li.l2i(vs[r]+(bop*Pipe.s_type))); + int bop = pipe.mf.getValue(Pipe.OPERATION, bestOp); + for (int r = vs.length - 1; r >= 0; r--) + if (vs[r] > 0) + f.add(li.l2i(vs[r] + (bop * Pipe.s_type))); g.clear(); - int gop =pipe.mf.getValue(Pipe.OPERATION, op); - for(int r=vs.length-1;r>=0;r--) if (vs[r]>0)g.add(li.l2i(vs[r]+(gop*Pipe.s_type))); - double lam_dist = params.getScore(g) - params.getScore(f);//f + int gop = pipe.mf.getValue(Pipe.OPERATION, op); + for (int r = vs.length - 1; r >= 0; r--) + if (vs[r] > 0) + g.add(li.l2i(vs[r] + (gop * Pipe.s_type))); + double lam_dist = params.getScore(g) - params.getScore(f);// f double loss = 1 - lam_dist; FV dist = g.getDistVector(f); - dist.update(params.parameters, params.total, params.update(dist,loss), upd,false); //0.05 + dist.update(params.parameters, params.total, params.update(dist, loss), upd, false); // 0.05 } } ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>(); - for(Entry<String, Integer> e : map.entrySet()) { - if(e.getValue()>1) { + for (Entry<String, Integer> e : map.entrySet()) { + if (e.getValue() > 1) { opsl.add(e); } } - Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){ + Collections.sort(opsl, new Comparator<Entry<String, Integer>>() { @Override - public int compare(Entry<String, Integer> o1, - Entry<String, Integer> o2) { + public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) { - return o1.getValue()==o2.getValue()?0:o1.getValue()>o2.getValue()?1:-1; + return o1.getValue() == o2.getValue() ? 0 : o1.getValue() > o2.getValue() ? 1 : -1; } }); - if (opsl.size()>0) System.out.println(); - for(Entry<String, Integer> e : opsl) { - System.out.println(e.getKey()+" "+e.getValue()); + if (opsl.size() > 0) + System.out.println(); + for (Entry<String, Integer> e : opsl) { + System.out.println(e.getKey() + " " + e.getValue()); } map.clear(); - del= Pipe.outValueErr(numInstances, (float)(count-correct), (float)correct/(float)count,del,last,upd, - "time "+(System.currentTimeMillis()-start)+ - " corr/wrong "+correctOp+" "+wrongOp+" uppercase corr/wrong "+correctUC+" "+wrongUC); - del=0; - System.out.println(); + del = PipeGen.outValueErr(numInstances, count - correct, (float) correct / (float) count, del, last, upd, + "time " + (System.currentTimeMillis() - start) + " corr/wrong " + correctOp + " " + wrongOp + + " uppercase corr/wrong " + correctUC + " " + wrongUC); + del = 0; + System.out.println(); } - params.average(i*is.size()); + params.average(i * is.size()); } - /** * Do the work + * * @param options * @param pipe * @param params * @throws IOException */ - public void out (OptionsSuper options, IPipe pipe, ParametersFloat params) { + @Override + public void out(OptionsSuper options, IPipe pipe, ParametersFloat params) { long start = System.currentTimeMillis(); @@ -391,53 +391,55 @@ public class Lemmatizer implements Tool, Train { System.out.print("Processing Sentence: "); int cnt = 0; - int del=0; + int del = 0; try { - while(true) { + while (true) { InstancesTagger is = new InstancesTagger(); is.init(1, new MFO()); - SentenceData09 instance = depReader.getNext(is);//pipe.nextInstance(null, depReader); + SentenceData09 instance = depReader.getNext(is);// pipe.nextInstance(null, + // depReader); - if (instance==null) break; + if (instance == null) + break; is.fillChars(instance, 0, Pipe._CEND); cnt++; - SentenceData09 i09 =lemmatize(is, instance, this.li); - - if(options.normalize) for(int k=0;k<i09.length();k++) { - boolean save = depReader.normalizeOn; - depReader.normalizeOn =true; - i09.plemmas[k] = depReader.normalize(i09.plemmas[k]); - depReader.normalizeOn = save; - } - - if (options.overwritegold) i09.lemmas = i09.plemmas; - - - - depWriter.write(i09); + SentenceData09 i09 = lemmatize(is, instance, this.li); + + if (options.normalize) + for (int k = 0; k < i09.length(); k++) { + boolean save = depReader.normalizeOn; + depReader.normalizeOn = true; + i09.plemmas[k] = depReader.normalize(i09.plemmas[k]); + depReader.normalizeOn = save; + } + + if (options.overwritegold) + i09.lemmas = i09.plemmas; - if (cnt%100 ==0) del=Pipe.outValue(cnt, del); + depWriter.write(i09); + + if (cnt % 100 == 0) + del = PipeGen.outValue(cnt, del); } depWriter.finishWriting(); - del=Pipe.outValue(cnt, del); + del = PipeGen.outValue(cnt, del); long end = System.currentTimeMillis(); - System.out.println(PipeGen.getSecondsPerInstnace(cnt,(end-start))); - System.out.println(PipeGen.getUsedTime(end-start)); - } catch(Exception e) { + System.out.println(PipeGen.getSecondsPerInstnace(cnt, (end - start))); + System.out.println(PipeGen.getUsedTime(end - start)); + } catch (Exception e) { e.printStackTrace(); } } - private SentenceData09 lemmatize(InstancesTagger is, SentenceData09 instance, Long2Int li) { - int LC = pipe.types.length+1; + int LC = pipe.types.length + 1; is.feats[0] = new short[instance.length()][11]; @@ -445,91 +447,97 @@ public class Lemmatizer implements Tool, Train { int length = instance.length(); - F2SF fs = new F2SF(params.parameters); + F2SF fs = new F2SF(params.parameters); + for (int w1 = 0; w1 < length; w1++) { + instance.plemmas[w1] = "_"; + pipe.addCoreFeatures(is, 0, w1, 0, instance.forms[w1], vs); - for(int w1 = 0; w1 < length; w1++) { - instance.plemmas[w1]="_"; - pipe.addCoreFeatures(is, 0, w1, 0,instance.forms[w1], vs); - - String f =null; - if (is.forms[0][w1]!=-1) { + String f = null; + if (is.forms[0][w1] != -1) { f = pipe.opse.get(instance.forms[w1].toLowerCase()); - if (f!=null) { - instance.plemmas[w1]=f; + if (f != null) { + instance.plemmas[w1] = f; } - } + } double best = -1000.0; - int bestOp=0; + int bestOp = 0; - for(int t = 0; t < pipe.types.length; t++) { + for (int t = 0; t < pipe.types.length; t++) { fs.clear(); - for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) fs.add(li.l2i(vs[l]+(t*Pipe.s_type))); + for (int l = vs.length - 1; l >= 0; l--) + if (vs[l] > 0) + fs.add(li.l2i(vs[l] + (t * Pipe.s_type))); - if (fs.score >=best) { - best =fs.score; - bestOp=t; - } + if (fs.score >= best) { + best = fs.score; + bestOp = t; + } } - //instance.ppos[w1]=""+bestOp; - if (f==null) instance.plemmas[w1] = StringEdit.change((doUppercase?instance.forms[w1]:instance.forms[w1].toLowerCase()),pipe.types[bestOp]); + // instance.ppos[w1]=""+bestOp; + if (f == null) + instance.plemmas[w1] = StringEdit.change( + (doUppercase ? instance.forms[w1] : instance.forms[w1].toLowerCase()), pipe.types[bestOp]); // check for empty string - if(instance.plemmas[w1].length()==0) instance.plemmas[w1] = "_"; + if (instance.plemmas[w1].length() == 0) + instance.plemmas[w1] = "_"; - if(doUppercase){ + if (doUppercase) { fs.clear(); - for(int l=vs.length-1;l>=0;l--) if (vs[l]>0) fs.add(li.l2i(vs[l]+(LC*Pipe.s_type))); - + for (int l = vs.length - 1; l >= 0; l--) + if (vs[l] > 0) + fs.add(li.l2i(vs[l] + (LC * Pipe.s_type))); try { - if (fs.score<=0 && instance.plemmas[w1].length()>1) { - instance.plemmas[w1] = Character.toUpperCase(instance.plemmas[w1].charAt(0))+instance.plemmas[w1].substring(1); - } else if (fs.score<=0 && instance.plemmas[w1].length()>0) { + if (fs.score <= 0 && instance.plemmas[w1].length() > 1) { + instance.plemmas[w1] = Character.toUpperCase(instance.plemmas[w1].charAt(0)) + + instance.plemmas[w1].substring(1); + } else if (fs.score <= 0 && instance.plemmas[w1].length() > 0) { instance.plemmas[w1] = String.valueOf(Character.toUpperCase(instance.plemmas[w1].charAt(0))); - } else if (fs.score>0) { + } else if (fs.score > 0) { instance.plemmas[w1] = instance.plemmas[w1].toLowerCase(); } - } catch(Exception e){ + } catch (Exception e) { e.printStackTrace(); - // System.out.println("error "+pipe.types[bestOp]+" "+instance.forms[w1]); + // System.out.println("error "+pipe.types[bestOp]+" + // "+instance.forms[w1]); } } } - SentenceData09 i09 = new SentenceData09(instance); i09.createSemantic(instance); return i09; } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.tools.Tool#apply(is2.data.SentenceData09) */ @Override public SentenceData09 apply(SentenceData09 snt) { InstancesTagger is = new InstancesTagger(); - + // be robust - if (snt.length()== 0) return snt; - + if (snt.length() == 0) + return snt; + SentenceData09 it = new SentenceData09(); it.createWithRoot(snt); - - + is.init(1, new MFO()); is.createInstance09(it.length()); is.fillChars(it, 0, Pipe._CEND); - for(int j = 0; j < it.length(); j++) is.setForm(0, j, it.forms[j]); + for (int j = 0; j < it.length(); j++) + is.setForm(0, j, it.forms[j]); - return lemmatize(is, it,li); + return lemmatize(is, it, li); } - - } diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/MFO.java b/dependencyParser/mate-tools/src/is2/lemmatizer/MFO.java index 249ca42..305e827 100755 --- a/dependencyParser/mate-tools/src/is2/lemmatizer/MFO.java +++ b/dependencyParser/mate-tools/src/is2/lemmatizer/MFO.java @@ -1,257 +1,246 @@ package is2.lemmatizer; - -import is2.data.IEncoder; -import is2.data.IEncoderPlus; -import is2.data.IFV; -import is2.data.Long2IntInterface; -import is2.util.DB; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.Map.Entry; +import is2.data.IEncoderPlus; +import is2.util.DB; + /** * Map Features, do not map long to integer - * + * * @author Bernd Bohnet, 20.09.2009 */ -final public class MFO implements IEncoderPlus { - +final public class MFO implements IEncoderPlus { + /** The features and its values */ - static private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>(); + static private final HashMap<String, HashMap<String, Integer>> m_featureSets = new HashMap<String, HashMap<String, Integer>>(); /** The feature class and the number of values */ - static private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>(); + static private final HashMap<String, Integer> m_featureCounters = new HashMap<String, Integer>(); /** The number of bits needed to encode a feature */ - static final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>(); - + static final HashMap<String, Integer> m_featureBits = new HashMap<String, Integer>(); + /** Integer counter for long2int */ - static private int count=0; - + static private int count = 0; + /** Stop growing */ - public boolean stop=false; - - final public static String NONE="<None>"; - - - - - - - - public MFO () {} - - - public int size() {return count;} - - - + public boolean stop = false; + + final public static String NONE = "<None>"; + + public MFO() { + } + + public int size() { + return count; + } + /** * Register an attribute class, if it not exists and add a possible value + * * @param type * @param type2 */ + @Override final public int register(String a, String v) { - HashMap<String,Integer> fs = getFeatureSet().get(a); - if (fs==null) { - fs = new HashMap<String,Integer>(); + HashMap<String, Integer> fs = getFeatureSet().get(a); + if (fs == null) { + fs = new HashMap<String, Integer>(); getFeatureSet().put(a, fs); fs.put(NONE, 0); getFeatureCounter().put(a, 1); } Integer c = getFeatureCounter().get(a); - + Integer i = fs.get(v); - if (i==null) { + if (i == null) { fs.put(v, c); c++; - getFeatureCounter().put(a,c); - return c-1; - } else return i; + getFeatureCounter().put(a, c); + return c - 1; + } else + return i; } - + /** * Calculates the number of bits needed to encode a feature */ - public void calculateBits() { - - int total=0; - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2))); + public void calculateBits() { + + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + int bits = (int) Math.ceil((Math.log(e.getValue() + 1) / Math.log(2))); m_featureBits.put(e.getKey(), bits); - total+=bits; - // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1)); } - -// System.out.println("total number of needed bits "+total); + + // System.out.println("total number of needed bits "+total); } - - - - public String toString() { - + + @Override + public String toString() { + StringBuffer content = new StringBuffer(); - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - content.append(e.getKey()+" "+e.getValue()); - content.append(':'); - // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); - content.append(getFeatureBits(e.getKey())); - - /*if (vs.size()<120) - for(Entry<String,Integer> e2 : vs.entrySet()) { - content.append(e2.getKey()+" ("+e2.getValue()+") "); - }*/ - content.append('\n'); - + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + content.append(e.getKey() + " " + e.getValue()); + content.append(':'); + // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); + content.append(getFeatureBits(e.getKey())); + + /* + * if (vs.size()<120) for(Entry<String,Integer> e2 : vs.entrySet()) + * { content.append(e2.getKey()+" ("+e2.getValue()+") "); } + */ + content.append('\n'); + } return content.toString(); } - - - + static final public short getFeatureBits(String a) { - if(m_featureBits.get(a)==null) return 0; - return (short)m_featureBits.get(a).intValue(); + if (m_featureBits.get(a) == null) + return 0; + return (short) m_featureBits.get(a).intValue(); } - - /** * Get the integer place holder of the string value v of the type a - * - * @param t the type - * @param v the value + * + * @param t + * the type + * @param v + * the value * @return the integer place holder of v */ - final public int getValue(String t, String v) { - - if (m_featureSets.get(t)==null) return -1; + @Override + final public int getValue(String t, String v) { + + if (m_featureSets.get(t) == null) + return -1; Integer vi = m_featureSets.get(t).get(v); - if (vi==null) return -1; //stop && + if (vi == null) + return -1; // stop && return vi.intValue(); } - /** - * Static version of getValue - * @see getValue - */ + /** + * Static version of getValue + * + * @see getValue + */ static final public int getValueS(String a, String v) { - - if (m_featureSets.get(a)==null) return -1; - Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; //stop && - return vi.intValue(); - } - - public int hasValue(String a, String v) { - + + if (m_featureSets.get(a) == null) + return -1; Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; + if (vi == null) + return -1; // stop && return vi.intValue(); } - - + + public int hasValue(String a, String v) { + + Integer vi = m_featureSets.get(a).get(v); + if (vi == null) + return -1; + return vi.intValue(); + } + public static String printBits(int k) { StringBuffer s = new StringBuffer(); - for(int i =0;i<31;i++) { - s.append((k&0x00000001)==1?'1':'0'); - k=k>>1; - + for (int i = 0; i < 31; i++) { + s.append((k & 0x00000001) == 1 ? '1' : '0'); + k = k >> 1; + } s.reverse(); return s.toString(); } - - - - - - - - /** - * Maps a long to a integer value. This is very useful to save memory for sparse data long values + + /** + * Maps a long to a integer value. This is very useful to save memory for + * sparse data long values + * * @param l * @return the integer */ - static public int misses = 0; - static public int good = 0; - + static public int misses = 0; + static public int good = 0; - - /** * Write the data + * * @param dos * @throws IOException */ - static public void writeData(DataOutputStream dos) throws IOException { - dos.writeInt(getFeatureSet().size()); - // DB.println("write"+getFeatureSet().size()); - for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) { - dos.writeUTF(e.getKey()); - dos.writeInt(e.getValue().size()); - - for(Entry<String,Integer> e2 : e.getValue().entrySet()) { - - if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey()); - dos.writeUTF(e2.getKey()); - dos.writeInt(e2.getValue()); - - } - - } - } - public void read(DataInputStream din) throws IOException { - + static public void writeData(DataOutputStream dos) throws IOException { + dos.writeInt(getFeatureSet().size()); + // DB.println("write"+getFeatureSet().size()); + for (Entry<String, HashMap<String, Integer>> e : getFeatureSet().entrySet()) { + dos.writeUTF(e.getKey()); + dos.writeInt(e.getValue().size()); + + for (Entry<String, Integer> e2 : e.getValue().entrySet()) { + + if (e2.getKey() == null) + DB.println("key " + e2.getKey() + " value " + e2.getValue() + " e -key " + e.getKey()); + dos.writeUTF(e2.getKey()); + dos.writeInt(e2.getValue()); + + } + + } + } + + public void read(DataInputStream din) throws IOException { + int size = din.readInt(); - for(int i=0; i<size;i++) { + for (int i = 0; i < size; i++) { String k = din.readUTF(); int size2 = din.readInt(); - - HashMap<String,Integer> h = new HashMap<String,Integer>(); - getFeatureSet().put(k,h); - for(int j = 0;j<size2;j++) { + + HashMap<String, Integer> h = new HashMap<String, Integer>(); + getFeatureSet().put(k, h); + for (int j = 0; j < size2; j++) { h.put(din.readUTF(), din.readInt()); } getFeatureCounter().put(k, size2); } - count =size; - // stop(); + count = size; + // stop(); calculateBits(); } - - /** + /** * Clear the data */ - static public void clearData() { - getFeatureSet().clear(); - m_featureBits.clear(); - getFeatureSet().clear(); - } + static public void clearData() { + getFeatureSet().clear(); + m_featureBits.clear(); + getFeatureSet().clear(); + } - public HashMap<String,Integer> getFeatureCounter() { + @Override + public HashMap<String, Integer> getFeatureCounter() { return m_featureCounters; } - static public HashMap<String,HashMap<String,Integer>> getFeatureSet() { + static public HashMap<String, HashMap<String, Integer>> getFeatureSet() { return m_featureSets; } - - static public String[] reverse(HashMap<String,Integer> v){ + + static public String[] reverse(HashMap<String, Integer> v) { String[] set = new String[v.size()]; - for(Entry<String,Integer> e : v.entrySet()) { - set[e.getValue()]=e.getKey(); + for (Entry<String, Integer> e : v.entrySet()) { + set[e.getValue()] = e.getKey(); } return set; } - } diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/Options.java b/dependencyParser/mate-tools/src/is2/lemmatizer/Options.java index a4b9e69..30c2567 100755 --- a/dependencyParser/mate-tools/src/is2/lemmatizer/Options.java +++ b/dependencyParser/mate-tools/src/is2/lemmatizer/Options.java @@ -5,47 +5,48 @@ import java.io.IOException; import is2.util.OptionsSuper; - public final class Options extends OptionsSuper { - - public Options (String[] args) throws IOException { - + public Options(String[] args) throws IOException { + + for (int i = 0; i < args.length; i++) { - - for(int i = 0; i < args.length; i++) { + if (args[i].equals("--help")) + explain(); - if (args[i].equals("--help")) explain(); - if (args[i].equals("-normalize")) { - normalize=Boolean.parseBoolean(args[++i]); + normalize = Boolean.parseBoolean(args[++i]); } else if (args[i].equals("-features")) { - features= args[i+1]; i++; + features = args[i + 1]; + i++; } else if (args[i].equals("-hsize")) { - hsize= Integer.parseInt(args[i+1]); i++; + hsize = Integer.parseInt(args[i + 1]); + i++; } else if (args[i].equals("-len")) { - maxLen= Integer.parseInt(args[i+1]); i++; - } else if (args[i].equals("-tmp")) { - tmp = args[i+1]; i++; - } else if (args[i].equals("-uc")) { - upper=true; - System.out.println("set uppercase "+upper); + maxLen = Integer.parseInt(args[i + 1]); + i++; + } else if (args[i].equals("-tmp")) { + tmp = args[i + 1]; + i++; + } else if (args[i].equals("-uc")) { + upper = true; + System.out.println("set uppercase " + upper); + + } else + super.addOption(args, i); - } else super.addOption(args, i); - } - if (trainfile!=null) { - - - if (tmp!=null) trainforest = File.createTempFile("train", ".tmp", new File(tmp)); - else trainforest = File.createTempFile("train", ".tmp"); //,new File("F:\\") + if (trainfile != null) { + + if (tmp != null) + trainforest = File.createTempFile("train", ".tmp", new File(tmp)); + else + trainforest = File.createTempFile("train", ".tmp"); // ,new + // File("F:\\") trainforest.deleteOnExit(); } - - - } private void explain() { @@ -54,14 +55,18 @@ public final class Options extends OptionsSuper { System.out.println(); System.out.println("Options:"); System.out.println(""); - System.out.println(" -train <file> the corpus a model is trained on; default "+this.trainfile); - System.out.println(" -test <file> the input corpus for testing; default "+this.testfile); - System.out.println(" -out <file> the output corpus (result) of a test run; default "+this.outfile); + System.out.println(" -train <file> the corpus a model is trained on; default " + this.trainfile); + System.out.println(" -test <file> the input corpus for testing; default " + this.testfile); + System.out.println(" -out <file> the output corpus (result) of a test run; default " + this.outfile); System.out.println(" -model <file> the parsing model for traing the model is stored in the files"); - System.out.println(" and for parsing the model is load from this file; default "+this.modelName); - System.out.println(" -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters); - System.out.println(" -count <number> the n first sentences of the corpus are take for the training default "+this.count); - + System.out.println( + " and for parsing the model is load from this file; default " + this.modelName); + System.out.println( + " -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default " + + this.numIters); + System.out.println(" -count <number> the n first sentences of the corpus are take for the training default " + + this.count); + System.exit(0); } } diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/Pipe.java b/dependencyParser/mate-tools/src/is2/lemmatizer/Pipe.java index 37647ee..c8b4bba 100755 --- a/dependencyParser/mate-tools/src/is2/lemmatizer/Pipe.java +++ b/dependencyParser/mate-tools/src/is2/lemmatizer/Pipe.java @@ -1,17 +1,5 @@ package is2.lemmatizer; -import is2.data.Cluster; -import is2.data.D4; -import is2.data.Instances; -import is2.data.InstancesTagger; -import is2.data.PipeGen; -import is2.data.SentenceData09; -import is2.io.CONLLReader09; -import is2.tools.IPipe; -import is2.util.DB; -import is2.util.OptionsSuper; -import is2.data.Long2Int; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -22,177 +10,195 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Map.Entry; - - +import is2.data.Cluster; +import is2.data.D4; +import is2.data.Instances; +import is2.data.InstancesTagger; +import is2.data.Long2Int; +import is2.data.PipeGen; +import is2.data.SentenceData09; +import is2.io.CONLLReader09; +import is2.tools.IPipe; +import is2.util.DB; +import is2.util.OptionsSuper; final public class Pipe extends PipeGen implements IPipe { - private static final int _MIN_WORDS_MAPPED_BY_SCRIPT = 1; private static final int _MIN_OCCURENT_FOR_SCRIPT_USE = 4; private static final String _F0 = "F0"; - private static final String _F1 = "F1",_F2 = "F2",_F3 = "F3",_F4 = "F4",_F5 = "F5",_F6= "F6",_F7= "F7",_F8= "F8",_F9="F9",_F10 = "F10"; - private static final String _F11="F11",_F12="F12",_F13= "F13",_F14="F14",_F15="F15",_F16="F16",_F17="F17",_F18="F18",_F19="F19",_F20="F20"; - private static final String _F21="F21",_F22="F22",_F23= "F23",_F24="F24",_F25="F25",_F26="F26",_F27="F27",_F28="F28",_F29="F29",_F30="F30"; - private static final String _F31="F31",_F32="F32",_F33= "F33",_F34="F34",_F35="F35",_F36="F36",_F37="F37",_F38="F38",_F39="F39",_F40="F40"; - private static final String _F41="F41"; - - private static int _f0,_f1,_f2,_f3,_f4,_f5,_f6,_f7,_f8,_f9,_f10,_f11,_f12,_f13,_f14,_f15,_f16,_f17,_f18,_f19,_f20; - private static int _f21,_f22,_f23,_f24,_f25,_f26,_f27,_f28,_f29,_f30,_f31,_f32,_f33,_f34,_f35,_f36,_f37,_f38,_f39,_f41; - public static int _CEND,_swrd,_ewrd; - - public static final String MID = "MID", END = "END",STR = "STR",OPERATION = "OP"; + private static final String _F1 = "F1", _F2 = "F2", _F3 = "F3", _F4 = "F4", _F5 = "F5", _F6 = "F6", _F7 = "F7", + _F8 = "F8", _F9 = "F9", _F10 = "F10"; + private static final String _F11 = "F11", _F12 = "F12", _F13 = "F13", _F14 = "F14", _F15 = "F15", _F16 = "F16", + _F17 = "F17", _F18 = "F18", _F19 = "F19", _F20 = "F20"; + private static final String _F21 = "F21", _F22 = "F22", _F23 = "F23", _F24 = "F24", _F25 = "F25", _F26 = "F26", + _F27 = "F27", _F28 = "F28", _F29 = "F29", _F30 = "F30"; + private static final String _F31 = "F31", _F32 = "F32", _F33 = "F33", _F34 = "F34", _F35 = "F35", _F36 = "F36", + _F37 = "F37", _F38 = "F38"; + private static int _f0, _f1, _f4, _f5, _f6, _f7, _f8, _f9, _f10, _f11, _f12, _f13, _f14, _f15, _f16, _f17, _f18, + _f19; + private static int _f21, _f27, _f28, _f31, _f32, _f33; + public static int _CEND, _swrd, _ewrd; + + public static final String MID = "MID", END = "END", STR = "STR", OPERATION = "OP"; private CONLLReader09 depReader; - - public HashMap<String,String> opse = new HashMap<String, String> (); + public HashMap<String, String> opse = new HashMap<String, String>(); public String[] types; - - public MFO mf =new MFO(); - private D4 z, x; - + public MFO mf = new MFO(); + private D4 z, x; Cluster cl; OptionsSuper options; Long2Int li; - public Pipe (OptionsSuper options2, Long2Int l) { - - options=options2; - li=l; - } + public Pipe(OptionsSuper options2, Long2Int l) { + options = options2; + li = l; + } - public InstancesTagger createInstances(String file) { + @Override + public InstancesTagger createInstances(String file) { InstancesTagger is = new InstancesTagger(); depReader = new CONLLReader09(CONLLReader09.NO_NORMALIZE); - - depReader.startReading(file); - mf.register(REL,"<root-type>"); - mf.register(POS,"<root-POS>"); + depReader.startReading(file); + mf.register(REL, "<root-type>"); + mf.register(POS, "<root-POS>"); System.out.print("Registering feature parts "); - HashMap<String,Integer> ops = new HashMap<String, Integer> (); - HashMap<String,HashSet<String>> op2form = new HashMap<String, HashSet<String>> (); - int ic=0; - int del=0; - HashSet<String> rm = new HashSet<String> (); + HashMap<String, Integer> ops = new HashMap<String, Integer>(); + HashMap<String, HashSet<String>> op2form = new HashMap<String, HashSet<String>>(); + int ic = 0; + int del = 0; + HashSet<String> rm = new HashSet<String>(); - while(true) { + while (true) { SentenceData09 instance1 = depReader.getNext(); - if (instance1== null) break; + if (instance1 == null) + break; ic++; - if (ic % 100 ==0) {del = outValue(ic, del);} - + if (ic % 100 == 0) { + del = outValue(ic, del); + } String[] labs1 = instance1.labels; - for(int i1 = 0; i1 < labs1.length; i1++) { - //typeAlphabet.lookupIndex(labs1[i1]); - mf.register(REL, labs1[i1]); + for (String element : labs1) { + // typeAlphabet.lookupIndex(labs1[i1]); + mf.register(REL, element); } String[] w = instance1.forms; - for(int i1 = 0; i1 < w.length; i1++) { + for (int i1 = 0; i1 < w.length; i1++) { // saw the first time? - if (mf.getValue(WORD, w[i1].toLowerCase())==-1) + if (mf.getValue(WORD, w[i1].toLowerCase()) == -1) opse.put(instance1.forms[i1].toLowerCase(), instance1.lemmas[i1]); - - mf.register(WORD, w[i1].toLowerCase()); + + mf.register(WORD, w[i1].toLowerCase()); } - for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]); + for (String element : w) + mf.register(WORD, element); w = instance1.lemmas; - for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]); - for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1].toLowerCase()); + for (String element : w) + mf.register(WORD, element); + for (String element : w) + mf.register(WORD, element.toLowerCase()); w = instance1.plemmas; - for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]); - for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1].toLowerCase()); - + for (String element : w) + mf.register(WORD, element); + for (String element : w) + mf.register(WORD, element.toLowerCase()); - for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]); + for (String element : w) + registerChars(CHAR, element); w = instance1.ppos; - for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); w = instance1.gpos; - for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); - - for(int i1 = 1; i1 < w.length; i1++) { + for (int i1 = 1; i1 < w.length; i1++) { String op = getOperation(instance1, i1); - if (ops.get(op)==null) ops.put(op, 1); + if (ops.get(op) == null) + ops.put(op, 1); else { - ops.put(op, (ops.get(op)+1)); - if (ops.get(op)>4) rm.add(instance1.forms[i1].toLowerCase()); + ops.put(op, (ops.get(op) + 1)); + if (ops.get(op) > 4) + rm.add(instance1.forms[i1].toLowerCase()); } - HashSet<String> forms = op2form.get(op); - if (forms==null) { + if (forms == null) { forms = new HashSet<String>(); op2form.put(op, forms); } forms.add(instance1.forms[i1].toLowerCase()); - + } } - int countFreqSingleMappings =0; - - int sc=0; + int countFreqSingleMappings = 0; + + int sc = 0; ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>(); - for(Entry<String, Integer> e : ops.entrySet()) { + for (Entry<String, Integer> e : ops.entrySet()) { - // do not use scripts for infrequent cases or frequent single mappings (der -> die) - if(e.getValue()>_MIN_OCCURENT_FOR_SCRIPT_USE && op2form.get(e.getKey()).size()>_MIN_WORDS_MAPPED_BY_SCRIPT) { + // do not use scripts for infrequent cases or frequent single + // mappings (der -> die) + if (e.getValue() > _MIN_OCCURENT_FOR_SCRIPT_USE + && op2form.get(e.getKey()).size() > _MIN_WORDS_MAPPED_BY_SCRIPT) { mf.register(OPERATION, e.getKey()); sc++; opsl.add(e); } else { // do not remove the infrequent cases rm.removeAll(op2form.get(e.getKey())); - - if (op2form.get(e.getKey()).size()<=1) countFreqSingleMappings+=op2form.get(e.getKey()).size(); + + if (op2form.get(e.getKey()).size() <= 1) + countFreqSingleMappings += op2form.get(e.getKey()).size(); } } - for(String k : rm) { + for (String k : rm) { opse.remove(k); } - Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){ + Collections.sort(opsl, new Comparator<Entry<String, Integer>>() { @Override public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) { - return o1.getValue()==o2.getValue()?0:o1.getValue()>o2.getValue()?1:-1; + return o1.getValue() == o2.getValue() ? 0 : o1.getValue() > o2.getValue() ? 1 : -1; } }); - - - for(Entry<String, Integer> e : opsl) { - // System.out.println(e.getKey()+" "+e.getValue()); - } - + /* + * for(Entry<String, Integer> e : opsl) { + * System.out.println(e.getKey()+" "+e.getValue()); } + */ - if (options.clusterFile==null)cl = new Cluster(); - else cl= new Cluster(options.clusterFile, mf,6); + if (options.clusterFile == null) + cl = new Cluster(); + else + cl = new Cluster(options.clusterFile, mf, 6); - - System.out.println("\nfound scripts "+ops.size()+" used scripts "+sc); - System.out.println("found mappings of single words "+countFreqSingleMappings); - System.out.println("use word maps instead of scripts "+this.opse.size()); - // System.out.println(" "+opse); - System.out.println(""+mf.toString()); + System.out.println("\nfound scripts " + ops.size() + " used scripts " + sc); + System.out.println("found mappings of single words " + countFreqSingleMappings); + System.out.println("use word maps instead of scripts " + this.opse.size()); + // System.out.println(" "+opse); + System.out.println("" + mf.toString()); initFeatures(); @@ -205,45 +211,46 @@ final public class Pipe extends PipeGen implements IPipe { long start1 = System.currentTimeMillis(); System.out.print("Creating Features: "); - is.init(ic, mf) ; - del=0; - while(true) { + is.init(ic, mf); + del = 0; + while (true) { try { - if (i % 100 ==0) {del = outValue(i, del);} - SentenceData09 instance1 = depReader.getNext(is); - if (instance1== null) break; - - is.fillChars(instance1, i, _CEND); - - if (i>options.count) break; - - i++; - } catch(Exception e) { - DB.println("error in sentnence "+i); - e.printStackTrace(); - } + if (i % 100 == 0) { + del = outValue(i, del); + } + SentenceData09 instance1 = depReader.getNext(is); + if (instance1 == null) + break; + + is.fillChars(instance1, i, _CEND); + + if (i > options.count) + break; + + i++; + } catch (Exception e) { + DB.println("error in sentnence " + i); + e.printStackTrace(); + } } long end1 = System.currentTimeMillis(); System.gc(); - long mem2 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); - System.out.print(" time "+(end1-start1)+" mem "+(mem2/1024)+" kb"); + long mem2 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); + System.out.print(" time " + (end1 - start1) + " mem " + (mem2 / 1024) + " kb"); types = new String[mf.getFeatureCounter().get(OPERATION)]; - for(Entry<String,Integer> e : mf.getFeatureSet().get(OPERATION).entrySet()) { + for (Entry<String, Integer> e : MFO.getFeatureSet().get(OPERATION).entrySet()) { types[e.getValue()] = e.getKey(); - // System.out.println("set pos "+e.getKey()); + // System.out.println("set pos "+e.getKey()); } System.out.println("Num Features: " + mf.size()); - - return is; } - /** * @param is * @param n @@ -253,26 +260,19 @@ final public class Pipe extends PipeGen implements IPipe { */ public static String getOperation(Instances is, int n, int k, String[] wds) { - String form = wds[is.forms[n][k]]; String olemma = wds[is.glemmas[n][k]]; String s = new StringBuffer(form.toLowerCase()).reverse().toString(); String t = new StringBuffer(olemma.toLowerCase()).reverse().toString(); - - return getOperation2(s, t); } - - public static String getOperation(SentenceData09 instance1, int i1) { String s = new StringBuffer(instance1.forms[i1].toLowerCase()).reverse().toString(); String t = new StringBuffer(instance1.lemmas[i1].toLowerCase()).reverse().toString(); - - return getOperation2(s, t); } @@ -280,79 +280,88 @@ final public class Pipe extends PipeGen implements IPipe { String s = new StringBuffer(si.toLowerCase()).reverse().toString(); String t = new StringBuffer(ti.toLowerCase()).reverse().toString(); - - return getOperation2(s, t); } - private static String getOperation2(String s, String t) { StringBuffer po = new StringBuffer(); String op; if (!s.equals(t)) { - - int[][] d =StringEdit.LD(s, t); - StringEdit.searchPath(s,t,d, po, false); + int[][] d = StringEdit.LD(s, t); + StringEdit.searchPath(s, t, d, po, false); op = po.toString(); - } else op ="0"; // do nothing + } else + op = "0"; // do nothing return op; } - - private void registerChars(String type, String word) { - for(int i=0;i<word.length();i++) mf.register(type, Character.toString(word.charAt(i))); + for (int i = 0; i < word.length(); i++) + mf.register(type, Character.toString(word.charAt(i))); } - - + @Override public void initValues() { z = new D4(li); - + x = new D4(li); - x.a0=s_type; + x.a0 = s_type; - s_pos = mf.getFeatureCounter().get(POS).intValue();//mf.getFeatureBits(POS); + s_pos = mf.getFeatureCounter().get(POS).intValue();// mf.getFeatureBits(POS); s_word = mf.getFeatureCounter().get(WORD); - s_type = mf.getFeatureCounter().get(TYPE).intValue();//mf.getFeatureBits(TYPE); - s_char = mf.getFeatureCounter().get(CHAR).intValue();//mf.getFeatureBits(CHAR); - s_oper = mf.getFeatureCounter().get(OPERATION).intValue();//mf.getFeatureBits(OPERATION); + s_type = mf.getFeatureCounter().get(TYPE).intValue();// mf.getFeatureBits(TYPE); + s_char = mf.getFeatureCounter().get(CHAR).intValue();// mf.getFeatureBits(CHAR); + s_oper = mf.getFeatureCounter().get(OPERATION).intValue();// mf.getFeatureBits(OPERATION); types = new String[mf.getFeatureCounter().get(Pipe.OPERATION)]; - for(Entry<String,Integer> e : mf.getFeatureSet().get(Pipe.OPERATION).entrySet()) types[e.getValue()] = e.getKey(); - - //wds = new String[mf.getFeatureCounter().get(Pipe.WORD)]; - //for(Entry<String,Integer> e : mf.getFeatureSet().get(Pipe.WORD).entrySet()) wds[e.getValue()] = e.getKey(); - + for (Entry<String, Integer> e : MFO.getFeatureSet().get(Pipe.OPERATION).entrySet()) + types[e.getValue()] = e.getKey(); - z.a0 = s_type;z.a1 = s_oper; z.a2 = s_char; z.a3 = s_char; z.a4 = s_char;z.a5 = s_char;z.a6 = s_char;z.a7 = s_char; - x.a0 = s_type; x.a1 = s_oper;x.a2 = s_word; x.a3 = s_word; x.a4 = s_word;x.a5 = s_char;x.a6 = s_char;x.a7 = s_char; + // wds = new String[mf.getFeatureCounter().get(Pipe.WORD)]; + // for(Entry<String,Integer> e : + // mf.getFeatureSet().get(Pipe.WORD).entrySet()) wds[e.getValue()] = + // e.getKey(); + + z.a0 = s_type; + z.a1 = s_oper; + z.a2 = s_char; + z.a3 = s_char; + z.a4 = s_char; + z.a5 = s_char; + z.a6 = s_char; + z.a7 = s_char; + x.a0 = s_type; + x.a1 = s_oper; + x.a2 = s_word; + x.a3 = s_word; + x.a4 = s_word; + x.a5 = s_char; + x.a6 = s_char; + x.a7 = s_char; } - public static int s_pos,s_word,s_type,s_dir,s_dist, s_char, s_oper; - - + public static int s_pos, s_word, s_type, s_dir, s_dist, s_char, s_oper; /** * Initialize the features. + * * @param maxFeatures */ + @Override public void initFeatures() { - - - for(int k=0;k<50;k++) { - mf.register(TYPE, "F"+k); + for (int k = 0; k < 50; k++) { + mf.register(TYPE, "F" + k); } - + _f0 = mf.register(TYPE, _F0); _f1 = mf.register(TYPE, _F1); - _f2 = mf.register(TYPE, _F2); - _f3 = mf.register(TYPE, _F3); + mf.register(TYPE, _F2); + mf.register(TYPE, _F3); _f4 = mf.register(TYPE, _F4); _f5 = mf.register(TYPE, _F5); _f6 = mf.register(TYPE, _F6); @@ -369,28 +378,27 @@ final public class Pipe extends PipeGen implements IPipe { _f17 = mf.register(TYPE, _F17); _f18 = mf.register(TYPE, _F18); _f19 = mf.register(TYPE, _F19); - _f20 = mf.register(TYPE, _F20); + mf.register(TYPE, _F20); _f21 = mf.register(TYPE, _F21); - _f22 = mf.register(TYPE, _F22); - _f23 = mf.register(TYPE, _F23); - _f24 = mf.register(TYPE, _F24); - _f25 = mf.register(TYPE, _F25); - _f26 = mf.register(TYPE, _F26); + mf.register(TYPE, _F22); + mf.register(TYPE, _F23); + mf.register(TYPE, _F24); + mf.register(TYPE, _F25); + mf.register(TYPE, _F26); _f27 = mf.register(TYPE, _F27); _f28 = mf.register(TYPE, _F28); - _f29 = mf.register(TYPE, _F29); - _f30 = mf.register(TYPE, _F30); + mf.register(TYPE, _F29); + mf.register(TYPE, _F30); _f31 = mf.register(TYPE, _F31); _f32 = mf.register(TYPE, _F32); _f33 = mf.register(TYPE, _F33); - _f34 = mf.register(TYPE, _F34); - - _f35 = mf.register(TYPE, _F35); - _f36 = mf.register(TYPE, _F36); - _f37 = mf.register(TYPE, _F37); - _f38 = mf.register(TYPE, _F38); + mf.register(TYPE, _F34); + mf.register(TYPE, _F35); + mf.register(TYPE, _F36); + mf.register(TYPE, _F37); + mf.register(TYPE, _F38); mf.register(POS, MID); mf.register(POS, STR); @@ -400,142 +408,226 @@ final public class Pipe extends PipeGen implements IPipe { _swrd = mf.register(WORD, STR); _ewrd = mf.register(WORD, END); - _CEND = mf.register(CHAR, END); - } - - final public void addCoreFeatures(InstancesTagger is, int ic, int i, int oper, String form, long[] f) { - for(int l=f.length-1;l>=0;l--) f[l]=0; - - int formi =is.forms[ic][i]; - int wl =is.chars[ic][i][11];//.forms[i].length(); + for (int l = f.length - 1; l >= 0; l--) + f[l] = 0; + + int formi = is.forms[ic][i]; + int wl = is.chars[ic][i][11];// .forms[i].length(); - int position = 1+(i<3?i:3); + int position = 1 + (i < 3 ? i : 3); - int c0= is.chars[ic][i][0], c1=is.chars[ic][i][1], c2=is.chars[ic][i][2], c3=is.chars[ic][i][3], c4=is.chars[ic][i][4],c5=is.chars[ic][i][5]; - int e0 =is.chars[ic][i][6], e1 =is.chars[ic][i][7],e2 =is.chars[ic][i][8],e3 =is.chars[ic][i][9],e4 =is.chars[ic][i][10]; + int c0 = is.chars[ic][i][0], c1 = is.chars[ic][i][1], c2 = is.chars[ic][i][2], c3 = is.chars[ic][i][3], + c4 = is.chars[ic][i][4]; + int e0 = is.chars[ic][i][6], e1 = is.chars[ic][i][7], e2 = is.chars[ic][i][8], e3 = is.chars[ic][i][9], + e4 = is.chars[ic][i][10]; int len = is.length(ic); - - - x.v1=oper; x.v0 = _f0; x.v2 = formi; x.cz3(); f[0]=x.getVal(); f[1]=x.csa(3, position); - x.v0 = _f1; x.v2 = formi; x.v3 =i+1>=len?x.v3=_ewrd:is.forms[ic][i+1];x.cz4(); f[2]=x.getVal(); + x.v1 = oper; + x.v0 = _f0; + x.v2 = formi; + x.cz3(); + f[0] = x.getVal(); + f[1] = x.csa(3, position); + x.v0 = _f1; + x.v2 = formi; + x.v3 = i + 1 >= len ? x.v3 = _ewrd : is.forms[ic][i + 1]; + x.cz4(); + f[2] = x.getVal(); // contains upper case include again!!! - - short upper =0; + + short upper = 0; short number = 1; - for(int k1=0;k1<wl;k1++){ - char c =form.charAt(k1); + for (int k1 = 0; k1 < wl; k1++) { + char c = form.charAt(k1); if (Character.isUpperCase(c)) { - if (k1==0) upper=1; + if (k1 == 0) + upper = 1; else { // first char + another - if (upper==1)upper=3; + if (upper == 1) + upper = 3; // another uppercase in the word - else if (upper==0) upper=2; + else if (upper == 0) + upper = 2; } } - if (Character.isDigit(c) && k1==0) number =2 ; - else if (Character.isDigit(c) && number==1) number = 3 ; + if (Character.isDigit(c) && k1 == 0) + number = 2; + else if (Character.isDigit(c) && number == 1) + number = 3; } // contains a number - z.v0= _f21; z.v2=number; z.cz3();f[3]=z.getVal(); - - z.v0 = _f4; z.v1 = oper; z.v2=c0; z.cz3();f[4]=z.getVal(); - z.v0 = _f5; z.v2 = e0;z.cz3();f[5]=z.getVal(); - - z.v2=c0; z.v3=c1; z.v4=c2; z.v5=c3; z.v6=c4; - z.v0=_f6; z.cz4(); f[6]=z.getVal(); - z.v0=_f7; z.cz5(); f[7]=z.getVal(); - z.v0=_f8; z.cz6(); f[8]=z.getVal(); - z.v0=_f9; z.cz7(); f[9]=z.getVal(); - - int c=10; - z.v2=e0; z.v3=e1; z.v4=e2; z.v5=e3; z.v6=e4; - z.v0 =_f10; z.cz4();f[c++]=z.getVal(); f[c++]= z.csa(3, upper); - z.v0 =_f11; z.cz5();f[c++]=z.getVal(); f[c++]= z.csa(3, upper); - z.v0 =_f12; z.cz6();f[c++]=z.getVal(); f[c++]= z.csa(3, upper); - z.v0 =_f13; z.cz7();f[c++]=z.getVal(); f[c++]= z.csa(3, upper); - - if (len>i+1) { - - z.v0 = _f14; z.v2 = is.chars[ic][i+1][0]; - z.cz3();f[c++]=z.getVal(); - - z.v0 = _f15; z.v2 = is.chars[ic][i+1][5];z.cz3();f[c++]=z.getVal(); - - if (is.chars[ic][i+1][11]>1 ) { - z.v0 = _f16; z.v2 = is.chars[ic][i+1][0]; - z.v3 = is.chars[ic][i+1][2];z.cz4();f[c++]=z.getVal(); - - z.v0 = _f17; z.v2 = is.chars[ic][i+1][1]; - z.v3 = is.chars[ic][i+1][6]; - z.cz4();f[c++]=z.getVal();//fv.add(li.l2i(mf.calc4(b))); + z.v0 = _f21; + z.v2 = number; + z.cz3(); + f[3] = z.getVal(); + + z.v0 = _f4; + z.v1 = oper; + z.v2 = c0; + z.cz3(); + f[4] = z.getVal(); + z.v0 = _f5; + z.v2 = e0; + z.cz3(); + f[5] = z.getVal(); + + z.v2 = c0; + z.v3 = c1; + z.v4 = c2; + z.v5 = c3; + z.v6 = c4; + z.v0 = _f6; + z.cz4(); + f[6] = z.getVal(); + z.v0 = _f7; + z.cz5(); + f[7] = z.getVal(); + z.v0 = _f8; + z.cz6(); + f[8] = z.getVal(); + z.v0 = _f9; + z.cz7(); + f[9] = z.getVal(); + + int c = 10; + z.v2 = e0; + z.v3 = e1; + z.v4 = e2; + z.v5 = e3; + z.v6 = e4; + z.v0 = _f10; + z.cz4(); + f[c++] = z.getVal(); + f[c++] = z.csa(3, upper); + z.v0 = _f11; + z.cz5(); + f[c++] = z.getVal(); + f[c++] = z.csa(3, upper); + z.v0 = _f12; + z.cz6(); + f[c++] = z.getVal(); + f[c++] = z.csa(3, upper); + z.v0 = _f13; + z.cz7(); + f[c++] = z.getVal(); + f[c++] = z.csa(3, upper); + + if (len > i + 1) { + + z.v0 = _f14; + z.v2 = is.chars[ic][i + 1][0]; + z.cz3(); + f[c++] = z.getVal(); + + z.v0 = _f15; + z.v2 = is.chars[ic][i + 1][5]; + z.cz3(); + f[c++] = z.getVal(); + + if (is.chars[ic][i + 1][11] > 1) { + z.v0 = _f16; + z.v2 = is.chars[ic][i + 1][0]; + z.v3 = is.chars[ic][i + 1][2]; + z.cz4(); + f[c++] = z.getVal(); + + z.v0 = _f17; + z.v2 = is.chars[ic][i + 1][1]; + z.v3 = is.chars[ic][i + 1][6]; + z.cz4(); + f[c++] = z.getVal();// fv.add(li.l2i(mf.calc4(b))); } + x.v0 = _f18; + x.v2 = is.forms[ic][i + 1]; + x.cz3(); + f[c++] = x.getVal(); - x.v0 = _f18; - x.v2 = is.forms[ic][i+1]; - x.cz3();f[c++]=x.getVal(); - - if (len>i+2) { - x.v0 = _f32; - x.v2 = is.forms[ic][i+2]; x.v3 = is.forms[ic][i+1]; x.cz4();f[c++]=x.getVal(); - x.cz3();f[c++]=x.getVal();//fv.add(li.l2i(mf.calc3(b))); + if (len > i + 2) { + x.v0 = _f32; + x.v2 = is.forms[ic][i + 2]; + x.v3 = is.forms[ic][i + 1]; + x.cz4(); + f[c++] = x.getVal(); + x.cz3(); + f[c++] = x.getVal();// fv.add(li.l2i(mf.calc3(b))); } - if (len>i+3) { - x.v0 = _f33; x.v2 = is.forms[ic][i+3]; x.v3 = is.forms[ic][i+2];x.cz4();f[c++]=x.getVal();//fv.add(li.l2i(mf.calc4(b))); - x.cz3();f[27]=x.getVal();//fv.add(li.l2i(mf.calc3(b))); + if (len > i + 3) { + x.v0 = _f33; + x.v2 = is.forms[ic][i + 3]; + x.v3 = is.forms[ic][i + 2]; + x.cz4(); + f[c++] = x.getVal();// fv.add(li.l2i(mf.calc4(b))); + x.cz3(); + f[27] = x.getVal();// fv.add(li.l2i(mf.calc3(b))); } } // length - z.v0= _f19; z.v1=oper; z.v2=wl;z.cz3();f[c++]=z.getVal();//fv.add(li.l2i(mf.calc3(dl1))); - - if (i<1) return ; + z.v0 = _f19; + z.v1 = oper; + z.v2 = wl; + z.cz3(); + f[c++] = z.getVal();// fv.add(li.l2i(mf.calc3(dl1))); - x.v0 = _f27; x.v1=oper; - x.v2 = is.forms[ic][i-1];x.cz3();f[c++]=x.getVal();//fv.add(li.l2i(mf.calc3(b))); + if (i < 1) + return; + x.v0 = _f27; + x.v1 = oper; + x.v2 = is.forms[ic][i - 1]; + x.cz3(); + f[c++] = x.getVal();// fv.add(li.l2i(mf.calc3(b))); - if (i<2) return ; + if (i < 2) + return; - //added this before it was 99.46 - x.v0 = _f28; x.v2 = is.forms[ic][i-2];x.cz3();f[c++]=x.getVal();//fv.add(li.l2i(mf.calc3(b))); + // added this before it was 99.46 + x.v0 = _f28; + x.v2 = is.forms[ic][i - 2]; + x.cz3(); + f[c++] = x.getVal();// fv.add(li.l2i(mf.calc3(b))); // result 99.484 - if (i<3) return ; + if (i < 3) + return; - x.v0 = _f31; x.v1=oper; x.v2 = is.forms[ic][i-3]; x.v3 = is.forms[ic][i-2]; x.cz4();f[c++]=x.getVal();//fv.add(li.l2i(mf.calc4(b))); + x.v0 = _f31; + x.v1 = oper; + x.v2 = is.forms[ic][i - 3]; + x.v3 = is.forms[ic][i - 2]; + x.cz4(); + f[c++] = x.getVal();// fv.add(li.l2i(mf.calc4(b))); } - - -// public String[] wds; + // public String[] wds; /** * Write the lemma that are not mapped by operations + * * @param dos */ private void writeMap(DataOutputStream dos) { try { dos.writeInt(opse.size()); - for(Entry<String, String> e : opse.entrySet()) { + for (Entry<String, String> e : opse.entrySet()) { dos.writeUTF(e.getKey()); dos.writeUTF(e.getValue()); } @@ -544,16 +636,15 @@ final public class Pipe extends PipeGen implements IPipe { } } - - /** * Read the form-lemma mapping not read by operations + * * @param dis */ public void readMap(DataInputStream dis) { try { int size = dis.readInt(); - for(int i =0; i<size;i++) { + for (int i = 0; i < size; i++) { opse.put(dis.readUTF(), dis.readUTF()); } } catch (IOException e1) { @@ -561,12 +652,9 @@ final public class Pipe extends PipeGen implements IPipe { } } - - - - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.tools.IPipe#write(java.io.DataOutputStream) */ @Override @@ -577,9 +665,7 @@ final public class Pipe extends PipeGen implements IPipe { } catch (IOException e) { e.printStackTrace(); } - - } - + } } diff --git a/dependencyParser/mate-tools/src/is2/lemmatizer/StringEdit.java b/dependencyParser/mate-tools/src/is2/lemmatizer/StringEdit.java index 8a4080e..69fd872 100755 --- a/dependencyParser/mate-tools/src/is2/lemmatizer/StringEdit.java +++ b/dependencyParser/mate-tools/src/is2/lemmatizer/StringEdit.java @@ -1,268 +1,256 @@ package is2.lemmatizer; -import is2.util.DB; - import java.util.ArrayList; public class StringEdit { - public static void main(String args[]) { - - String s = new StringBuffer(args[0]).reverse().toString(); String t = new StringBuffer(args[1]).reverse().toString(); - - int d[][] = LD(s, t); - + int d[][] = LD(s, t); StringBuffer opersations = new StringBuffer(); - searchPath(s,t,d, opersations, false); - System.out.println("resuylt "+" "+opersations); + searchPath(s, t, d, opersations, false); + System.out.println("resuylt " + " " + opersations); } - - - - - //**************************** + // **************************** // Get minimum of three values - //**************************** + // **************************** - static private int Minimum (int a, int b, int c) { + static private int Minimum(int a, int b, int c) { int mi; mi = a; - if (b < mi) mi = b; - if (c < mi) mi = c; - + if (b < mi) + mi = b; + if (c < mi) + mi = c; + return mi; } - //***************************** + // ***************************** // Compute Levenshtein distance - //***************************** - - static public int[][] LD (String s, String t) { - - int n = s.length (); - int m = t.length ();; // length of t - // char s_i; // ith character of s - // char t_j; // jth character of t + // ***************************** + + static public int[][] LD(String s, String t) { + + int n = s.length(); + int m = t.length(); + ; // length of t + // char s_i; // ith character of s + // char t_j; // jth character of t int cost; // cost // Step 1 - - int[][] d = new int[n+1][m+1]; + int[][] d = new int[n + 1][m + 1]; + + if (n == 0) + return d; + if (m == 0) + return d; - if (n == 0) return d; - if (m == 0) return d; - // Step 2 - for (int i = 0; i <= n; i++) d[i][0] = i; - for (int j = 0; j <= m; j++) d[0][j] = j; - + for (int i = 0; i <= n; i++) + d[i][0] = i; + for (int j = 0; j <= m; j++) + d[0][j] = j; // Step 3 for (int i = 1; i <= n; i++) { - int s_i = s.charAt (i - 1); + int s_i = s.charAt(i - 1); // Step 4 for (int j = 1; j <= m; j++) { -// t_j = t.charAt (j - 1); + // t_j = t.charAt (j - 1); // Step 5 - if (s_i == t.charAt (j - 1)) cost = 0; - else cost = 1; - + if (s_i == t.charAt(j - 1)) + cost = 0; + else + cost = 1; // Step 6 - d[i][j] = Minimum (d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1] + cost); + d[i][j] = Minimum(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost); } } // Step 7 - - return d; } - - - - static String searchPath(String s, String t, int[][] d, StringBuffer operations, boolean debug) { - + StringBuffer result = new StringBuffer(s); - + int n = d.length; int m = d[0].length; - - int x=n-1; - int y=m-1; - boolean changed =false; - while(true) { - if (debug && changed )System.out.println("result "+new StringBuffer(result) .reverse()); - - if (d[x][y]==0)break; - if (y>0&&x>0&& d[x-1][y-1]<d[x][y]) { - if (debug) System.out.println("min d[x-1][y-1] "+d[x-1][y-1]+" d[x][y] "+d[x][y]+" rep "+s.charAt(x-1)+" with "+t.charAt(y-1)+" at "+(x-1)); - - operations.append('R').append(Character.toString((char)((int)x-1))).append(s.charAt(x-1)).append(t.charAt(y-1)); - if (debug) result.setCharAt(x-1, t.charAt(y-1)); + + int x = n - 1; + int y = m - 1; + boolean changed = false; + while (true) { + if (debug && changed) + System.out.println("result " + new StringBuffer(result).reverse()); + + if (d[x][y] == 0) + break; + if (y > 0 && x > 0 && d[x - 1][y - 1] < d[x][y]) { + if (debug) + System.out.println("min d[x-1][y-1] " + d[x - 1][y - 1] + " d[x][y] " + d[x][y] + " rep " + + s.charAt(x - 1) + " with " + t.charAt(y - 1) + " at " + (x - 1)); + + operations.append('R').append(Character.toString((char) (x - 1))).append(s.charAt(x - 1)) + .append(t.charAt(y - 1)); + if (debug) + result.setCharAt(x - 1, t.charAt(y - 1)); y--; x--; - changed =true; + changed = true; continue; } - if (y>0&& d[x][y-1]<d[x][y]) { - if (debug) System.out.println("min d[x][y-1] "+d[x][y-1]+" d[x][y] "+d[x][y]+" ins "+t.charAt(y-1)+" at "+(x)); - operations.append('I').append(Character.toString((char)((int)x))).append(t.charAt(y-1)); - if (debug)result.insert(x, t.charAt(y-1)); + if (y > 0 && d[x][y - 1] < d[x][y]) { + if (debug) + System.out.println("min d[x][y-1] " + d[x][y - 1] + " d[x][y] " + d[x][y] + " ins " + + t.charAt(y - 1) + " at " + (x)); + operations.append('I').append(Character.toString((char) (x))).append(t.charAt(y - 1)); + if (debug) + result.insert(x, t.charAt(y - 1)); y--; - changed =true; + changed = true; continue; } - if (x>0&& d[x-1][y]<d[x][y]) { - if (debug)System.out.println("min d[x-1][y] "+d[x-1][y]+" d[x][y] "+d[x][y]+" del "+s.charAt(x-1)+" at "+(x-1)); - operations.append('D').append(Character.toString((char)((int)x-1))).append(s.charAt(x-1)); - if (debug)result.deleteCharAt(x-1); + if (x > 0 && d[x - 1][y] < d[x][y]) { + if (debug) + System.out.println("min d[x-1][y] " + d[x - 1][y] + " d[x][y] " + d[x][y] + " del " + + s.charAt(x - 1) + " at " + (x - 1)); + operations.append('D').append(Character.toString((char) (x - 1))).append(s.charAt(x - 1)); + if (debug) + result.deleteCharAt(x - 1); x--; - changed =true; + changed = true; continue; } - changed =false; - if (x>0&& y>0 && d[x-1][y-1]==d[x][y]) { - x--; y--; - continue ; + changed = false; + if (x > 0 && y > 0 && d[x - 1][y - 1] == d[x][y]) { + x--; + y--; + continue; } - if (x>0&& d[x-1][y]==d[x][y]) { - x--; + if (x > 0 && d[x - 1][y] == d[x][y]) { + x--; continue; } - if (y>0 && d[x][y-1]==d[x][y]) { + if (y > 0 && d[x][y - 1] == d[x][y]) { y--; continue; } - + } - if (debug) return result.reverse().toString(); - else return null; + if (debug) + return result.reverse().toString(); + else + return null; } public static String change(String s, String operations) { - + StringBuffer result = new StringBuffer(s).reverse(); - - int pc =0; - while(true) { - if (operations.length()<=pc) break; + + int pc = 0; + while (true) { + if (operations.length() <= pc) + break; char nextOperation = operations.charAt(pc); pc++; if (nextOperation == 'R') { - //pc++; - int xm1 = (char)operations.charAt(pc); + // pc++; + int xm1 = operations.charAt(pc); pc++; char replace = operations.charAt(pc); pc++; char with = operations.charAt(pc); - //operations.append('R').append((char)x-1).append(s.charAt(x-1)).append(t.charAt(y-1)); - // System.out.println(""+result+" xm1 "+xm1+" op "+operations); - - - if (result.length()<=xm1) return s; - - if (result.charAt(xm1)==replace) result.setCharAt(xm1, with); - //if (debug) result.setCharAt(x-1, t.charAt(y-1)); + // operations.append('R').append((char)x-1).append(s.charAt(x-1)).append(t.charAt(y-1)); + // System.out.println(""+result+" xm1 "+xm1+" op "+operations); + + if (result.length() <= xm1) + return s; + + if (result.charAt(xm1) == replace) + result.setCharAt(xm1, with); + // if (debug) result.setCharAt(x-1, t.charAt(y-1)); pc++; - - }else if (nextOperation == 'I') { - // if (debug) System.out.println("min d[x][y-1] "+d[x][y-1]+" d[x][y] "+d[x][y]+" ins "+t.charAt(y-1)+" at "+(x)); - //operations.append('I').append((char)x).append(t.charAt(y-1)); - - //if (debug)result.insert(x, t.charAt(y-1)); - //y--; - //changed =true; - //pc++; + + } else if (nextOperation == 'I') { + // if (debug) System.out.println("min d[x][y-1] "+d[x][y-1]+" + // d[x][y] "+d[x][y]+" ins "+t.charAt(y-1)+" at "+(x)); + // operations.append('I').append((char)x).append(t.charAt(y-1)); + + // if (debug)result.insert(x, t.charAt(y-1)); + // y--; + // changed =true; + // pc++; int x = operations.charAt(pc); pc++; char in = operations.charAt(pc); - - if (result.length()<x) return s; - + + if (result.length() < x) + return s; + result.insert(x, in); pc++; - } else if (nextOperation == 'D' ) { - //pc++; + } else if (nextOperation == 'D') { + // pc++; int xm1 = operations.charAt(pc); - - - if (result.length()<=xm1) return s; - + + if (result.length() <= xm1) + return s; + result.deleteCharAt(xm1); pc++; // delete with pc++; - // operations.append('D').append((char)x-1).append(s.charAt(x-1)); - // if (debug)result.deleteCharAt(x-1); + // operations.append('D').append((char)x-1).append(s.charAt(x-1)); + // if (debug)result.deleteCharAt(x-1); } - + } return result.reverse().toString(); - //else return null; + // else return null; } - - - - - - - - - - - - - - - - /** * @param opers * @param postion * @return */ public static String get(ArrayList<String> opers, int position) { - for(String s : opers) { - int p = (int)s.charAt(1); - if (p==position) { + for (String s : opers) { + int p = s.charAt(1); + if (p == position) { return s; } } return "0"; } - - - - /** * @param form * @param string @@ -270,41 +258,38 @@ public class StringEdit { * @return */ public static String changeSimple(String form, String operation, int c) { - - if (operation.equals("0")) return form; - - if (operation.charAt(0)=='I') { + + if (operation.equals("0")) + return form; + + if (operation.charAt(0) == 'I') { StringBuffer f = new StringBuffer(form); - if (f.length()<=c) { - // DB.println("fail insert "); + if (f.length() <= c) { + // DB.println("fail insert "); return form; } - f.insert(c+1, operation.charAt(1)); + f.insert(c + 1, operation.charAt(1)); return f.toString(); } - if (operation.charAt(0)=='R') { + if (operation.charAt(0) == 'R') { StringBuffer f = new StringBuffer(form); - // if (f.length()<=c) f.append(' '); - if (f.length()<=c) { - // DB.println("fail replace "); + // if (f.length()<=c) f.append(' '); + if (f.length() <= c) { + // DB.println("fail replace "); return form; } f.setCharAt(c, operation.charAt(2)); return f.toString(); } - - if (operation.charAt(0)=='D') { + + if (operation.charAt(0) == 'D') { StringBuffer f = new StringBuffer(form); - f.delete(c, c+1);//.append(' '); + f.delete(c, c + 1);// .append(' '); return f.toString(); } return form; } - - - - /** * @param string * @return diff --git a/dependencyParser/mate-tools/src/is2/mtag/Convert.java b/dependencyParser/mate-tools/src/is2/mtag/Convert.java index e262269..05b0741 100755 --- a/dependencyParser/mate-tools/src/is2/mtag/Convert.java +++ b/dependencyParser/mate-tools/src/is2/mtag/Convert.java @@ -1,10 +1,8 @@ /** - * + * */ package is2.mtag; - - import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -17,82 +15,85 @@ import java.util.ArrayList; /** * @author Dr. Bernd Bohnet, 20.01.2010 - * - * + * + * */ public class Convert { - - public static void main (String[] args) throws IOException { - + + public static void main(String[] args) throws IOException { + Options options = new Options(args); - + split(options.trainfile); - + } /** * @param trainfile - * @throws IOException + * @throws IOException */ private static void split(String trainfile) throws IOException { - + String dir = "split"; boolean success = (new File("split")).mkdir(); - if (success) System.out.println("Directory: " + dir + " created"); - - - ArrayList<String> corpus = new ArrayList<String>(); - - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(trainfile),"UTF-8"),32768); - String l =null; - int sentences = 0; - try { - while( (l = reader.readLine())!=null) { - + if (success) + System.out.println("Directory: " + dir + " created"); + + ArrayList<String> corpus = new ArrayList<String>(); + + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(trainfile), "UTF-8"), + 32768); + String l = null; + int sentences = 0; + try { + while ((l = reader.readLine()) != null) { + corpus.add(l); - if (l.length()<8) sentences++; - + if (l.length() < 8) + sentences++; + } } catch (IOException e) { e.printStackTrace(); } - System.out.println("Corpus has "+sentences+" sentences."); - + System.out.println("Corpus has " + sentences + " sentences."); + int partSize = sentences / 20; - System.out.println("Prepare corpus for cross annotations with 20 parts with part size "+partSize+" number of lines "+corpus.size()); - - - - for(int k=0;k<20;k++) { - BufferedWriter br = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("split/p-"+k),"UTF-8")); - BufferedWriter rest = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("split/r-"+k),"UTF-8")); - int skip=k*partSize; - - int countSentences=0; - int countSentencesWrote=0; - System.out.println("skip from "+skip+" to "+(skip+partSize-1)); - for(String x : corpus) { - if (countSentences>=skip && (countSentences<(skip+partSize)||k==19)){ + System.out.println("Prepare corpus for cross annotations with 20 parts with part size " + partSize + + " number of lines " + corpus.size()); + + for (int k = 0; k < 20; k++) { + BufferedWriter br = new BufferedWriter( + new OutputStreamWriter(new FileOutputStream("split/p-" + k), "UTF-8")); + BufferedWriter rest = new BufferedWriter( + new OutputStreamWriter(new FileOutputStream("split/r-" + k), "UTF-8")); + int skip = k * partSize; + + int countSentences = 0; + int countSentencesWrote = 0; + System.out.println("skip from " + skip + " to " + (skip + partSize - 1)); + for (String x : corpus) { + if (countSentences >= skip && (countSentences < (skip + partSize) || k == 19)) { rest.write(x); rest.newLine(); - if (x.length()<8) countSentencesWrote++; + if (x.length() < 8) + countSentencesWrote++; } else { br.write(x); br.newLine(); } - - if (x.length()<8) countSentences++; + + if (x.length() < 8) + countSentences++; } - System.out.println("wrote for this part "+countSentencesWrote); + System.out.println("wrote for this part " + countSentencesWrote); br.flush(); br.close(); rest.flush(); rest.close(); } - - + } - } diff --git a/dependencyParser/mate-tools/src/is2/mtag/Evaluator.java b/dependencyParser/mate-tools/src/is2/mtag/Evaluator.java index 09d1455..16c7bba 100755 --- a/dependencyParser/mate-tools/src/is2/mtag/Evaluator.java +++ b/dependencyParser/mate-tools/src/is2/mtag/Evaluator.java @@ -9,96 +9,104 @@ import java.util.Map.Entry; import is2.data.SentenceData09; import is2.io.CONLLReader09; - public class Evaluator { - public static void evaluate (String act_file, String pred_file, String format) throws Exception { + public static void evaluate(String act_file, String pred_file, String format) throws Exception { - CONLLReader09 goldReader = new CONLLReader09(act_file);//DependencyReader.createDependencyReader(); - // boolean labeled = goldReader.startReading(act_file); + CONLLReader09 goldReader = new CONLLReader09(act_file);// DependencyReader.createDependencyReader(); + // boolean labeled = goldReader.startReading(act_file); CONLLReader09 predictedReader = new CONLLReader09(); - predictedReader.startReading(pred_file); - -// if (labeled != predLabeled) -// System.out.println("Gold file and predicted file appear to differ on whether or not they are labeled. Expect problems!!!"); + predictedReader.startReading(pred_file); + // if (labeled != predLabeled) + // System.out.println("Gold file and predicted file appear to differ on + // whether or not they are labeled. Expect problems!!!"); - int total = 0, totalP=0,corr = 0, corrL = 0, corrT=0,totalX=0; - int totalD=0, corrD=0,err=0; - int numsent = 0, corrsent = 0, corrsentL = 0; + int total = 0, totalP = 0, corrT = 0; + int totalD = 0, corrD = 0, err = 0; + int numsent = 0; SentenceData09 goldInstance = goldReader.getNext(); SentenceData09 predInstance = predictedReader.getNext(); - Hashtable<String,Integer> errors = new Hashtable<String,Integer>(); - Hashtable<String,StringBuffer> words = new Hashtable<String,StringBuffer>(); + Hashtable<String, Integer> errors = new Hashtable<String, Integer>(); + Hashtable<String, StringBuffer> words = new Hashtable<String, StringBuffer>(); - - while(goldInstance != null) { + while (goldInstance != null) { int instanceLength = goldInstance.length(); if (instanceLength != predInstance.length()) - System.out.println("Lengths do not match on sentence "+numsent); + System.out.println("Lengths do not match on sentence " + numsent); - String gold[] = goldInstance.ofeats; String pred[] = predInstance.pfeats; boolean whole = true; boolean wholeL = true; - // NOTE: the first item is the root info added during nextInstance(), so we skip it. + // NOTE: the first item is the root info added during + // nextInstance(), so we skip it. for (int i = 1; i < instanceLength; i++) { - if (gold[i].equals(pred[i])||(gold[i].equals("_")&&pred[i]==null)) corrT++; + if (gold[i].equals(pred[i]) || (gold[i].equals("_") && pred[i] == null)) + corrT++; else { - // System.out.println("gold:"+goldFeats[i]+" pred:"+predFeats[i]+" "+goldInstance.forms[i]+" snt "+numsent+" i:"+i); - //for (int k = 1; k < instanceLength; k++) { - - // System.out.print(goldInstance.forms[k]+":"+goldInstance.gpos[k]); - // if (k==i) System.out.print(":"+predInstance.gpos[k]); - // System.out.print(" "); - - // } - //System.out.println(); - String key = "gold: '"+gold[i]+"' pred: '"+pred[i]+"'"; + // System.out.println("gold:"+goldFeats[i]+" + // pred:"+predFeats[i]+" "+goldInstance.forms[i]+" snt + // "+numsent+" i:"+i); + // for (int k = 1; k < instanceLength; k++) { + + // System.out.print(goldInstance.forms[k]+":"+goldInstance.gpos[k]); + // if (k==i) System.out.print(":"+predInstance.gpos[k]); + // System.out.print(" "); + + // } + // System.out.println(); + String key = "gold: '" + gold[i] + "' pred: '" + pred[i] + "'"; Integer cnt = errors.get(key); StringBuffer errWrd = words.get(key); - if (cnt==null) { - errors.put(key,1); + if (cnt == null) { + errors.put(key, 1); words.put(key, new StringBuffer().append(goldInstance.forms[i])); - } - else { - errors.put(key,cnt+1); - errWrd.append(" "+goldInstance.forms[i]); + } else { + errors.put(key, cnt + 1); + errWrd.append(" " + goldInstance.forms[i]); } err++; } String[] gf = gold[i].split("|"); - int eq=0; - - if (pred[i]!=null) { + int eq = 0; + + if (pred[i] != null) { String[] pf = pred[i].split("|"); - totalP +=pf.length; - - if (pf.length>gf.length) totalX +=pf.length; - else totalX+=gf.length; - - for(String g : gf) { - for(String p : pf) { - if (g.equals(p)) {eq++;break;} - } + totalP += pf.length; + + if (pf.length > gf.length) { + } else { + } + + for (String g : gf) { + for (String p : pf) { + if (g.equals(p)) { + eq++; + break; + } + } + } + } else { } - } else totalX+=gf.length; - totalD +=gf.length; - corrD +=eq; + totalD += gf.length; + corrD += eq; } - total += instanceLength - 1; // Subtract one to not score fake root token + total += instanceLength - 1; // Subtract one to not score fake root + // token - if(whole) corrsent++; - if(wholeL) corrsentL++; + if (whole) { + } + if (wholeL) { + } numsent++; goldInstance = goldReader.getNext(); @@ -106,38 +114,31 @@ public class Evaluator { } ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>(); - for(Entry<String, Integer> e : errors.entrySet()) { + for (Entry<String, Integer> e : errors.entrySet()) { opsl.add(e); } - - Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){ + + Collections.sort(opsl, new Comparator<Entry<String, Integer>>() { @Override - public int compare(Entry<String, Integer> o1, - Entry<String, Integer> o2) { - - return o1.getValue()==o2.getValue()?0:o1.getValue()>o2.getValue()?-1:1; + public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) { + + return o1.getValue() == o2.getValue() ? 0 : o1.getValue() > o2.getValue() ? -1 : 1; } - - + }); - - - int cnt=0; + System.out.println("10 top most errors:"); - for(Entry<String, Integer> e : opsl) { - cnt++; - // System.out.println(e.getKey()+" "+e.getValue()+" context: "+words.get(e.getKey())); - } - - System.out.println("Tokens: " + total+" Correct: " + corrT+" "+(float)corrT/total+" R "+((float)corrD/totalD)+" tP "+totalP+" tG "+totalD+" P "+(float)corrD/totalP); - System.out.println("err: " + err+" total "+total+" corr "+corrT); -// System.out.println("Unlabeled Complete Correct: " + ((double)corrsent/numsent)); + System.out.println("Tokens: " + total + " Correct: " + corrT + " " + (float) corrT / total + " R " + + ((float) corrD / totalD) + " tP " + totalP + " tG " + totalD + " P " + (float) corrD / totalP); + System.out.println("err: " + err + " total " + total + " corr " + corrT); + // System.out.println("Unlabeled Complete Correct: " + + // ((double)corrsent/numsent)); } - public static void main (String[] args) throws Exception { + public static void main(String[] args) throws Exception { String format = "CONLL"; if (args.length > 2) format = args[2]; diff --git a/dependencyParser/mate-tools/src/is2/mtag/ExtractorM.java b/dependencyParser/mate-tools/src/is2/mtag/ExtractorM.java index 864b977..e84f859 100644 --- a/dependencyParser/mate-tools/src/is2/mtag/ExtractorM.java +++ b/dependencyParser/mate-tools/src/is2/mtag/ExtractorM.java @@ -1,5 +1,11 @@ package is2.mtag; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map.Entry; import is2.data.Cluster; import is2.data.F2SF; @@ -11,466 +17,630 @@ import is2.data.ParametersFloat; import is2.data.PipeGen; import is2.data.SentenceData09; import is2.io.CONLLReader09; +import is2.io.IOGenerals; import is2.tools.IPipe; import is2.util.OptionsSuper; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map.Entry; - - final public class ExtractorM extends PipeGen implements IPipe { - public static int _CEND; - + public static int _CEND; - private static final String STWRD = "STWRD",STPOS = "STPOS",END = "END",STR = "STR"; + private static final String STWRD = "STWRD", STPOS = "STPOS", END = "END", STR = "STR"; public String[] types; Cluster cl; - final public MFO mf =new MFO(); + final public MFO mf = new MFO(); public Long2IntInterface li; - - - final MFO.Data4 d1 = new MFO.Data4(),d2 = new MFO.Data4(),d3 = new MFO.Data4(),dw = new MFO.Data4(); - final MFO.Data4 dwp = new MFO.Data4(),dp = new MFO.Data4(); - + final MFO.Data4 d1 = new MFO.Data4(), d2 = new MFO.Data4(), d3 = new MFO.Data4(), dw = new MFO.Data4(); + final MFO.Data4 dwp = new MFO.Data4(), dp = new MFO.Data4(); private OptionsSuper options; private int _ewrd; - static private int _mid, _strp,_endp; + static private int _strp; - public ExtractorM (Options options, Long2Int long2Int) throws IOException { + public ExtractorM(Options options, Long2Int long2Int) throws IOException { this.options = options; - li =long2Int; + li = long2Int; } - public ExtractorM (OptionsSuper options) { + public ExtractorM(OptionsSuper options) { this.options = options; } + public HashMap<Integer, Integer> form2morph = new HashMap<Integer, Integer>(); - public HashMap<Integer,Integer> form2morph = new HashMap<Integer, Integer> (); - - + @Override public Instances createInstances(String file) { CONLLReader09 depReader = new CONLLReader09(CONLLReader09.NO_NORMALIZE); - + depReader.startReading(file); - mf.register(POS,"<root-POS>"); + mf.register(POS, "<root-POS>"); - mf.register(FFEATS, CONLLReader09.NO_TYPE); + mf.register(FFEATS, IOGenerals.NO_TYPE); mf.register(FFEATS, ""); - + InstancesTagger is = new InstancesTagger(); System.out.println("Registering feature parts "); - HashMap<String,HashSet<String>> op2form = new HashMap<String, HashSet<String>> (); - HashMap<String,Integer> freq = new HashMap<String, Integer> (); - - - int ic=0; - while(true) { + HashMap<String, HashSet<String>> op2form = new HashMap<String, HashSet<String>>(); + HashMap<String, Integer> freq = new HashMap<String, Integer>(); + + int ic = 0; + while (true) { SentenceData09 instance1 = depReader.getNext(); - if (instance1== null) break; + if (instance1 == null) + break; ic++; - String[] w = instance1.forms; - for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]); - for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]); - - for(int i1 = 0; i1 < w.length; i1++) { - mf.register(WORD, w[i1].toLowerCase()); + for (String element : w) + mf.register(WORD, element); + for (String element : w) + registerChars(CHAR, element); + + for (int i1 = 0; i1 < w.length; i1++) { + mf.register(WORD, w[i1].toLowerCase()); Integer f = freq.get(w[i1].toLowerCase()); - - if (f==null) freq.put(w[i1].toLowerCase(), 1); - else freq.put(w[i1].toLowerCase(), f+1); + + if (f == null) + freq.put(w[i1].toLowerCase(), 1); + else + freq.put(w[i1].toLowerCase(), f + 1); HashSet<String> forms = op2form.get(w[i1].toLowerCase()); - if (forms==null) { + if (forms == null) { forms = new HashSet<String>(); op2form.put(w[i1].toLowerCase(), forms); } - forms.add(instance1.ofeats[i1]==null?"_":instance1.ofeats[i1]); + forms.add(instance1.ofeats[i1] == null ? "_" : instance1.ofeats[i1]); } - for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1].toLowerCase()); + for (String element : w) + registerChars(CHAR, element.toLowerCase()); w = instance1.plemmas; - for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]); - for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]); + for (String element : w) + mf.register(WORD, element); + for (String element : w) + registerChars(CHAR, element); w = instance1.ppos; - for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); w = instance1.gpos; - for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); w = instance1.ofeats; - for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) mf.register(FEAT, w[i1]); - for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) mf.register(FFEATS, w[i1]); - - // w = instance1.pfeats; - //for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) mf.register(FEAT, w[i1]); + for (String element : w) + if (element != null) + mf.register(FEAT, element); + for (String element : w) + if (element != null) + mf.register(FFEATS, element); + + // w = instance1.pfeats; + // for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) + // mf.register(FEAT, w[i1]); + } + + for (Entry<String, HashSet<String>> e : op2form.entrySet()) { + if (e.getValue().size() == 1 && freq.get(e.getKey()) > 10) { + // System.out.println("found map "+e.getKey()+" "+e.getValue()+" + // "+freq.get(e.getKey())); + form2morph.put(mf.getValue(PipeGen.WORD, e.getKey()), + mf.getValue(FFEATS, (String) e.getValue().toArray()[0])); + } } - - for(Entry<String,HashSet<String>> e : op2form.entrySet()) { - if (e.getValue().size()==1 &&freq.get(e.getKey())>10) { - // System.out.println("found map "+e.getKey()+" "+e.getValue()+" "+freq.get(e.getKey())); - form2morph.put(mf.getValue(ExtractorM.WORD, e.getKey()), mf.getValue(FFEATS, (String)e.getValue().toArray()[0])); - } - } - initFeatures(); mf.calculateBits(); initValues(); - System.out.println(""+mf.toString()); + System.out.println("" + mf.toString()); depReader.startReading(file); int num1 = 0; long start1 = System.currentTimeMillis(); - + System.out.print("Creating Features: "); - is.init(ic, mf) ; - int del=0; + is.init(ic, mf); + int del = 0; - while(true) { - if (num1 % 100 ==0) {del = outValue(num1, del);} + while (true) { + if (num1 % 100 == 0) { + del = outValue(num1, del); + } SentenceData09 instance1 = depReader.getNext(is); - if (instance1== null) break; + if (instance1 == null) + break; - if (num1>options.count) break; + if (num1 > options.count) + break; num1++; } long end1 = System.currentTimeMillis(); System.gc(); - long mem2 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); - System.out.print(" time "+(end1-start1)+" mem "+(mem2/1024)+" kb"); + long mem2 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); + System.out.print(" time " + (end1 - start1) + " mem " + (mem2 / 1024) + " kb"); types = new String[mf.getFeatureCounter().get(FFEATS)]; - for(Entry<String,Integer> e : mf.getFeatureSet().get(FFEATS).entrySet()) { + for (Entry<String, Integer> e : MFO.getFeatureSet().get(FFEATS).entrySet()) { types[e.getValue()] = e.getKey(); } - - if (options.clusterFile==null)cl = new Cluster(); - else cl= new Cluster(options.clusterFile, mf,6); - + if (options.clusterFile == null) + cl = new Cluster(); + else + cl = new Cluster(options.clusterFile, mf, 6); System.out.println("Num Features: " + types.length); - - depReader.startReading(file); + int num11 = 0; - - int num11=0; - - while(true) { + while (true) { SentenceData09 instance = depReader.getNext(); - if (instance==null) break; - - is.fillChars(instance, num11, _CEND); + if (instance == null) + break; + is.fillChars(instance, num11, _CEND); - if (num11>options.count) break; + if (num11 > options.count) + break; num11++; } - return is;//.toNativeArray(); + return is;// .toNativeArray(); } private void registerChars(String type, String word) { - for(int i=0;i<word.length();i++) mf.register(type, Character.toString(word.charAt(i))); + for (int i = 0; i < word.length(); i++) + mf.register(type, Character.toString(word.charAt(i))); } - - + @Override public void initValues() { - s_feat = mf.getFeatureBits(FFEATS); - s_word = mf.getFeatureBits(WORD); - s_type = mf.getFeatureBits(TYPE); - s_char = mf.getFeatureBits(CHAR); - s_pos =mf.getFeatureBits(POS); - // dl1.a[0] = s_type; dl1.a[1] = s_pos; - // for (int k = 2; k < 7; k++) dl1.a[k] = s_pos; - - d1.a0 = s_type; d1.a1 = s_feat; d1.a2= s_word; - d2.a0 = s_type; d2.a1 = s_feat; d2.a2= s_feat; d2.a3= s_feat; d2.a4= s_feat; d2.a5= s_feat; d2.a6= s_feat; - d3.a0 = s_type; d3.a1 = s_feat; d3.a2= s_char; d3.a3= s_char; d3.a4= s_char; d3.a5= s_char; d3.a6= s_char; d3.a7= s_char; - dp.a0 = s_type; dp.a1 = s_feat; dp.a2= s_pos; dp.a3= s_pos; dp.a4= s_feat;// dp.a5= s_char; dp.a6= s_char; dp.a7= s_char; - dw.a0 = s_type; dw.a1 = s_feat;dw.a2= s_word; dw.a3= s_word; dw.a4= s_word; dw.a5= s_word; dw.a6= s_word; dw.a7= s_word; - dwp.a0 = s_type; dwp.a1 = s_feat;dwp.a2= s_word ; dwp.a3= s_feat; dwp.a4= s_word; + s_feat = MFO.getFeatureBits(FFEATS); + s_word = MFO.getFeatureBits(WORD); + s_type = MFO.getFeatureBits(TYPE); + s_char = MFO.getFeatureBits(CHAR); + s_pos = MFO.getFeatureBits(POS); + // dl1.a[0] = s_type; dl1.a[1] = s_pos; + // for (int k = 2; k < 7; k++) dl1.a[k] = s_pos; + + d1.a0 = s_type; + d1.a1 = s_feat; + d1.a2 = s_word; + d2.a0 = s_type; + d2.a1 = s_feat; + d2.a2 = s_feat; + d2.a3 = s_feat; + d2.a4 = s_feat; + d2.a5 = s_feat; + d2.a6 = s_feat; + d3.a0 = s_type; + d3.a1 = s_feat; + d3.a2 = s_char; + d3.a3 = s_char; + d3.a4 = s_char; + d3.a5 = s_char; + d3.a6 = s_char; + d3.a7 = s_char; + dp.a0 = s_type; + dp.a1 = s_feat; + dp.a2 = s_pos; + dp.a3 = s_pos; + dp.a4 = s_feat;// dp.a5= s_char; dp.a6= s_char; dp.a7= s_char; + dw.a0 = s_type; + dw.a1 = s_feat; + dw.a2 = s_word; + dw.a3 = s_word; + dw.a4 = s_word; + dw.a5 = s_word; + dw.a6 = s_word; + dw.a7 = s_word; + dwp.a0 = s_type; + dwp.a1 = s_feat; + dwp.a2 = s_word; + dwp.a3 = s_feat; + dwp.a4 = s_word; } - public static short s_feat,s_word,s_type,s_dir,s_dist,s_char,s_pos; - - + public static short s_feat, s_word, s_type, s_dir, s_dist, s_char, s_pos; /** * Initialize the features types. */ + @Override public void initFeatures() { - for(int t=0;t<62;t++) { - mf.register(TYPE,"F"+t); + for (int t = 0; t < 62; t++) { + mf.register(TYPE, "F" + t); } - -// _mid = mf.register(POS, MID); + // _mid = mf.register(POS, MID); _strp = mf.register(POS, STR); - _endp= mf.register(POS, END); + mf.register(POS, END); mf.register(WORD, STR); - _ewrd = mf.register(WORD, END); - + _ewrd = mf.register(WORD, END); _CEND = mf.register(CHAR, END); - - - // optional features - mf.register(WORD,STWRD); - mf.register(POS,STPOS); - + mf.register(WORD, STWRD); + mf.register(POS, STPOS); } + final public void addCF(InstancesTagger is, int ic, String fs, int i, short pfeat[], short ppos[], int[] forms, + int[] lemmas, long[] vs) { - final public void addCF(InstancesTagger is, int ic, String fs,int i, short pfeat[],short ppos[], int[] forms, int[] lemmas, long[] vs) { - - int c0= is.chars[ic][i][0], c1=is.chars[ic][i][1], c2=is.chars[ic][i][2], c3=is.chars[ic][i][3], c4=is.chars[ic][i][4],c5=is.chars[ic][i][5]; - int e0 =is.chars[ic][i][6], e1 =is.chars[ic][i][7],e2 =is.chars[ic][i][8],e3 =is.chars[ic][i][9],e4 =is.chars[ic][i][10]; + int c0 = is.chars[ic][i][0], c1 = is.chars[ic][i][1], c2 = is.chars[ic][i][2], c3 = is.chars[ic][i][3], + c4 = is.chars[ic][i][4], c5 = is.chars[ic][i][5]; + int e0 = is.chars[ic][i][6], e1 = is.chars[ic][i][7], e2 = is.chars[ic][i][8], e3 = is.chars[ic][i][9], + e4 = is.chars[ic][i][10]; - int f=1,n=0; - short upper =0, number = 1; - for(int k1=0;k1<fs.length();k1++){ + int f = 1, n = 0; + short upper = 0, number = 1; + for (int k1 = 0; k1 < fs.length(); k1++) { char c = fs.charAt(k1); if (Character.isUpperCase(c)) { - if (k1==0) upper=1; + if (k1 == 0) + upper = 1; else { // first char + another - if (upper==1) upper=3; + if (upper == 1) + upper = 3; // another uppercase in the word - else if (upper==0) upper=2; + else if (upper == 0) + upper = 2; } } - if (Character.isDigit(c) && k1==0) number =2 ; - else if (Character.isDigit(c) && number==1) number = 3; + if (Character.isDigit(c) && k1 == 0) + number = 2; + else if (Character.isDigit(c) && number == 1) + number = 3; } int form = forms[i]; - int len = forms.length; + int len = forms.length; long l; - d1.v0 = f++; d1.v2=form; l=mf.calc3(d1); vs[n++]=mf.calc3(d1); - - d1.v0 = f++; d1.v2=is.formlc[ic][i]; vs[n++]=mf.calc3(d1); - - d3.v2=c0; d3.v3=c1; d3.v4=c2; d3.v5=c3; d3.v6=c4; - d3.v0=f++; vs[n++]=mf.calc3(d3); - d3.v0=f++; vs[n++]=mf.calc4(d3); - d3.v0=f++; vs[n++]=mf.calc5(d3); - d3.v0=f++; vs[n++]=mf.calc6(d3); - d3.v0=f++; vs[n++]=mf.calc7(d3); - - if (form!=-1) { - d3.v2=c2; d3.v3=c3; d3.v4=c4; d3.v5=c5; d3.v6=cl.getLP(form); - d3.v0=f; vs[n++]=mf.calc6(d3); d3.v0=f+1; vs[n++]=mf.calc7(d3); - } - f+=2; - - if (form>0) { - d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3); - d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3); - d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3); + d1.v0 = f++; + d1.v2 = form; + l = mf.calc3(d1); + vs[n++] = mf.calc3(d1); + + d1.v0 = f++; + d1.v2 = is.formlc[ic][i]; + vs[n++] = mf.calc3(d1); + + d3.v2 = c0; + d3.v3 = c1; + d3.v4 = c2; + d3.v5 = c3; + d3.v6 = c4; + d3.v0 = f++; + vs[n++] = mf.calc3(d3); + d3.v0 = f++; + vs[n++] = mf.calc4(d3); + d3.v0 = f++; + vs[n++] = mf.calc5(d3); + d3.v0 = f++; + vs[n++] = mf.calc6(d3); + d3.v0 = f++; + vs[n++] = mf.calc7(d3); + + if (form != -1) { + d3.v2 = c2; + d3.v3 = c3; + d3.v4 = c4; + d3.v5 = c5; + d3.v6 = cl.getLP(form); + d3.v0 = f; + vs[n++] = mf.calc6(d3); + d3.v0 = f + 1; + vs[n++] = mf.calc7(d3); } - f+=3; - - d3.v2=e0; d3.v3=e1; d3.v4=e2; d3.v5=e3; d3.v6=e4; - d3.v0 =f++; vs[n++]=mf.calc3(d3); - d3.v0 =f++; vs[n++]=l=mf.calc4(d3); vs[n++]=d3.calcs(3, upper, l); - d3.v0 =f++; vs[n++]=l=mf.calc5(d3); vs[n++]=d3.calcs(3, upper, l); - d3.v0 =f++; vs[n++]=l=mf.calc6(d3); vs[n++]=d3.calcs(3, upper, l); - d3.v0 =f++; vs[n++]=l=mf.calc7(d3); vs[n++]=d3.calcs(3, upper, l); - - if (form>0) { - d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3); - d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3); - d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3); + f += 2; + + if (form > 0) { + d3.v0 = f; + d3.v5 = cl.getLP(form); + vs[n++] = mf.calc6(d3); + d3.v0 = f + 1; + d3.v4 = cl.getLP(form); + vs[n++] = mf.calc5(d3); + d3.v0 = f + 2; + d3.v3 = cl.getLP(form); + vs[n++] = mf.calc4(d3); } - f+=3; - - - dw.v0=f++; dw.v2=i+1<len?forms[i+1]:_ewrd;dw.v3= forms[i];vs[n++]=mf.calc4(dw); - - if (len>i+1) { - - dw.v0=f; dw.v2= forms[i+1]; vs[n++]=mf.calc3(dw); - d3.v0=f+1; d3.v2 =is.chars[ic][i+1][0];vs[n++]=mf.calc3(d3); - d3.v0=f+2; d3.v2 =is.chars[ic][i+1][6];vs[n++]=mf.calc3(d3); - - d3.v2=e0; d3.v3=e1; - - d3.v0 =f+3; d3.v4 =is.chars[ic][i+1][0];vs[n++]=mf.calc5(d3); - d3.v0 =f+4; d3.v4 =is.chars[ic][i+1][6];vs[n++]=mf.calc5(d3); - - - - if (is.chars[ic][i+1][11]>1 ) { // instance.forms[i+1].length() - - d3.v0=f+5; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; vs[n++]=mf.calc4(d3); - d3.v0=f+6; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; vs[n++]=mf.calc4(d3); + f += 3; + + d3.v2 = e0; + d3.v3 = e1; + d3.v4 = e2; + d3.v5 = e3; + d3.v6 = e4; + d3.v0 = f++; + vs[n++] = mf.calc3(d3); + d3.v0 = f++; + vs[n++] = l = mf.calc4(d3); + vs[n++] = d3.calcs(3, upper, l); + d3.v0 = f++; + vs[n++] = l = mf.calc5(d3); + vs[n++] = d3.calcs(3, upper, l); + d3.v0 = f++; + vs[n++] = l = mf.calc6(d3); + vs[n++] = d3.calcs(3, upper, l); + d3.v0 = f++; + vs[n++] = l = mf.calc7(d3); + vs[n++] = d3.calcs(3, upper, l); + + if (form > 0) { + d3.v0 = f; + d3.v5 = cl.getLP(form); + vs[n++] = mf.calc6(d3); + d3.v0 = f + 1; + d3.v4 = cl.getLP(form); + vs[n++] = mf.calc5(d3); + d3.v0 = f + 2; + d3.v3 = cl.getLP(form); + vs[n++] = mf.calc4(d3); + } + f += 3; + + dw.v0 = f++; + dw.v2 = i + 1 < len ? forms[i + 1] : _ewrd; + dw.v3 = forms[i]; + vs[n++] = mf.calc4(dw); + + if (len > i + 1) { + + dw.v0 = f; + dw.v2 = forms[i + 1]; + vs[n++] = mf.calc3(dw); + d3.v0 = f + 1; + d3.v2 = is.chars[ic][i + 1][0]; + vs[n++] = mf.calc3(d3); + d3.v0 = f + 2; + d3.v2 = is.chars[ic][i + 1][6]; + vs[n++] = mf.calc3(d3); + + d3.v2 = e0; + d3.v3 = e1; + + d3.v0 = f + 3; + d3.v4 = is.chars[ic][i + 1][0]; + vs[n++] = mf.calc5(d3); + d3.v0 = f + 4; + d3.v4 = is.chars[ic][i + 1][6]; + vs[n++] = mf.calc5(d3); + + if (is.chars[ic][i + 1][11] > 1) { // instance.forms[i+1].length() + + d3.v0 = f + 5; + d3.v2 = is.chars[ic][i + 1][0]; + d3.v3 = is.chars[ic][i + 1][1]; + vs[n++] = mf.calc4(d3); + d3.v0 = f + 6; + d3.v2 = is.chars[ic][i + 1][6]; + d3.v3 = is.chars[ic][i + 1][7]; + vs[n++] = mf.calc4(d3); + + d3.v2 = e0; + d3.v3 = e1; + + d3.v0 = f + 7; + d3.v4 = is.chars[ic][i + 1][0]; + d3.v5 = is.chars[ic][i + 1][1]; + vs[n++] = mf.calc6(d3); + d3.v0 = f + 8; + d3.v4 = is.chars[ic][i + 1][6]; + d3.v5 = is.chars[ic][i + 1][7]; + vs[n++] = mf.calc6(d3); + + if (forms[i + 1] > 0) { + d3.v0 = f + 9; + d3.v2 = is.chars[ic][i + 1][0]; + d3.v3 = is.chars[ic][i + 1][1]; + d3.v4 = cl.getLP(forms[i + 1]); + vs[n++] = mf.calc5(d3); + d3.v0 = f + 10; + d3.v2 = is.chars[ic][i + 1][6]; + d3.v3 = is.chars[ic][i + 1][7]; + d3.v4 = cl.getLP(forms[i + 1]); + vs[n++] = mf.calc5(d3); + } + } - d3.v2=e0; d3.v3=e1; + if (forms[i + 1] > 0) { + dw.v0 = f + 11; + dw.v2 = cl.getLP(forms[i + 1]); + dw.v3 = forms[i]; + vs[n++] = mf.calc4(dw); + } - d3.v0=f+7; d3.v4 = is.chars[ic][i+1][0]; d3.v5 =is.chars[ic][i+1][1]; vs[n++]=mf.calc6(d3); - d3.v0=f+8; d3.v4 = is.chars[ic][i+1][6]; d3.v5=is.chars[ic][i+1][7]; vs[n++]=mf.calc6(d3); + if (len > i + 2) { + dw.v0 = f + 12; + dw.v2 = forms[i + 2]; + dw.v3 = forms[i + 1]; + vs[n++] = mf.calc4(dw); + vs[n++] = mf.calc3(dw); + // d2.v0=f+13; d2.v2=pfeat[i+1]; d2.v3= pfeat[i+2]; + // vs[n++]=mf.calc4(d2); + // dp.v0= f+14; dp.v2=ppos[i+1]; dp.v3=ppos[i+2]; + // vs[n++]=mf.calc4(dp); - if (forms[i+1]>0) { - d3.v0=f+9; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3); - d3.v0=f+10; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3); - } - } - - if (forms[i+1]>0) { - dw.v0=f+11; dw.v2= cl.getLP(forms[i+1]); dw.v3= forms[i];vs[n++]=mf.calc4(dw); - } - - if (len>i+2) { - dw.v0=f+12; dw.v2= forms[i+2]; dw.v3 = forms[i+1];vs[n++]=mf.calc4(dw);vs[n++]=mf.calc3(dw); -// d2.v0=f+13; d2.v2=pfeat[i+1]; d2.v3= pfeat[i+2]; vs[n++]=mf.calc4(d2); - // dp.v0= f+14; dp.v2=ppos[i+1]; dp.v3=ppos[i+2]; vs[n++]=mf.calc4(dp); + } - } + if (len > i + 3) { + dw.v0 = f + 14; + dw.v2 = forms[i + 3]; + dw.v3 = forms[i + 2]; + vs[n++] = mf.calc4(dw); + vs[n++] = mf.calc3(dw); - if (len>i+3) { - dw.v0=f+14; dw.v2= forms[i+3]; dw.v3 = forms[i+2]; vs[n++]=mf.calc4(dw); vs[n++]=mf.calc3(dw); - - } - } - f+=16; + } + } + f += 16; // length - d2.v0=f++; d2.v2=is.chars[ic][i][11];vs[n++]=mf.calc3(d2); - + d2.v0 = f++; + d2.v2 = is.chars[ic][i][11]; + vs[n++] = mf.calc3(d2); // contains a number - d2.v0=f++; d2.v2=number; vs[n++]=mf.calc3(d2); - d1.v0=f++; d1.v2=lemmas[i]; vs[n++]=mf.calc3(d1); - - if (i!=0 &&len>i+1) { - dw.v0=f; dw.v2=lemmas[i-1];dw.v3=lemmas[i+1];vs[n++]=mf.calc4(dw); - d2.v0=f+1; d2.v2=pfeat[i-1]; d2.v3=pfeat[i+1];vs[n++]=mf.calc4(d2); - } - f+=2; - - d2.v0= f++; d2.v2=i>=1? pfeat[i-1]:_strp; vs[n++]=mf.calc3(d2); - dp.v0= f++; dp.v2=ppos[i]; vs[n++]=mf.calc3(dp); - - if (i>0) { - dw.v0 = f++; dw.v2 =i>=1? forms[i-1]:_strp; vs[n++]=mf.calc3(dw); - dw.v0 = f++; dw.v2 = i>=1? lemmas[i-1]:_strp; vs[n++]=mf.calc3(dw); - - if (len>i+1) { -// d2.v0=f; d2.v2= pfeat[i-1];d2.v3= pfeat[i+1]; vs[n++]=mf.calc4(d2); - // dp.v0= f+1; dp.v2=ppos[i-1]; dp.v3=ppos[i+1]; vs[n++]=mf.calc4(dp); + d2.v0 = f++; + d2.v2 = number; + vs[n++] = mf.calc3(d2); + d1.v0 = f++; + d1.v2 = lemmas[i]; + vs[n++] = mf.calc3(d1); + + if (i != 0 && len > i + 1) { + dw.v0 = f; + dw.v2 = lemmas[i - 1]; + dw.v3 = lemmas[i + 1]; + vs[n++] = mf.calc4(dw); + d2.v0 = f + 1; + d2.v2 = pfeat[i - 1]; + d2.v3 = pfeat[i + 1]; + vs[n++] = mf.calc4(d2); + } + f += 2; + + d2.v0 = f++; + d2.v2 = i >= 1 ? pfeat[i - 1] : _strp; + vs[n++] = mf.calc3(d2); + dp.v0 = f++; + dp.v2 = ppos[i]; + vs[n++] = mf.calc3(dp); + + if (i > 0) { + dw.v0 = f++; + dw.v2 = i >= 1 ? forms[i - 1] : _strp; + vs[n++] = mf.calc3(dw); + dw.v0 = f++; + dw.v2 = i >= 1 ? lemmas[i - 1] : _strp; + vs[n++] = mf.calc3(dw); + + if (len > i + 1) { + // d2.v0=f; d2.v2= pfeat[i-1];d2.v3= pfeat[i+1]; + // vs[n++]=mf.calc4(d2); + // dp.v0= f+1; dp.v2=ppos[i-1]; dp.v3=ppos[i+1]; + // vs[n++]=mf.calc4(dp); } f++; - dp.v0= f++; dp.v2=ppos[i]; dp.v3=ppos[i-1]; vs[n++]=mf.calc4(dp); - - if (i>1) { - d2.v0=f++; d2.v2=i<2?_strp: pfeat[i-2]; vs[n++]=mf.calc3(d2); - d2.v0=f++; d2.v2= pfeat[i-1]; d2.v3= pfeat[i-2]; vs[n++]=mf.calc4(d2); - - dw.v0=f++; dw.v2= forms[i-2]; vs[n++]=mf.calc3(dw); - dwp.v0=f++; dwp.v2 = forms[i-1]; dwp.v3 = pfeat[i-2];vs[n++]=mf.calc4(dwp); - dwp.v0=f++; dwp.v2 = forms[i-2]; dwp.v3 = pfeat[i-1];vs[n++]=mf.calc4(dwp); - - if (i>2) { - d2.v0=f++; d2.v2=pfeat[i-3]; vs[n++]=mf.calc3(d2); - d2.v0=f++; d2.v2=pfeat[i-2]; d2.v3= pfeat[i-3]; vs[n++]=mf.calc4(d2); - dw.v0=f++; dw.v2 = forms[i-3]; dw.v3 = forms[i-2]; vs[n++]=mf.calc4(dw); - // dp.v0= f++; dp.v2=ppos[i-3]; dp.v3=ppos[i-2]; vs[n++]=mf.calc4(dp); + dp.v0 = f++; + dp.v2 = ppos[i]; + dp.v3 = ppos[i - 1]; + vs[n++] = mf.calc4(dp); + + if (i > 1) { + d2.v0 = f++; + d2.v2 = i < 2 ? _strp : pfeat[i - 2]; + vs[n++] = mf.calc3(d2); + d2.v0 = f++; + d2.v2 = pfeat[i - 1]; + d2.v3 = pfeat[i - 2]; + vs[n++] = mf.calc4(d2); + + dw.v0 = f++; + dw.v2 = forms[i - 2]; + vs[n++] = mf.calc3(dw); + dwp.v0 = f++; + dwp.v2 = forms[i - 1]; + dwp.v3 = pfeat[i - 2]; + vs[n++] = mf.calc4(dwp); + dwp.v0 = f++; + dwp.v2 = forms[i - 2]; + dwp.v3 = pfeat[i - 1]; + vs[n++] = mf.calc4(dwp); + + if (i > 2) { + d2.v0 = f++; + d2.v2 = pfeat[i - 3]; + vs[n++] = mf.calc3(d2); + d2.v0 = f++; + d2.v2 = pfeat[i - 2]; + d2.v3 = pfeat[i - 3]; + vs[n++] = mf.calc4(d2); + dw.v0 = f++; + dw.v2 = forms[i - 3]; + dw.v3 = forms[i - 2]; + vs[n++] = mf.calc4(dw); + // dp.v0= f++; dp.v2=ppos[i-3]; dp.v3=ppos[i-2]; + // vs[n++]=mf.calc4(dp); } } } vs[n] = Integer.MIN_VALUE; } - - - - - - - public int fillFeatureVectorsOne(ParametersFloat params, int w1, String form, Instances is, int n, short[] features, long[] vs) { + public int fillFeatureVectorsOne(ParametersFloat params, int w1, String form, Instances is, int n, short[] features, + long[] vs) { double best = -1; - int bestType=-1; + int bestType = -1; F2SF f = new F2SF(params.parameters); - //is.gfeats[n] - addCF((InstancesTagger)is, n, form, w1, features,is.pposs[n], is.forms[n], is.plemmas[n], vs); - - for(int t = 0; t < types.length; t++) { + // is.gfeats[n] + addCF((InstancesTagger) is, n, form, w1, features, is.pposs[n], is.forms[n], is.plemmas[n], vs); + + for (int t = 0; t < types.length; t++) { f.clear(); - int p = t<<ExtractorM.s_type; - for(int k=0;k<vs.length;k++) { - if (vs[k]==Integer.MIN_VALUE) break; - if (vs[k]>=0) f.add(li.l2i(vs[k]+p)); + int p = t << ExtractorM.s_type; + for (long element : vs) { + if (element == Integer.MIN_VALUE) + break; + if (element >= 0) + f.add(li.l2i(element + p)); } - if (f.score >best) { - bestType=t; - best =f.score; + if (f.score > best) { + bestType = t; + best = f.score; } - } + } return bestType; } - - - //static ArrayList<T> todo = new ArrayList<T>(); + // static ArrayList<T> todo = new ArrayList<T>(); static SentenceData09 instance; - - - public static int _FC =200; - - + + public static int _FC = 200; + /** * Write the lemma that are not mapped by operations + * * @param dos */ public void writeMap(DataOutputStream dos) { try { dos.writeInt(this.form2morph.size()); - for(Entry<Integer, Integer> e : form2morph.entrySet()) { + for (Entry<Integer, Integer> e : form2morph.entrySet()) { dos.writeInt(e.getKey()); dos.writeInt(e.getValue()); } @@ -479,16 +649,15 @@ final public class ExtractorM extends PipeGen implements IPipe { } } - - /** * Read the form-lemma mapping not read by operations + * * @param dis */ public void readMap(DataInputStream dis) { try { int size = dis.readInt(); - for(int i =0; i<size;i++) { + for (int i = 0; i < size; i++) { form2morph.put(dis.readInt(), dis.readInt()); } } catch (IOException e1) { @@ -496,8 +665,9 @@ final public class ExtractorM extends PipeGen implements IPipe { } } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.tools.IPipe#write(java.io.DataOutputStream) */ @Override @@ -510,5 +680,4 @@ final public class ExtractorM extends PipeGen implements IPipe { } } - } diff --git a/dependencyParser/mate-tools/src/is2/mtag/MFO.java b/dependencyParser/mate-tools/src/is2/mtag/MFO.java index d91991e..e315ba4 100755 --- a/dependencyParser/mate-tools/src/is2/mtag/MFO.java +++ b/dependencyParser/mate-tools/src/is2/mtag/MFO.java @@ -1,218 +1,215 @@ package is2.mtag; - -import is2.data.IEncoderPlus; -import is2.util.DB; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.Map.Entry; +import is2.data.IEncoderPlus; +import is2.util.DB; + /** * Map Features, do not map long to integer - * + * * @author Bernd Bohnet, 20.09.2009 */ -final public class MFO implements IEncoderPlus { +final public class MFO implements IEncoderPlus { /** The features and its values */ - static private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>(); + static private final HashMap<String, HashMap<String, Integer>> m_featureSets = new HashMap<String, HashMap<String, Integer>>(); /** The feature class and the number of values */ - static private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>(); + static private final HashMap<String, Integer> m_featureCounters = new HashMap<String, Integer>(); /** The number of bits needed to encode a feature */ - static final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>(); + static final HashMap<String, Integer> m_featureBits = new HashMap<String, Integer>(); /** Integer counter for long2int */ - //private int count=0; + // private int count=0; /** Stop growing */ - public boolean stop=false; + public boolean stop = false; - final public static String NONE="<None>"; + final public static String NONE = "<None>"; public static class Data { public final String[] a = new String[8]; public final String[] v = new String[8]; final short[] s = new short[9]; + public void clear(int i) { - v[i]=null; + v[i] = null; } } - - final public static class Data4 { public int shift; - public short a0,a1,a2,a3,a4,a5,a6,a7,a8,a9; - public int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9; + public short a0, a1, a2, a3, a4, a5, a6, a7, a8, a9; + public int v0, v1, v2, v3, v4, v5, v6, v7, v8, v9; final public long calcs(int b, long v, long l) { - if (l<0) return l; - l |= v<<shift; - shift +=b; + if (l < 0) + return l; + l |= v << shift; + shift += b; return l; } - final public long calc2() { - if (v0<0||v1<0) return -1; + if (v0 < 0 || v1 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; + shift = a0; + l |= (long) v1 << shift; + shift += a1; return l; } - - final public long calc3() { - if (v0<0||v1<0||v2<0) return -1; - // if (v1<0||v2<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0) + return -1; + // if (v1<0||v2<0) return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift=(short) (shift + a2); + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift = (short) (shift + a2); - //shift=; + // shift=; return l; } - final public long calc4() { - if (v0<0||v1<0||v2<0||v3<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift +=a2; - l |= (long)v3<<shift; - shift= shift +a3; + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift += a2; + l |= (long) v3 << shift; + shift = shift + a3; return l; } - - final public long calc5() { - if (v0<0||v1<0||v2<0||v3<0||v4<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift +=a2; - l |= (long)v3<<shift; - shift +=a3; - l |= (long)v4<<shift; - shift =shift+a4; + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift += a2; + l |= (long) v3 << shift; + shift += a3; + l |= (long) v4 << shift; + shift = shift + a4; return l; } - final public long calc6() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift +=a2; - l |= (long)v3<<shift; - shift +=a3; - l |= (long)v4<<shift; - shift +=a4; - l |= (long)v5<<shift; - shift =shift+a5; + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift += a2; + l |= (long) v3 << shift; + shift += a3; + l |= (long) v4 << shift; + shift += a4; + l |= (long) v5 << shift; + shift = shift + a5; return l; } final public long calc7() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift +=a2; - l |= (long)v3<<shift; - shift +=a3; - l |= (long)v4<<shift; - shift +=a4; - l |= (long)v5<<shift; - shift +=a5; - l |= (long)v6<<shift; - shift =shift+a6; + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift += a2; + l |= (long) v3 << shift; + shift += a3; + l |= (long) v4 << shift; + shift += a4; + l |= (long) v5 << shift; + shift += a5; + l |= (long) v6 << shift; + shift = shift + a6; return l; } - final public long calc8() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0 || v7 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift +=a2; - l |= (long)v3<<shift; - shift +=a3; - l |= (long)v4<<shift; - shift +=a4; - l |= (long)v5<<shift; - shift +=a5; - l |= (long)v6<<shift; - shift +=a6; - l |= (long)v7<<shift; - shift =shift+a7; + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift += a2; + l |= (long) v3 << shift; + shift += a3; + l |= (long) v4 << shift; + shift += a4; + l |= (long) v5 << shift; + shift += a5; + l |= (long) v6 << shift; + shift += a6; + l |= (long) v7 << shift; + shift = shift + a7; return l; } } - public MFO () {} - - - // public int size() {return count;} - + public MFO() { + } - + // public int size() {return count;} /** * Register an attribute class, if it not exists and add a possible value + * * @param type * @param type2 */ - final public int register(String a, String v) { + @Override + final public int register(String a, String v) { - HashMap<String,Integer> fs = getFeatureSet().get(a); - if (fs==null) { - fs = new HashMap<String,Integer>(); + HashMap<String, Integer> fs = getFeatureSet().get(a); + if (fs == null) { + fs = new HashMap<String, Integer>(); getFeatureSet().put(a, fs); fs.put(NONE, 0); getFeatureCounter().put(a, 1); @@ -220,289 +217,284 @@ final public class MFO implements IEncoderPlus { Integer c = getFeatureCounter().get(a); Integer i = fs.get(v); - if (i==null) { + if (i == null) { fs.put(v, c); c++; - getFeatureCounter().put(a,c); - return c-1; - } else return i; + getFeatureCounter().put(a, c); + return c - 1; + } else + return i; } /** * Calculates the number of bits needed to encode a feature */ - public void calculateBits() { + public void calculateBits() { - int total=0; - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2))); + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + int bits = (int) Math.ceil((Math.log(e.getValue() + 1) / Math.log(2))); m_featureBits.put(e.getKey(), bits); - total+=bits; - // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1)); } - // System.out.println("total number of needed bits "+total); + // System.out.println("total number of needed bits "+total); } - - @Override - public String toString() { + public String toString() { StringBuffer content = new StringBuffer(); - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - content.append(e.getKey()+" "+e.getValue()); + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + content.append(e.getKey() + " " + e.getValue()); content.append(':'); - // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); + // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); content.append(getFeatureBits(e.getKey())); - /*if (vs.size()<120) - for(Entry<String,Integer> e2 : vs.entrySet()) { - content.append(e2.getKey()+" ("+e2.getValue()+") "); - }*/ + /* + * if (vs.size()<120) for(Entry<String,Integer> e2 : vs.entrySet()) + * { content.append(e2.getKey()+" ("+e2.getValue()+") "); } + */ content.append('\n'); } return content.toString(); } - - static final public long calcs(Data4 d,int b, long v, long l) { - if (l<0) return l; - l |= v<<d.shift; - d.shift +=b; + static final public long calcs(Data4 d, int b, long v, long l) { + if (l < 0) + return l; + l |= v << d.shift; + d.shift += b; return l; } - static final public short getFeatureBits(String a) { - return (short)m_featureBits.get(a).intValue(); + return (short) m_featureBits.get(a).intValue(); } - - /** * Get the integer place holder of the string value v of the type a - * - * @param t the type - * @param v the value + * + * @param t + * the type + * @param v + * the value * @return the integer place holder of v */ + @Override final public int getValue(String t, String v) { - if (m_featureSets.get(t)==null) return -1; + if (m_featureSets.get(t) == null) + return -1; Integer vi = m_featureSets.get(t).get(v); - if (vi==null) return -1; //stop && + if (vi == null) + return -1; // stop && return vi.intValue(); } /** * Static version of getValue + * * @see getValue */ static final public int getValueS(String a, String v) { - if (m_featureSets.get(a)==null) return -1; + if (m_featureSets.get(a) == null) + return -1; Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; //stop && + if (vi == null) + return -1; // stop && return vi.intValue(); } public int hasValue(String a, String v) { Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; + if (vi == null) + return -1; return vi.intValue(); } - - - final public long calc2(Data4 d) { - if (d.v0<0||d.v1<0) return -1; - // if (d.v1<0||d.v2<0) return -1; + if (d.v0 < 0 || d.v1 < 0) + return -1; + // if (d.v1<0||d.v2<0) return -1; long l = d.v0; - short shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - // l |= (long)d.v2<<shift; - d.shift=shift; + short shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + // l |= (long)d.v2<<shift; + d.shift = shift; - //d.shift=; + // d.shift=; return l; } - - final public long calc3(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0) return -1; - // if (d.v1<0||d.v2<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0) + return -1; + // if (d.v1<0||d.v2<0) return -1; long l = d.v0; - short shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - d.shift=shift + d.a2; + short shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + d.shift = shift + d.a2; - //d.shift=; + // d.shift=; return l; } - final public long calc4(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - d.shift= shift +d.a3; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + d.shift = shift + d.a3; return l; } - - final public long calc5(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - d.shift =shift+d.a4; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + d.shift = shift + d.a4; return l; } - final public long calc6(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - shift +=d.a4; - l |= (long)d.v5<<shift; - d.shift =shift+d.a5; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + shift += d.a4; + l |= (long) d.v5 << shift; + d.shift = shift + d.a5; return l; } final public long calc7(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0 || d.v6 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - shift +=d.a4; - l |= (long)d.v5<<shift; - shift +=d.a5; - l |= (long)d.v6<<shift; - d.shift =shift+d.a6; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + shift += d.a4; + l |= (long) d.v5 << shift; + shift += d.a5; + l |= (long) d.v6 << shift; + d.shift = shift + d.a6; return l; } - final public long calc8(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0||d.v7<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0 || d.v6 < 0 || d.v7 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - shift +=d.a4; - l |= (long)d.v5<<shift; - shift +=d.a5; - l |= (long)d.v6<<shift; - shift +=d.a6; - l |= (long)d.v7<<shift; - d.shift =shift+d.a7; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + shift += d.a4; + l |= (long) d.v5 << shift; + shift += d.a5; + l |= (long) d.v6 << shift; + shift += d.a6; + l |= (long) d.v7 << shift; + d.shift = shift + d.a7; return l; } - - - - - - - /** - * Maps a long to a integer value. This is very useful to save memory for sparse data long values + /** + * Maps a long to a integer value. This is very useful to save memory for + * sparse data long values + * * @param node * @return the integer */ - static public int misses = 0; - static public int good = 0; - - - + static public int misses = 0; + static public int good = 0; /** * Write the data + * * @param dos * @throws IOException */ static public void writeData(DataOutputStream dos) throws IOException { - + dos.writeInt(getFeatureSet().size()); - for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) { + for (Entry<String, HashMap<String, Integer>> e : getFeatureSet().entrySet()) { dos.writeUTF(e.getKey()); dos.writeInt(e.getValue().size()); - for(Entry<String,Integer> e2 : e.getValue().entrySet()) { + for (Entry<String, Integer> e2 : e.getValue().entrySet()) { - if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey()); - dos.writeUTF(e2.getKey()); + if (e2.getKey() == null) + DB.println("key " + e2.getKey() + " value " + e2.getValue() + " e -key " + e.getKey()); + dos.writeUTF(e2.getKey()); dos.writeInt(e2.getValue()); - } + } } } - public void read(DataInputStream din) throws IOException { + + public void read(DataInputStream din) throws IOException { int size = din.readInt(); - for(int i=0; i<size;i++) { + for (int i = 0; i < size; i++) { String k = din.readUTF(); int size2 = din.readInt(); - HashMap<String,Integer> h = new HashMap<String,Integer>(); - getFeatureSet().put(k,h); - for(int j = 0;j<size2;j++) { + HashMap<String, Integer> h = new HashMap<String, Integer>(); + getFeatureSet().put(k, h); + for (int j = 0; j < size2; j++) { h.put(din.readUTF(), din.readInt()); } getFeatureCounter().put(k, size2); @@ -511,8 +503,7 @@ final public class MFO implements IEncoderPlus { calculateBits(); } - - /** + /** * Clear the data */ static public void clearData() { @@ -521,18 +512,19 @@ final public class MFO implements IEncoderPlus { getFeatureSet().clear(); } - public HashMap<String,Integer> getFeatureCounter() { + @Override + public HashMap<String, Integer> getFeatureCounter() { return m_featureCounters; } - static public HashMap<String,HashMap<String,Integer>> getFeatureSet() { + static public HashMap<String, HashMap<String, Integer>> getFeatureSet() { return m_featureSets; } - static public String[] reverse(HashMap<String,Integer> v){ + static public String[] reverse(HashMap<String, Integer> v) { String[] set = new String[v.size()]; - for(Entry<String,Integer> e : v.entrySet()) { - set[e.getValue()]=e.getKey(); + for (Entry<String, Integer> e : v.entrySet()) { + set[e.getValue()] = e.getKey(); } return set; } diff --git a/dependencyParser/mate-tools/src/is2/mtag/Options.java b/dependencyParser/mate-tools/src/is2/mtag/Options.java index 6b9d806..20969ff 100755 --- a/dependencyParser/mate-tools/src/is2/mtag/Options.java +++ b/dependencyParser/mate-tools/src/is2/mtag/Options.java @@ -4,22 +4,26 @@ import is2.util.OptionsSuper; public final class Options extends OptionsSuper { - - public Options (String[] args) { - - for(int i = 0; i < args.length; i++) { + public Options(String[] args) { + + for (int i = 0; i < args.length; i++) { + + if (args[i].equals("--help")) + explain(); - if (args[i].equals("--help")) explain(); - if (args[i].equals("-nonormalize")) { - normalize=false; + normalize = false; } else if (args[i].equals("-features")) { - features= args[i+1]; i++; + features = args[i + 1]; + i++; } else if (args[i].equals("-hsize")) { - hsize= Integer.parseInt(args[i+1]); i++; + hsize = Integer.parseInt(args[i + 1]); + i++; } else if (args[i].equals("-len")) { - maxLen= Integer.parseInt(args[i+1]); i++; - } else super.addOption(args, i); + maxLen = Integer.parseInt(args[i + 1]); + i++; + } else + super.addOption(args, i); } } @@ -28,18 +32,23 @@ public final class Options extends OptionsSuper { System.out.println("java -cp anna.jar is2.mtag.Tagger [Options]"); System.out.println(); System.out.println("Example: "); - System.out.println(" java -cp mate.jar is2.mtag.Tagger -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6"); + System.out.println( + " java -cp mate.jar is2.mtag.Tagger -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6"); System.out.println(""); System.out.println("Options:"); System.out.println(""); - System.out.println(" -train <file> the corpus a model is trained on; default "+this.trainfile); - System.out.println(" -test <file> the input corpus for testing; default "+this.testfile); - System.out.println(" -out <file> the output corpus (result) of a test run; default "+this.outfile); + System.out.println(" -train <file> the corpus a model is trained on; default " + this.trainfile); + System.out.println(" -test <file> the input corpus for testing; default " + this.testfile); + System.out.println(" -out <file> the output corpus (result) of a test run; default " + this.outfile); System.out.println(" -model <file> the parsing model for traing the model is stored in the files"); - System.out.println(" and for parsing the model is load from this file; default "+this.modelName); - System.out.println(" -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters); - System.out.println(" -count <number> the n first sentences of the corpus are take for the training default "+this.count); - + System.out.println( + " and for parsing the model is load from this file; default " + this.modelName); + System.out.println( + " -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default " + + this.numIters); + System.out.println(" -count <number> the n first sentences of the corpus are take for the training default " + + this.count); + System.exit(0); } } diff --git a/dependencyParser/mate-tools/src/is2/mtag/Pipe.java b/dependencyParser/mate-tools/src/is2/mtag/Pipe.java index b25b953..75fb3fe 100755 --- a/dependencyParser/mate-tools/src/is2/mtag/Pipe.java +++ b/dependencyParser/mate-tools/src/is2/mtag/Pipe.java @@ -1,5 +1,11 @@ package is2.mtag; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map.Entry; import is2.data.Cluster; import is2.data.F2SF; @@ -11,460 +17,622 @@ import is2.data.ParametersFloat; import is2.data.PipeGen; import is2.data.SentenceData09; import is2.io.CONLLReader09; +import is2.io.IOGenerals; import is2.tools.IPipe; import is2.util.OptionsSuper; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map.Entry; - - final public class Pipe extends PipeGen implements IPipe { - public static int _CEND; - + public static int _CEND; - private static final String STWRD = "STWRD",STPOS = "STPOS",END = "END",STR = "STR"; + private static final String STWRD = "STWRD", STPOS = "STPOS", END = "END", STR = "STR"; public String[] types; Cluster cl; - final public MFO mf =new MFO(); + final public MFO mf = new MFO(); public Long2IntInterface li; - - - final MFO.Data4 d1 = new MFO.Data4(),d2 = new MFO.Data4(),d3 = new MFO.Data4(),dw = new MFO.Data4(); - final MFO.Data4 dwp = new MFO.Data4(),dp = new MFO.Data4(); - + final MFO.Data4 d1 = new MFO.Data4(), d2 = new MFO.Data4(), d3 = new MFO.Data4(), dw = new MFO.Data4(); + final MFO.Data4 dwp = new MFO.Data4(), dp = new MFO.Data4(); private OptionsSuper options; private int _ewrd; - static private int _mid, _strp,_endp; + static private int _strp; - public Pipe (Options options, Long2Int long2Int) throws IOException { + public Pipe(Options options, Long2Int long2Int) throws IOException { this.options = options; - li =long2Int; + li = long2Int; } - public Pipe (OptionsSuper options) { + public Pipe(OptionsSuper options) { this.options = options; } + public HashMap<Integer, Integer> form2morph = new HashMap<Integer, Integer>(); - public HashMap<Integer,Integer> form2morph = new HashMap<Integer, Integer> (); - - + @Override public Instances createInstances(String file) { CONLLReader09 depReader = new CONLLReader09(CONLLReader09.NO_NORMALIZE); - + depReader.startReading(file); - mf.register(POS,"<root-POS>"); + mf.register(POS, "<root-POS>"); - mf.register(FEAT, CONLLReader09.NO_TYPE); + mf.register(FEAT, IOGenerals.NO_TYPE); mf.register(FEAT, ""); - + InstancesTagger is = new InstancesTagger(); System.out.println("Registering feature parts "); - HashMap<String,HashSet<String>> op2form = new HashMap<String, HashSet<String>> (); - HashMap<String,Integer> freq = new HashMap<String, Integer> (); - - - int ic=0; - while(true) { + HashMap<String, HashSet<String>> op2form = new HashMap<String, HashSet<String>>(); + HashMap<String, Integer> freq = new HashMap<String, Integer>(); + + int ic = 0; + while (true) { SentenceData09 instance1 = depReader.getNext(); - if (instance1== null) break; + if (instance1 == null) + break; ic++; - String[] w = instance1.forms; - for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]); - for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]); - for(int i1 = 0; i1 < w.length; i1++) { - mf.register(WORD, w[i1].toLowerCase()); + for (String element : w) + mf.register(WORD, element); + for (String element : w) + registerChars(CHAR, element); + for (int i1 = 0; i1 < w.length; i1++) { + mf.register(WORD, w[i1].toLowerCase()); Integer f = freq.get(w[i1].toLowerCase()); - if (f==null) freq.put(w[i1].toLowerCase(), 1); - else freq.put(w[i1].toLowerCase(), f+1); + if (f == null) + freq.put(w[i1].toLowerCase(), 1); + else + freq.put(w[i1].toLowerCase(), f + 1); HashSet<String> forms = op2form.get(w[i1].toLowerCase()); - if (forms==null) { + if (forms == null) { forms = new HashSet<String>(); op2form.put(w[i1].toLowerCase(), forms); } - forms.add(instance1.ofeats[i1]==null?"_":instance1.ofeats[i1]); + forms.add(instance1.ofeats[i1] == null ? "_" : instance1.ofeats[i1]); } - for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1].toLowerCase()); + for (String element : w) + registerChars(CHAR, element.toLowerCase()); w = instance1.plemmas; - for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]); - for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]); + for (String element : w) + mf.register(WORD, element); + for (String element : w) + registerChars(CHAR, element); w = instance1.ppos; - for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); w = instance1.gpos; - for(int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); w = instance1.ofeats; - for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) mf.register(FEAT, w[i1]); - - // w = instance1.pfeats; - //for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) mf.register(FEAT, w[i1]); + for (String element : w) + if (element != null) + mf.register(FEAT, element); + + // w = instance1.pfeats; + // for(int i1 = 0; i1 < w.length; i1++) if (w[i1]!=null) + // mf.register(FEAT, w[i1]); + } + + for (Entry<String, HashSet<String>> e : op2form.entrySet()) { + if (e.getValue().size() == 1 && freq.get(e.getKey()) > 10) { + // System.out.println("found map "+e.getKey()+" "+e.getValue()+" + // "+freq.get(e.getKey())); + form2morph.put(mf.getValue(PipeGen.WORD, e.getKey()), + mf.getValue(FEAT, (String) e.getValue().toArray()[0])); + } } - - for(Entry<String,HashSet<String>> e : op2form.entrySet()) { - if (e.getValue().size()==1 &&freq.get(e.getKey())>10) { - // System.out.println("found map "+e.getKey()+" "+e.getValue()+" "+freq.get(e.getKey())); - form2morph.put(mf.getValue(Pipe.WORD, e.getKey()), mf.getValue(FEAT, (String)e.getValue().toArray()[0])); - } - } - initFeatures(); mf.calculateBits(); initValues(); - System.out.println(""+mf.toString()); + System.out.println("" + mf.toString()); depReader.startReading(file); int num1 = 0; long start1 = System.currentTimeMillis(); - + System.out.print("Creating Features: "); - is.init(ic, mf) ; - int del=0; + is.init(ic, mf); + int del = 0; - while(true) { - if (num1 % 100 ==0) {del = outValue(num1, del);} + while (true) { + if (num1 % 100 == 0) { + del = outValue(num1, del); + } SentenceData09 instance1 = depReader.getNext(is); - if (instance1== null) break; + if (instance1 == null) + break; - if (num1>options.count) break; + if (num1 > options.count) + break; num1++; } long end1 = System.currentTimeMillis(); System.gc(); - long mem2 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); - System.out.print(" time "+(end1-start1)+" mem "+(mem2/1024)+" kb"); + long mem2 = Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory(); + System.out.print(" time " + (end1 - start1) + " mem " + (mem2 / 1024) + " kb"); types = new String[mf.getFeatureCounter().get(FEAT)]; - for(Entry<String,Integer> e : mf.getFeatureSet().get(FEAT).entrySet()) { + for (Entry<String, Integer> e : MFO.getFeatureSet().get(FEAT).entrySet()) { types[e.getValue()] = e.getKey(); } - - if (options.clusterFile==null)cl = new Cluster(); - else cl= new Cluster(options.clusterFile, mf,6); - + if (options.clusterFile == null) + cl = new Cluster(); + else + cl = new Cluster(options.clusterFile, mf, 6); System.out.println("Num Features: " + types.length); - - depReader.startReading(file); + int num11 = 0; - - int num11=0; - - while(true) { + while (true) { SentenceData09 instance = depReader.getNext(); - if (instance==null) break; - - is.fillChars(instance, num11, _CEND); + if (instance == null) + break; + is.fillChars(instance, num11, _CEND); - if (num11>options.count) break; + if (num11 > options.count) + break; num11++; } - return is;//.toNativeArray(); + return is;// .toNativeArray(); } private void registerChars(String type, String word) { - for(int i=0;i<word.length();i++) mf.register(type, Character.toString(word.charAt(i))); + for (int i = 0; i < word.length(); i++) + mf.register(type, Character.toString(word.charAt(i))); } - - + @Override public void initValues() { - s_feat = mf.getFeatureBits(FEAT); - s_word = mf.getFeatureBits(WORD); - s_type = mf.getFeatureBits(TYPE); - s_char = mf.getFeatureBits(CHAR); - s_pos =mf.getFeatureBits(POS); - // dl1.a[0] = s_type; dl1.a[1] = s_pos; - // for (int k = 2; k < 7; k++) dl1.a[k] = s_pos; - - d1.a0 = s_type; d1.a1 = s_feat; d1.a2= s_word; - d2.a0 = s_type; d2.a1 = s_feat; d2.a2= s_feat; d2.a3= s_feat; d2.a4= s_feat; d2.a5= s_feat; d2.a6= s_feat; - d3.a0 = s_type; d3.a1 = s_feat; d3.a2= s_char; d3.a3= s_char; d3.a4= s_char; d3.a5= s_char; d3.a6= s_char; d3.a7= s_char; - dp.a0 = s_type; dp.a1 = s_feat; dp.a2= s_pos; dp.a3= s_pos; dp.a4= s_feat;// dp.a5= s_char; dp.a6= s_char; dp.a7= s_char; - dw.a0 = s_type; dw.a1 = s_feat;dw.a2= s_word; dw.a3= s_word; dw.a4= s_word; dw.a5= s_word; dw.a6= s_word; dw.a7= s_word; - dwp.a0 = s_type; dwp.a1 = s_feat;dwp.a2= s_word ; dwp.a3= s_feat; dwp.a4= s_word; + s_feat = MFO.getFeatureBits(FEAT); + s_word = MFO.getFeatureBits(WORD); + s_type = MFO.getFeatureBits(TYPE); + s_char = MFO.getFeatureBits(CHAR); + s_pos = MFO.getFeatureBits(POS); + // dl1.a[0] = s_type; dl1.a[1] = s_pos; + // for (int k = 2; k < 7; k++) dl1.a[k] = s_pos; + + d1.a0 = s_type; + d1.a1 = s_feat; + d1.a2 = s_word; + d2.a0 = s_type; + d2.a1 = s_feat; + d2.a2 = s_feat; + d2.a3 = s_feat; + d2.a4 = s_feat; + d2.a5 = s_feat; + d2.a6 = s_feat; + d3.a0 = s_type; + d3.a1 = s_feat; + d3.a2 = s_char; + d3.a3 = s_char; + d3.a4 = s_char; + d3.a5 = s_char; + d3.a6 = s_char; + d3.a7 = s_char; + dp.a0 = s_type; + dp.a1 = s_feat; + dp.a2 = s_pos; + dp.a3 = s_pos; + dp.a4 = s_feat;// dp.a5= s_char; dp.a6= s_char; dp.a7= s_char; + dw.a0 = s_type; + dw.a1 = s_feat; + dw.a2 = s_word; + dw.a3 = s_word; + dw.a4 = s_word; + dw.a5 = s_word; + dw.a6 = s_word; + dw.a7 = s_word; + dwp.a0 = s_type; + dwp.a1 = s_feat; + dwp.a2 = s_word; + dwp.a3 = s_feat; + dwp.a4 = s_word; } - public static short s_feat,s_word,s_type,s_dir,s_dist,s_char,s_pos; - - + public static short s_feat, s_word, s_type, s_dir, s_dist, s_char, s_pos; /** * Initialize the features types. */ + @Override public void initFeatures() { - for(int t=0;t<62;t++) { - mf.register(TYPE,"F"+t); + for (int t = 0; t < 62; t++) { + mf.register(TYPE, "F" + t); } - -// _mid = mf.register(POS, MID); + // _mid = mf.register(POS, MID); _strp = mf.register(POS, STR); - _endp= mf.register(POS, END); + mf.register(POS, END); mf.register(WORD, STR); - _ewrd = mf.register(WORD, END); - + _ewrd = mf.register(WORD, END); _CEND = mf.register(CHAR, END); - - - // optional features - mf.register(WORD,STWRD); - mf.register(POS,STPOS); - + mf.register(WORD, STWRD); + mf.register(POS, STPOS); } + final public void addCF(InstancesTagger is, int ic, String fs, int i, int pfeat[], short ppos[], int[] forms, + int[] lemmas, long[] vs) { - final public void addCF(InstancesTagger is, int ic, String fs,int i, int pfeat[],short ppos[], int[] forms, int[] lemmas, long[] vs) { + int c0 = is.chars[ic][i][0], c1 = is.chars[ic][i][1], c2 = is.chars[ic][i][2], c3 = is.chars[ic][i][3], + c4 = is.chars[ic][i][4], c5 = is.chars[ic][i][5]; + int e0 = is.chars[ic][i][6], e1 = is.chars[ic][i][7], e2 = is.chars[ic][i][8], e3 = is.chars[ic][i][9], + e4 = is.chars[ic][i][10]; - int c0= is.chars[ic][i][0], c1=is.chars[ic][i][1], c2=is.chars[ic][i][2], c3=is.chars[ic][i][3], c4=is.chars[ic][i][4],c5=is.chars[ic][i][5]; - int e0 =is.chars[ic][i][6], e1 =is.chars[ic][i][7],e2 =is.chars[ic][i][8],e3 =is.chars[ic][i][9],e4 =is.chars[ic][i][10]; - - int f=1,n=0; - short upper =0, number = 1; - for(int k1=0;k1<fs.length();k1++){ + int f = 1, n = 0; + short upper = 0, number = 1; + for (int k1 = 0; k1 < fs.length(); k1++) { char c = fs.charAt(k1); if (Character.isUpperCase(c)) { - if (k1==0) upper=1; + if (k1 == 0) + upper = 1; else { // first char + another - if (upper==1) upper=3; + if (upper == 1) + upper = 3; // another uppercase in the word - else if (upper==0) upper=2; + else if (upper == 0) + upper = 2; } } - if (Character.isDigit(c) && k1==0) number =2 ; - else if (Character.isDigit(c) && number==1) number = 3; + if (Character.isDigit(c) && k1 == 0) + number = 2; + else if (Character.isDigit(c) && number == 1) + number = 3; } int form = forms[i]; - int len = forms.length; + int len = forms.length; long l; - d1.v0 = f++; d1.v2=form; l=mf.calc3(d1); vs[n++]=mf.calc3(d1); - - d1.v0 = f++; d1.v2=is.formlc[ic][i]; vs[n++]=mf.calc3(d1); - - d3.v2=c0; d3.v3=c1; d3.v4=c2; d3.v5=c3; d3.v6=c4; - d3.v0=f++; vs[n++]=mf.calc3(d3); - d3.v0=f++; vs[n++]=mf.calc4(d3); - d3.v0=f++; vs[n++]=mf.calc5(d3); - d3.v0=f++; vs[n++]=mf.calc6(d3); - d3.v0=f++; vs[n++]=mf.calc7(d3); - - if (form!=-1) { - d3.v2=c2; d3.v3=c3; d3.v4=c4; d3.v5=c5; d3.v6=cl.getLP(form); - d3.v0=f; vs[n++]=mf.calc6(d3); d3.v0=f+1; vs[n++]=mf.calc7(d3); - } - f+=2; - - if (form>0) { - d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3); - d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3); - d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3); + d1.v0 = f++; + d1.v2 = form; + l = mf.calc3(d1); + vs[n++] = mf.calc3(d1); + + d1.v0 = f++; + d1.v2 = is.formlc[ic][i]; + vs[n++] = mf.calc3(d1); + + d3.v2 = c0; + d3.v3 = c1; + d3.v4 = c2; + d3.v5 = c3; + d3.v6 = c4; + d3.v0 = f++; + vs[n++] = mf.calc3(d3); + d3.v0 = f++; + vs[n++] = mf.calc4(d3); + d3.v0 = f++; + vs[n++] = mf.calc5(d3); + d3.v0 = f++; + vs[n++] = mf.calc6(d3); + d3.v0 = f++; + vs[n++] = mf.calc7(d3); + + if (form != -1) { + d3.v2 = c2; + d3.v3 = c3; + d3.v4 = c4; + d3.v5 = c5; + d3.v6 = cl.getLP(form); + d3.v0 = f; + vs[n++] = mf.calc6(d3); + d3.v0 = f + 1; + vs[n++] = mf.calc7(d3); } - f+=3; - - d3.v2=e0; d3.v3=e1; d3.v4=e2; d3.v5=e3; d3.v6=e4; - d3.v0 =f++; vs[n++]=mf.calc3(d3); - d3.v0 =f++; vs[n++]=l=mf.calc4(d3); vs[n++]=d3.calcs(3, upper, l); - d3.v0 =f++; vs[n++]=l=mf.calc5(d3); vs[n++]=d3.calcs(3, upper, l); - d3.v0 =f++; vs[n++]=l=mf.calc6(d3); vs[n++]=d3.calcs(3, upper, l); - d3.v0 =f++; vs[n++]=l=mf.calc7(d3); vs[n++]=d3.calcs(3, upper, l); - - if (form>0) { - d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3); - d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3); - d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3); + f += 2; + + if (form > 0) { + d3.v0 = f; + d3.v5 = cl.getLP(form); + vs[n++] = mf.calc6(d3); + d3.v0 = f + 1; + d3.v4 = cl.getLP(form); + vs[n++] = mf.calc5(d3); + d3.v0 = f + 2; + d3.v3 = cl.getLP(form); + vs[n++] = mf.calc4(d3); } - f+=3; - - - dw.v0=f++; dw.v2=i+1<len?forms[i+1]:_ewrd;dw.v3= forms[i];vs[n++]=mf.calc4(dw); - - if (len>i+1) { - - dw.v0=f; dw.v2= forms[i+1]; vs[n++]=mf.calc3(dw); - d3.v0=f+1; d3.v2 =is.chars[ic][i+1][0];vs[n++]=mf.calc3(d3); - d3.v0=f+2; d3.v2 =is.chars[ic][i+1][6];vs[n++]=mf.calc3(d3); - - d3.v2=e0; d3.v3=e1; - - d3.v0 =f+3; d3.v4 =is.chars[ic][i+1][0];vs[n++]=mf.calc5(d3); - d3.v0 =f+4; d3.v4 =is.chars[ic][i+1][6];vs[n++]=mf.calc5(d3); - - - - if (is.chars[ic][i+1][11]>1 ) { // instance.forms[i+1].length() - - d3.v0=f+5; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; vs[n++]=mf.calc4(d3); - d3.v0=f+6; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; vs[n++]=mf.calc4(d3); + f += 3; + + d3.v2 = e0; + d3.v3 = e1; + d3.v4 = e2; + d3.v5 = e3; + d3.v6 = e4; + d3.v0 = f++; + vs[n++] = mf.calc3(d3); + d3.v0 = f++; + vs[n++] = l = mf.calc4(d3); + vs[n++] = d3.calcs(3, upper, l); + d3.v0 = f++; + vs[n++] = l = mf.calc5(d3); + vs[n++] = d3.calcs(3, upper, l); + d3.v0 = f++; + vs[n++] = l = mf.calc6(d3); + vs[n++] = d3.calcs(3, upper, l); + d3.v0 = f++; + vs[n++] = l = mf.calc7(d3); + vs[n++] = d3.calcs(3, upper, l); + + if (form > 0) { + d3.v0 = f; + d3.v5 = cl.getLP(form); + vs[n++] = mf.calc6(d3); + d3.v0 = f + 1; + d3.v4 = cl.getLP(form); + vs[n++] = mf.calc5(d3); + d3.v0 = f + 2; + d3.v3 = cl.getLP(form); + vs[n++] = mf.calc4(d3); + } + f += 3; + + dw.v0 = f++; + dw.v2 = i + 1 < len ? forms[i + 1] : _ewrd; + dw.v3 = forms[i]; + vs[n++] = mf.calc4(dw); + + if (len > i + 1) { + + dw.v0 = f; + dw.v2 = forms[i + 1]; + vs[n++] = mf.calc3(dw); + d3.v0 = f + 1; + d3.v2 = is.chars[ic][i + 1][0]; + vs[n++] = mf.calc3(d3); + d3.v0 = f + 2; + d3.v2 = is.chars[ic][i + 1][6]; + vs[n++] = mf.calc3(d3); + + d3.v2 = e0; + d3.v3 = e1; + + d3.v0 = f + 3; + d3.v4 = is.chars[ic][i + 1][0]; + vs[n++] = mf.calc5(d3); + d3.v0 = f + 4; + d3.v4 = is.chars[ic][i + 1][6]; + vs[n++] = mf.calc5(d3); + + if (is.chars[ic][i + 1][11] > 1) { // instance.forms[i+1].length() + + d3.v0 = f + 5; + d3.v2 = is.chars[ic][i + 1][0]; + d3.v3 = is.chars[ic][i + 1][1]; + vs[n++] = mf.calc4(d3); + d3.v0 = f + 6; + d3.v2 = is.chars[ic][i + 1][6]; + d3.v3 = is.chars[ic][i + 1][7]; + vs[n++] = mf.calc4(d3); + + d3.v2 = e0; + d3.v3 = e1; + + d3.v0 = f + 7; + d3.v4 = is.chars[ic][i + 1][0]; + d3.v5 = is.chars[ic][i + 1][1]; + vs[n++] = mf.calc6(d3); + d3.v0 = f + 8; + d3.v4 = is.chars[ic][i + 1][6]; + d3.v5 = is.chars[ic][i + 1][7]; + vs[n++] = mf.calc6(d3); + + if (forms[i + 1] > 0) { + d3.v0 = f + 9; + d3.v2 = is.chars[ic][i + 1][0]; + d3.v3 = is.chars[ic][i + 1][1]; + d3.v4 = cl.getLP(forms[i + 1]); + vs[n++] = mf.calc5(d3); + d3.v0 = f + 10; + d3.v2 = is.chars[ic][i + 1][6]; + d3.v3 = is.chars[ic][i + 1][7]; + d3.v4 = cl.getLP(forms[i + 1]); + vs[n++] = mf.calc5(d3); + } + } - d3.v2=e0; d3.v3=e1; + if (forms[i + 1] > 0) { + dw.v0 = f + 11; + dw.v2 = cl.getLP(forms[i + 1]); + dw.v3 = forms[i]; + vs[n++] = mf.calc4(dw); + } - d3.v0=f+7; d3.v4 = is.chars[ic][i+1][0]; d3.v5 =is.chars[ic][i+1][1]; vs[n++]=mf.calc6(d3); - d3.v0=f+8; d3.v4 = is.chars[ic][i+1][6]; d3.v5=is.chars[ic][i+1][7]; vs[n++]=mf.calc6(d3); + if (len > i + 2) { + dw.v0 = f + 12; + dw.v2 = forms[i + 2]; + dw.v3 = forms[i + 1]; + vs[n++] = mf.calc4(dw); + vs[n++] = mf.calc3(dw); + // d2.v0=f+13; d2.v2=pfeat[i+1]; d2.v3= pfeat[i+2]; + // vs[n++]=mf.calc4(d2); + // dp.v0= f+14; dp.v2=ppos[i+1]; dp.v3=ppos[i+2]; + // vs[n++]=mf.calc4(dp); - if (forms[i+1]>0) { - d3.v0=f+9; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3); - d3.v0=f+10; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3); - } - } - - if (forms[i+1]>0) { - dw.v0=f+11; dw.v2= cl.getLP(forms[i+1]); dw.v3= forms[i];vs[n++]=mf.calc4(dw); - } - - if (len>i+2) { - dw.v0=f+12; dw.v2= forms[i+2]; dw.v3 = forms[i+1];vs[n++]=mf.calc4(dw);vs[n++]=mf.calc3(dw); -// d2.v0=f+13; d2.v2=pfeat[i+1]; d2.v3= pfeat[i+2]; vs[n++]=mf.calc4(d2); - // dp.v0= f+14; dp.v2=ppos[i+1]; dp.v3=ppos[i+2]; vs[n++]=mf.calc4(dp); + } - } + if (len > i + 3) { + dw.v0 = f + 14; + dw.v2 = forms[i + 3]; + dw.v3 = forms[i + 2]; + vs[n++] = mf.calc4(dw); + vs[n++] = mf.calc3(dw); - if (len>i+3) { - dw.v0=f+14; dw.v2= forms[i+3]; dw.v3 = forms[i+2]; vs[n++]=mf.calc4(dw); vs[n++]=mf.calc3(dw); - - } - } - f+=16; + } + } + f += 16; // length - d2.v0=f++; d2.v2=is.chars[ic][i][11];vs[n++]=mf.calc3(d2); - + d2.v0 = f++; + d2.v2 = is.chars[ic][i][11]; + vs[n++] = mf.calc3(d2); // contains a number - d2.v0=f++; d2.v2=number; vs[n++]=mf.calc3(d2); - d1.v0=f++; d1.v2=lemmas[i]; vs[n++]=mf.calc3(d1); - - if (i!=0 &&len>i+1) { - dw.v0=f; dw.v2=lemmas[i-1];dw.v3=lemmas[i+1];vs[n++]=mf.calc4(dw); - d2.v0=f+1; d2.v2=pfeat[i-1]; d2.v3=pfeat[i+1];vs[n++]=mf.calc4(d2); - } - f+=2; - - d2.v0= f++; d2.v2=i>=1? pfeat[i-1]:_strp; vs[n++]=mf.calc3(d2); - dp.v0= f++; dp.v2=ppos[i]; vs[n++]=mf.calc3(dp); - - if (i>0) { - dw.v0 = f++; dw.v2 =i>=1? forms[i-1]:_strp; vs[n++]=mf.calc3(dw); - dw.v0 = f++; dw.v2 = i>=1? lemmas[i-1]:_strp; vs[n++]=mf.calc3(dw); - - if (len>i+1) { -// d2.v0=f; d2.v2= pfeat[i-1];d2.v3= pfeat[i+1]; vs[n++]=mf.calc4(d2); - // dp.v0= f+1; dp.v2=ppos[i-1]; dp.v3=ppos[i+1]; vs[n++]=mf.calc4(dp); + d2.v0 = f++; + d2.v2 = number; + vs[n++] = mf.calc3(d2); + d1.v0 = f++; + d1.v2 = lemmas[i]; + vs[n++] = mf.calc3(d1); + + if (i != 0 && len > i + 1) { + dw.v0 = f; + dw.v2 = lemmas[i - 1]; + dw.v3 = lemmas[i + 1]; + vs[n++] = mf.calc4(dw); + d2.v0 = f + 1; + d2.v2 = pfeat[i - 1]; + d2.v3 = pfeat[i + 1]; + vs[n++] = mf.calc4(d2); + } + f += 2; + + d2.v0 = f++; + d2.v2 = i >= 1 ? pfeat[i - 1] : _strp; + vs[n++] = mf.calc3(d2); + dp.v0 = f++; + dp.v2 = ppos[i]; + vs[n++] = mf.calc3(dp); + + if (i > 0) { + dw.v0 = f++; + dw.v2 = i >= 1 ? forms[i - 1] : _strp; + vs[n++] = mf.calc3(dw); + dw.v0 = f++; + dw.v2 = i >= 1 ? lemmas[i - 1] : _strp; + vs[n++] = mf.calc3(dw); + + if (len > i + 1) { + // d2.v0=f; d2.v2= pfeat[i-1];d2.v3= pfeat[i+1]; + // vs[n++]=mf.calc4(d2); + // dp.v0= f+1; dp.v2=ppos[i-1]; dp.v3=ppos[i+1]; + // vs[n++]=mf.calc4(dp); } f++; - dp.v0= f++; dp.v2=ppos[i]; dp.v3=ppos[i-1]; vs[n++]=mf.calc4(dp); - - if (i>1) { - d2.v0=f++; d2.v2=i<2?_strp: pfeat[i-2]; vs[n++]=mf.calc3(d2); - d2.v0=f++; d2.v2= pfeat[i-1]; d2.v3= pfeat[i-2]; vs[n++]=mf.calc4(d2); - - dw.v0=f++; dw.v2= forms[i-2]; vs[n++]=mf.calc3(dw); - dwp.v0=f++; dwp.v2 = forms[i-1]; dwp.v3 = pfeat[i-2];vs[n++]=mf.calc4(dwp); - dwp.v0=f++; dwp.v2 = forms[i-2]; dwp.v3 = pfeat[i-1];vs[n++]=mf.calc4(dwp); - - if (i>2) { - d2.v0=f++; d2.v2=pfeat[i-3]; vs[n++]=mf.calc3(d2); - d2.v0=f++; d2.v2=pfeat[i-2]; d2.v3= pfeat[i-3]; vs[n++]=mf.calc4(d2); - dw.v0=f++; dw.v2 = forms[i-3]; dw.v3 = forms[i-2]; vs[n++]=mf.calc4(dw); - // dp.v0= f++; dp.v2=ppos[i-3]; dp.v3=ppos[i-2]; vs[n++]=mf.calc4(dp); + dp.v0 = f++; + dp.v2 = ppos[i]; + dp.v3 = ppos[i - 1]; + vs[n++] = mf.calc4(dp); + + if (i > 1) { + d2.v0 = f++; + d2.v2 = i < 2 ? _strp : pfeat[i - 2]; + vs[n++] = mf.calc3(d2); + d2.v0 = f++; + d2.v2 = pfeat[i - 1]; + d2.v3 = pfeat[i - 2]; + vs[n++] = mf.calc4(d2); + + dw.v0 = f++; + dw.v2 = forms[i - 2]; + vs[n++] = mf.calc3(dw); + dwp.v0 = f++; + dwp.v2 = forms[i - 1]; + dwp.v3 = pfeat[i - 2]; + vs[n++] = mf.calc4(dwp); + dwp.v0 = f++; + dwp.v2 = forms[i - 2]; + dwp.v3 = pfeat[i - 1]; + vs[n++] = mf.calc4(dwp); + + if (i > 2) { + d2.v0 = f++; + d2.v2 = pfeat[i - 3]; + vs[n++] = mf.calc3(d2); + d2.v0 = f++; + d2.v2 = pfeat[i - 2]; + d2.v3 = pfeat[i - 3]; + vs[n++] = mf.calc4(d2); + dw.v0 = f++; + dw.v2 = forms[i - 3]; + dw.v3 = forms[i - 2]; + vs[n++] = mf.calc4(dw); + // dp.v0= f++; dp.v2=ppos[i-3]; dp.v3=ppos[i-2]; + // vs[n++]=mf.calc4(dp); } } } vs[n] = Integer.MIN_VALUE; } - - - - - - - public int fillFeatureVectorsOne(ParametersFloat params, int w1, String form, Instances is, int n, int[] features, long[] vs) { + public int fillFeatureVectorsOne(ParametersFloat params, int w1, String form, Instances is, int n, int[] features, + long[] vs) { double best = -1; - int bestType=-1; + int bestType = -1; F2SF f = new F2SF(params.parameters); - //is.gfeats[n] - addCF((InstancesTagger)is, n, form, w1, features,is.pposs[n], is.forms[n], is.plemmas[n], vs); - - for(int t = 0; t < types.length; t++) { + // is.gfeats[n] + addCF((InstancesTagger) is, n, form, w1, features, is.pposs[n], is.forms[n], is.plemmas[n], vs); + + for (int t = 0; t < types.length; t++) { f.clear(); - int p = t<<Pipe.s_type; - for(int k=vs.length-1;k>=0;k--) if (vs[k]>=0) f.add(li.l2i(vs[k]+p)); - if (f.score >best) { - bestType=t; - best =f.score; + int p = t << Pipe.s_type; + for (int k = vs.length - 1; k >= 0; k--) + if (vs[k] >= 0) + f.add(li.l2i(vs[k] + p)); + if (f.score > best) { + bestType = t; + best = f.score; } - } + } return bestType; } - - - //static ArrayList<T> todo = new ArrayList<T>(); + // static ArrayList<T> todo = new ArrayList<T>(); static SentenceData09 instance; - - - public static int _FC =200; - - + + public static int _FC = 200; + /** * Write the lemma that are not mapped by operations + * * @param dos */ public void writeMap(DataOutputStream dos) { try { dos.writeInt(this.form2morph.size()); - for(Entry<Integer, Integer> e : form2morph.entrySet()) { + for (Entry<Integer, Integer> e : form2morph.entrySet()) { dos.writeInt(e.getKey()); dos.writeInt(e.getValue()); } @@ -473,16 +641,15 @@ final public class Pipe extends PipeGen implements IPipe { } } - - /** * Read the form-lemma mapping not read by operations + * * @param dis */ public void readMap(DataInputStream dis) { try { int size = dis.readInt(); - for(int i =0; i<size;i++) { + for (int i = 0; i < size; i++) { form2morph.put(dis.readInt(), dis.readInt()); } } catch (IOException e1) { @@ -490,8 +657,9 @@ final public class Pipe extends PipeGen implements IPipe { } } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.tools.IPipe#write(java.io.DataOutputStream) */ @Override @@ -504,5 +672,4 @@ final public class Pipe extends PipeGen implements IPipe { } } - } diff --git a/dependencyParser/mate-tools/src/is2/mtag/Tagger.java b/dependencyParser/mate-tools/src/is2/mtag/Tagger.java index da31a5b..05aa8d7 100644 --- a/dependencyParser/mate-tools/src/is2/mtag/Tagger.java +++ b/dependencyParser/mate-tools/src/is2/mtag/Tagger.java @@ -1,5 +1,17 @@ package is2.mtag; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.Map.Entry; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import java.util.zip.ZipOutputStream; import is2.data.Cluster; import is2.data.FV; @@ -12,360 +24,363 @@ import is2.data.SentenceData09; import is2.io.CONLLReader09; import is2.io.CONLLWriter09; import is2.tools.IPipe; -import is2.tools.Train; import is2.tools.Tool; +import is2.tools.Train; import is2.util.DB; import is2.util.OptionsSuper; -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.Map.Entry; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; -import java.util.zip.ZipOutputStream; - - public class Tagger implements Tool, Train { ExtractorM pipe; ParametersFloat params; - /** - * Initialize + * Initialize + * * @param options */ - public Tagger (Options options) { - + public Tagger(Options options) { + // load the model try { readModel(options); } catch (Exception e) { e.printStackTrace(); } - + } - + /** * @param string - * @throws IOException + * @throws IOException */ public Tagger(String modelFileName) { - this(new Options(new String[] {"-model",modelFileName})); + this(new Options(new String[] { "-model", modelFileName })); } - public Tagger() { } + public Tagger() { + } - public static void main (String[] args) throws FileNotFoundException, Exception - { + public static void main(String[] args) throws FileNotFoundException, Exception { Options options = new Options(args); - + Tagger tagger = new Tagger(); - + if (options.train) { Long2Int li = new Long2Int(options.hsize); - tagger.pipe = new ExtractorM (options,li); - InstancesTagger is = (InstancesTagger)tagger.pipe.createInstances(options.trainfile); + tagger.pipe = new ExtractorM(options, li); + InstancesTagger is = (InstancesTagger) tagger.pipe.createInstances(options.trainfile); ParametersFloat params = new ParametersFloat(li.size()); - - tagger.train(options, tagger.pipe,params,is); + + tagger.train(options, tagger.pipe, params, is); tagger.writeModel(options, tagger.pipe, params); } if (options.test) { - + tagger.readModel(options); - tagger.out(options,tagger.pipe, tagger.params); + tagger.out(options, tagger.pipe, tagger.params); } if (options.eval) { - + System.out.println("\nEvaluate:"); - Evaluator.evaluate(options.goldfile, options.outfile,options.format); + Evaluator.evaluate(options.goldfile, options.outfile, options.format); } } - /* (non-Javadoc) - * @see is2.mtag2.Learn#writeModel(is2.mtag2.Options, is2.mtag2.Pipe, is2.data.ParametersFloat) + /* + * (non-Javadoc) + * + * @see is2.mtag2.Learn#writeModel(is2.mtag2.Options, is2.mtag2.Pipe, + * is2.data.ParametersFloat) */ - public void writeModel(OptionsSuper options, IPipe pipe,ParametersFloat params) { - + @Override + public void writeModel(OptionsSuper options, IPipe pipe, ParametersFloat params) { + try { - ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(options.modelName))); - zos.putNextEntry(new ZipEntry("data")); - DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos)); - - MFO.writeData(dos); - - MFO.clearData(); - - DB.println("number of parameters "+params.parameters.length); - dos.flush(); - params.write(dos); - pipe.write(dos); - dos.flush(); - dos.close(); - } catch (Exception e){ + ZipOutputStream zos = new ZipOutputStream( + new BufferedOutputStream(new FileOutputStream(options.modelName))); + zos.putNextEntry(new ZipEntry("data")); + DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos)); + + MFO.writeData(dos); + + MFO.clearData(); + + DB.println("number of parameters " + params.parameters.length); + dos.flush(); + params.write(dos); + pipe.write(dos); + dos.flush(); + dos.close(); + } catch (Exception e) { e.printStackTrace(); } } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.mtag2.Learn#readModel(is2.mtag2.Options) */ + @Override public void readModel(OptionsSuper options) { - - try { - pipe = new ExtractorM(options); - params = new ParametersFloat(0); - // load the model - ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName))); - zis.getNextEntry(); - DataInputStream dis = new DataInputStream(new BufferedInputStream(zis)); - pipe.mf.read(dis); - pipe.initValues(); - pipe.initFeatures(); - - params.read(dis); - pipe.li = new Long2Int(params.parameters.length); - pipe.cl = new Cluster(dis); - pipe.readMap(dis); - dis.close(); - - this.pipe.types = new String[pipe.mf.getFeatureCounter().get(ExtractorM.FFEATS)]; - for(Entry<String,Integer> e :pipe.mf.getFeatureSet().get(ExtractorM.FFEATS).entrySet()) - this.pipe.types[e.getValue()] = e.getKey(); - - - DB.println("Loading data finished. "); - - DB.println("number of parameter "+params.parameters.length); - DB.println("number of classes "+this.pipe.types.length); - } catch(Exception e) { + try { + pipe = new ExtractorM(options); + params = new ParametersFloat(0); + + // load the model + ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName))); + zis.getNextEntry(); + DataInputStream dis = new DataInputStream(new BufferedInputStream(zis)); + pipe.mf.read(dis); + pipe.initValues(); + pipe.initFeatures(); + + params.read(dis); + pipe.li = new Long2Int(params.parameters.length); + pipe.cl = new Cluster(dis); + pipe.readMap(dis); + dis.close(); + + this.pipe.types = new String[pipe.mf.getFeatureCounter().get(PipeGen.FFEATS)]; + for (Entry<String, Integer> e : MFO.getFeatureSet().get(PipeGen.FFEATS).entrySet()) + this.pipe.types[e.getValue()] = e.getKey(); + + DB.println("Loading data finished. "); + + DB.println("number of parameter " + params.parameters.length); + DB.println("number of classes " + this.pipe.types.length); + } catch (Exception e) { e.printStackTrace(); } } - /* (non-Javadoc) - * @see is2.mtag2.Learn#train(is2.mtag2.Options, is2.mtag2.Pipe, is2.data.ParametersFloat, is2.data.InstancesTagger) + /* + * (non-Javadoc) + * + * @see is2.mtag2.Learn#train(is2.mtag2.Options, is2.mtag2.Pipe, + * is2.data.ParametersFloat, is2.data.InstancesTagger) */ - public void train(OptionsSuper options, IPipe pipe, ParametersFloat params, Instances is) { + @Override + public void train(OptionsSuper options, IPipe pipe, ParametersFloat params, Instances is) { int i = 0; - int del=0; - - String[] wds = this.pipe.mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorM.WORD)); + int del = 0; + + String[] wds = MFO.reverse(MFO.getFeatureSet().get(PipeGen.WORD)); int numInstances = is.size(); - float upd = (options.numIters*numInstances + 1); + float upd = (options.numIters * numInstances + 1); - - for(i = 0; i < options.numIters; i++) { + for (i = 0; i < options.numIters; i++) { long start = System.currentTimeMillis(); - - - long last= System.currentTimeMillis(); - + + long last = System.currentTimeMillis(); + FV pred = new FV(), gold = new FV(); - int correct =0,count=0; - - for(int n = 0; n < numInstances; n++) { + int correct = 0, count = 0; + + for (int n = 0; n < numInstances; n++) { upd--; - if((n+1) % 500 == 0) del= PipeGen.outValueErr(n+1, (count-correct),(float)correct/(float)count,del,last,upd); - + if ((n + 1) % 500 == 0) + del = PipeGen.outValueErr(n + 1, (count - correct), (float) correct / (float) count, del, last, + upd); + int length = is.length(n); int feats[] = new int[length]; - long[] vs = new long[ExtractorM._FC]; - - - for(int w1 = 0; w1 < length; w1++) { - + long[] vs = new long[ExtractorM._FC]; + + for (int w1 = 0; w1 < length; w1++) { count++; - if (this.pipe.form2morph.get(is.forms[n][w1])!=null){ + if (this.pipe.form2morph.get(is.forms[n][w1]) != null) { correct++; continue; } - - int bestType = this.pipe.fillFeatureVectorsOne(params, w1, wds[is.forms[n][w1]],is, n, is.gfeats[n],vs); - feats[w1]=bestType; - - if (bestType == is.gfeats[n][w1] ) { + int bestType = this.pipe.fillFeatureVectorsOne(params, w1, wds[is.forms[n][w1]], is, n, + is.gfeats[n], vs); + feats[w1] = bestType; + + if (bestType == is.gfeats[n][w1]) { correct++; - continue; + continue; } pred.clear(); int p = bestType << ExtractorM.s_type; - // System.out.println("test type "+bestType+" ex type "+ExtractorM.s_type); - for(int k=0;k<vs.length;k++) { - if (vs[k]==Integer.MIN_VALUE) break; - if (vs[k]>=0) pred.add(this.pipe.li.l2i(vs[k]+p)); + // System.out.println("test type "+bestType+" ex type + // "+ExtractorM.s_type); + for (long element : vs) { + if (element == Integer.MIN_VALUE) + break; + if (element >= 0) + pred.add(this.pipe.li.l2i(element + p)); } - + gold.clear(); p = is.gfeats[n][w1] << ExtractorM.s_type; - for(int k=0;k<vs.length;k++) { - if (vs[k]==Integer.MIN_VALUE) break; - if (vs[k]>=0) gold.add(this.pipe.li.l2i(vs[k]+p)); + for (long element : vs) { + if (element == Integer.MIN_VALUE) + break; + if (element >= 0) + gold.add(this.pipe.li.l2i(element + p)); } - params.update(pred,gold, (float)upd, 1.0f); + params.update(pred, gold, upd, 1.0f); } } long end = System.currentTimeMillis(); - String info = "time "+(end-start); - del= PipeGen.outValueErr(numInstances, (count-correct),(float)correct/(float)count,del,last,0,info); - - System.out.println(); + String info = "time " + (end - start); + del = PipeGen.outValueErr(numInstances, (count - correct), (float) correct / (float) count, del, last, 0, + info); + + System.out.println(); } - params.average(i*is.size()); + params.average(i * is.size()); } - - public void out (OptionsSuper options, IPipe pipe, ParametersFloat params) { + @Override + public void out(OptionsSuper options, IPipe pipe, ParametersFloat params) { - try { - long start = System.currentTimeMillis(); + long start = System.currentTimeMillis(); - CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask); - CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask); + CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask); + CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask); - depReader.normalizeOn=false; + depReader.normalizeOn = false; - System.out.print("Processing Sentence: "); - pipe.initValues(); - - int cnt = 0; - int del=0; - while(true) { + System.out.print("Processing Sentence: "); + pipe.initValues(); - InstancesTagger is = new InstancesTagger(); - is.init(1, this.pipe.mf); - cnt++; - - SentenceData09 instance = depReader.getNext(is); - if (instance == null || instance.forms == null) break; - is.fillChars(instance, 0, ExtractorM._CEND); - - instance = exec(instance, this.pipe, params,(InstancesTagger)is); - - SentenceData09 i09 = new SentenceData09(instance); - i09.createSemantic(instance); - - if (options.overwritegold) i09.ofeats = i09.pfeats; - - depWriter.write(i09); - - if (cnt%100==0) del=PipeGen.outValue(cnt, del); + int cnt = 0; + int del = 0; + while (true) { - } - depWriter.finishWriting(); - - del=PipeGen.outValue(cnt, del); - - long end = System.currentTimeMillis(); - System.out.println(PipeGen.getSecondsPerInstnace(cnt,(end-start))); - System.out.println(PipeGen.getUsedTime((end-start))); - } catch(Exception e){ + InstancesTagger is = new InstancesTagger(); + is.init(1, this.pipe.mf); + cnt++; + + SentenceData09 instance = depReader.getNext(is); + if (instance == null || instance.forms == null) + break; + is.fillChars(instance, 0, ExtractorM._CEND); + + instance = exec(instance, this.pipe, params, is); + + SentenceData09 i09 = new SentenceData09(instance); + i09.createSemantic(instance); + + if (options.overwritegold) + i09.ofeats = i09.pfeats; + + depWriter.write(i09); + + if (cnt % 100 == 0) + del = PipeGen.outValue(cnt, del); + + } + depWriter.finishWriting(); + + del = PipeGen.outValue(cnt, del); + + long end = System.currentTimeMillis(); + System.out.println(PipeGen.getSecondsPerInstnace(cnt, (end - start))); + System.out.println(PipeGen.getUsedTime((end - start))); + } catch (Exception e) { e.printStackTrace(); } } - - - private SentenceData09 exec(SentenceData09 instance, ExtractorM pipe, ParametersFloat params, InstancesTagger is) { - + + private SentenceData09 exec(SentenceData09 instance, ExtractorM pipe, ParametersFloat params, InstancesTagger is) { + int length = instance.ppos.length; - + short[] feats = new short[instance.gpos.length]; - + long vs[] = new long[ExtractorM._FC]; String[] forms = instance.forms; instance.pfeats = new String[instance.gpos.length]; - - for(int j = 0; j < length; j++) { - if (pipe.form2morph.get(is.forms[0][j])!=null) { - feats[j] = (short)pipe.form2morph.get(is.forms[0][j]).intValue(); + for (int j = 0; j < length; j++) { + if (pipe.form2morph.get(is.forms[0][j]) != null) { + feats[j] = (short) pipe.form2morph.get(is.forms[0][j]).intValue(); instance.pfeats[j] = this.pipe.types[feats[j]]; } else { - int bestType = pipe.fillFeatureVectorsOne(params,j, forms[j], is, 0,feats,vs); - feats[j] = (short)bestType; - instance.pfeats[j]= this.pipe.types[bestType]; + int bestType = pipe.fillFeatureVectorsOne(params, j, forms[j], is, 0, feats, vs); + feats[j] = (short) bestType; + instance.pfeats[j] = this.pipe.types[bestType]; } } - for(int j = 0; j < length; j++) { - if (pipe.form2morph.get(is.forms[0][j])!=null) { - feats[j] =(short)pipe.form2morph.get(is.forms[0][j]).intValue(); + for (int j = 0; j < length; j++) { + if (pipe.form2morph.get(is.forms[0][j]) != null) { + feats[j] = (short) pipe.form2morph.get(is.forms[0][j]).intValue(); instance.pfeats[j] = this.pipe.types[feats[j]]; } else { - int bestType = pipe.fillFeatureVectorsOne(params,j, forms[j], is, 0,feats,vs); - feats[j] = (short)bestType; - instance.pfeats[j]= this.pipe.types[bestType]; + int bestType = pipe.fillFeatureVectorsOne(params, j, forms[j], is, 0, feats, vs); + feats[j] = (short) bestType; + instance.pfeats[j] = this.pipe.types[bestType]; } } return instance; } - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.tools.Tool#apply(is2.data.SentenceData09) */ @Override - public SentenceData09 apply(SentenceData09 snt) { - - try { + public SentenceData09 apply(SentenceData09 snt) { + + try { SentenceData09 it = new SentenceData09(); it.createWithRoot(snt); - + InstancesTagger is = new InstancesTagger(); is.init(1, pipe.mf); is.createInstance09(it.forms.length); - + String[] forms = it.forms; - - + int length = forms.length; - - // is.setForm(0, 0, CONLLReader09.ROOT); - for(int i=0;i<length;i++) is.setForm(0, i, forms[i]); - for(int i=0;i<length;i++) is.setLemma(0, i, it.plemmas[i]); - for(int i=0;i<length;i++) is.setPPoss(0, i, it.ppos[i]); - + + // is.setForm(0, 0, CONLLReader09.ROOT); + for (int i = 0; i < length; i++) + is.setForm(0, i, forms[i]); + for (int i = 0; i < length; i++) + is.setLemma(0, i, it.plemmas[i]); + for (int i = 0; i < length; i++) + is.setPPoss(0, i, it.ppos[i]); + is.fillChars(it, 0, ExtractorM._CEND); - exec(it,pipe,params,is); + exec(it, pipe, params, is); SentenceData09 i09 = new SentenceData09(it); i09.createSemantic(it); return i09; - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); } - return null; + return null; } } diff --git a/dependencyParser/mate-tools/src/is2/parser/Closed.java b/dependencyParser/mate-tools/src/is2/parser/Closed.java index ed61657..af491aa 100755 --- a/dependencyParser/mate-tools/src/is2/parser/Closed.java +++ b/dependencyParser/mate-tools/src/is2/parser/Closed.java @@ -2,31 +2,29 @@ package is2.parser; import is2.data.Parse; - -final public class Closed { +final public class Closed { public float p; -// short b,e,m; + // short b,e,m; byte dir; - + Closed d; Open u; - public Closed(short s, short t, int m, int dir,Open u, Closed d, float score) { - // this.b = s; - // this.e = t; - // this.m = (short)m; - this.dir = (byte)dir; - this.u=u; - this.d =d; - p=score; + public Closed(short s, short t, int m, int dir, Open u, Closed d, float score) { + // this.b = s; + // this.e = t; + // this.m = (short)m; + this.dir = (byte) dir; + this.u = u; + this.d = d; + p = score; } - public void create(Parse parse) { - if (u != null) u.create(parse); - if (d != null) d.create(parse); + if (u != null) + u.create(parse); + if (d != null) + d.create(parse); } } - - diff --git a/dependencyParser/mate-tools/src/is2/parser/D5.java b/dependencyParser/mate-tools/src/is2/parser/D5.java index 58adc0a..407b4e1 100644 --- a/dependencyParser/mate-tools/src/is2/parser/D5.java +++ b/dependencyParser/mate-tools/src/is2/parser/D5.java @@ -1,199 +1,224 @@ /** - * + * */ package is2.parser; import is2.data.DX; - import is2.data.IFV; import is2.data.Long2IntInterface; /** * @author Dr. Bernd Bohnet, 30.10.2010 - * - * + * + * */ final public class D5 extends DX { - - - public long shift; private long h; - - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#cz2() */ final public void cz2() { - if (v0<0||v1<0) { - shift=0; - h=-1; - return ; + if (v0 < 0 || v1 < 0) { + shift = 0; + h = -1; + return; } - h = v0 | v1<<(shift=a0); - shift +=a1; + h = v0 | v1 << (shift = a0); + shift += a1; } - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#cz3() */ + @Override final public void cz3() { - if (v0<0||v1<0||v2<0) { - shift=0; - h=-1; - return ; + if (v0 < 0 || v1 < 0 || v2 < 0) { + shift = 0; + h = -1; + return; } - h = v0 | v1<<(shift=a0) | v2<<(shift +=a1); - shift= shift + a2; + h = v0 | v1 << (shift = a0) | v2 << (shift += a1); + shift = shift + a2; } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#cz4() */ + @Override final public void cz4() { - if (v0<0||v1<0||v2<0||v3<0) { - shift=0; - h=-1; - return ; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0) { + shift = 0; + h = -1; + return; } - h = v0 | v1<<(shift=a0) | v2<<(shift +=a1) | v3<<(shift +=a2); - shift= shift +a3; + h = v0 | v1 << (shift = a0) | v2 << (shift += a1) | v3 << (shift += a2); + shift = shift + a3; } - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#cz5() */ + @Override final public void cz5() { - if (v0<0||v1<0||v2<0||v3<0||v4<0) { - shift=0; - h=-1; - return ; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0) { + shift = 0; + h = -1; + return; } - h = v0 | v1<<(shift=a0) | v2<<(shift +=a1) | v3<<(shift +=a2) | v4<<(shift +=a3); - shift =shift+a4; - + h = v0 | v1 << (shift = a0) | v2 << (shift += a1) | v3 << (shift += a2) | v4 << (shift += a3); + shift = shift + a4; } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#cz6() */ + @Override final public void cz6() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) { - shift=0; - h=-1; - return ; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0) { + shift = 0; + h = -1; + return; } - h = v0 | v1<<(shift=a0) | v2<<(shift +=a1) | v3<<(shift +=a2) | v4<<(shift +=a3) | v5<<(shift +=a4); - shift =shift+a5; + h = v0 | v1 << (shift = a0) | v2 << (shift += a1) | v3 << (shift += a2) | v4 << (shift += a3) + | v5 << (shift += a4); + shift = shift + a5; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#cz7() */ + @Override final public void cz7() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) { - shift=0; - h=-1; - return ; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0) { + shift = 0; + h = -1; + return; } - h = v0 | v1<<(shift=a0) | v2<<(shift +=a1) | v3<<(shift +=a2) | v4<<(shift +=a3) | v5<<(shift +=a4) | v6<<(shift +=a5); - shift =shift+a6; + h = v0 | v1 << (shift = a0) | v2 << (shift += a1) | v3 << (shift += a2) | v4 << (shift += a3) + | v5 << (shift += a4) | v6 << (shift += a5); + shift = shift + a6; } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#cz8() */ + @Override final public void cz8() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) { - h=-1; - shift=0; - return ; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0 || v7 < 0) { + h = -1; + shift = 0; + return; } - h = v0 | v1<<(shift=a0) | v2<<(shift +=a1) | v3<<(shift +=a2) | v4<<(shift +=a3) | v5<<(shift +=a4) | v6<<(shift +=a5) | v7<<(shift +=a6); - shift =shift+a7; - + h = v0 | v1 << (shift = a0) | v2 << (shift += a1) | v3 << (shift += a2) | v4 << (shift += a3) + | v5 << (shift += a4) | v6 << (shift += a5) | v7 << (shift += a6); + shift = shift + a7; } - - - - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#clean() */ + @Override final public void clean() { - v0=0;v1=0;v2=0;v3=0;v4=0;v5=0;v6=0;v7=0;v8=0; - shift=0;h =0; + v0 = 0; + v1 = 0; + v2 = 0; + v3 = 0; + v4 = 0; + v5 = 0; + v6 = 0; + v7 = 0; + v8 = 0; + shift = 0; + h = 0; } - public final Long2IntInterface _li; + public final Long2IntInterface _li; + public D5(Long2IntInterface li) { - _li=li; + _li = li; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#cs(int, int) */ + @Override final public long cs(int b, int v) { - if (h<0) { - h=-1; shift=0; + if (h < 0) { + h = -1; + shift = 0; return -1; } - h |= (long)v<<shift; - shift +=b; - if (shift>64) { - System.out.println("shift too large "+shift); + h |= (long) v << shift; + shift += b; + if (shift > 64) { + System.out.println("shift too large " + shift); new Exception().printStackTrace(); } return h; } - /* (non-Javadoc) + + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#csa(int, int) */ + @Override final public long csa(int b, int v) { - if (h<0) { - h=-1; shift=0; return -1; + if (h < 0) { + h = -1; + shift = 0; + return -1; } - h |= (long)v<<shift; - shift +=b; - if (shift>64) { - System.out.println("shift too large "+shift); + h |= (long) v << shift; + shift += b; + if (shift > 64) { + System.out.println("shift too large " + shift); new Exception().printStackTrace(); } @@ -201,54 +226,68 @@ final public class D5 extends DX { } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#csa(int, int, is2.data.IFV) */ - final public void csa(int b, int v, IFV f ) { - if (h<0) { - h=-1; shift=0; return; + @Override + final public void csa(int b, int v, IFV f) { + if (h < 0) { + h = -1; + shift = 0; + return; } - h |= (long)v<<shift; - shift +=b; - if (shift>64) { - System.out.println("shift too large "+shift); + h |= (long) v << shift; + shift += b; + if (shift > 64) { + System.out.println("shift too large " + shift); new Exception().printStackTrace(); } - f.add((int)_li.l2i(h)); + f.add(_li.l2i(h)); } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#getVal() */ + @Override public long getVal() { - if (h<0) { - h=-1; shift=0; return h; + if (h < 0) { + h = -1; + shift = 0; + return h; } return h; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.parser52L.DX#map(is2.data.IFV, long) */ + @Override public void map(IFV f, long l) { - if (l>0) f.add(_li.l2i(l)); + if (l > 0) + f.add(_li.l2i(l)); } - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.data.DX#computeLabeValue(short, short) */ @Override public int computeLabeValue(int label, int shift) { - return label<<shift; + return label << shift; } + @Override public void fix() { - + } - - + } \ No newline at end of file diff --git a/dependencyParser/mate-tools/src/is2/parser/Decoder.java b/dependencyParser/mate-tools/src/is2/parser/Decoder.java index 9fe833a..1fe2340 100755 --- a/dependencyParser/mate-tools/src/is2/parser/Decoder.java +++ b/dependencyParser/mate-tools/src/is2/parser/Decoder.java @@ -3,18 +3,15 @@ package is2.parser; import java.util.ArrayList; import java.util.concurrent.ExecutorService; - import is2.data.DataFES; import is2.data.Parse; -import is2.util.DB; - /** * @author Bernd Bohnet, 01.09.2009 - * - * This methods do the actual work and they build the dependency trees. + * + * This methods do the actual work and they build the dependency trees. */ -final public class Decoder { +final public class Decoder { public static final boolean TRAINING = true; public static long timeDecotder; @@ -25,49 +22,57 @@ final public class Decoder { */ public static float NON_PROJECTIVITY_THRESHOLD = 0.3F; - - static ExecutorService executerService =java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS); + static ExecutorService executerService = java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS); // do not initialize - private Decoder() {}; - - + private Decoder() { + }; + /** * Build a dependency tree based on the data - * @param pos part-of-speech tags - * @param x the data - * @param projective projective or non-projective - * @param edges the edges + * + * @param pos + * part-of-speech tags + * @param x + * the data + * @param projective + * projective or non-projective + * @param edges + * the edges * @return a parse tree * @throws InterruptedException */ - public static Parse decode(short[] pos, DataFES x, boolean projective, boolean training) throws InterruptedException { + public static Parse decode(short[] pos, DataFES x, boolean projective, boolean training) + throws InterruptedException { long ts = System.nanoTime(); - - if (executerService.isShutdown()) executerService = java.util.concurrent.Executors.newCachedThreadPool(); + + if (executerService.isShutdown()) + executerService = java.util.concurrent.Executors.newCachedThreadPool(); final int n = pos.length; final Open O[][][][] = new Open[n][n][2][]; final Closed C[][][][] = new Closed[n][n][2][]; - ArrayList<ParallelDecoder> pe = new ArrayList<ParallelDecoder>(); + ArrayList<ParallelDecoder> pe = new ArrayList<ParallelDecoder>(); + + for (int i = 0; i < Parser.THREADS; i++) + pe.add(new ParallelDecoder(pos, x, O, C, n)); - for(int i=0;i<Parser.THREADS ;i++) pe.add(new ParallelDecoder(pos, x, O, C, n)); - for (short k = 1; k < n; k++) { // provide the threads the data for (short s = 0; s < n; s++) { short t = (short) (s + k); - if (t >= n) break; - - ParallelDecoder.add(s,t); + if (t >= n) + break; + + ParallelDecoder.add(s, t); } - + executerService.invokeAll(pe); } - + float bestSpanScore = (-1.0F / 0.0F); Closed bestSpan = null; for (int m = 1; m < n; m++) @@ -76,75 +81,153 @@ final public class Decoder { bestSpan = C[0][n - 1][1][m]; } - // build the dependency tree from the chart - Parse out= new Parse(pos.length); + // build the dependency tree from the chart + Parse out = new Parse(pos.length); bestSpan.create(out); - out.heads[0]=-1; - out.labels[0]=0; + out.heads[0] = -1; + out.labels[0] = 0; + + timeDecotder += (System.nanoTime() - ts); - timeDecotder += (System.nanoTime()-ts); - ts = System.nanoTime(); - - if (!projective) rearrange(pos, out.heads, out.labels,x,training); - - timeRearrange += (System.nanoTime()-ts); + + if (!projective) + rearrange(pos, out.heads, out.labels, x, training); + + timeRearrange += (System.nanoTime() - ts); + + return out; + } + + public static Parse[] decodeAll(short[] pos, DataFES x, boolean projective, boolean training) + throws InterruptedException { + + long ts = System.nanoTime(); + + if (executerService.isShutdown()) + executerService = java.util.concurrent.Executors.newCachedThreadPool(); + final int n = pos.length; + + final Open O[][][][] = new Open[n][n][2][]; + final Closed C[][][][] = new Closed[n][n][2][]; + + ArrayList<ParallelDecoder> pe = new ArrayList<ParallelDecoder>(); + + for (int i = 0; i < Parser.THREADS; i++) + pe.add(new ParallelDecoder(pos, x, O, C, n)); + + for (short k = 1; k < n; k++) { + + // provide the threads the data + for (short s = 0; s < n; s++) { + short t = (short) (s + k); + if (t >= n) + break; + + ParallelDecoder.add(s, t); + } + + executerService.invokeAll(pe); + } + + Parse[] out = new Parse[n - 1]; + + // float bestSpanScore = (-1.0F / 0.0F); + // Closed bestSpan = null; + for (int m = 1; m < n; m++) { + // if (C[0][n - 1][1][m].p > bestSpanScore) { + // bestSpanScore = C[0][n - 1][1][m].p; + // bestSpan = C[0][n - 1][1][m]; + // } + out[m - 1] = new Parse(pos.length); + C[0][n - 1][1][m].create(out[m - 1]); + out[m - 1].heads[0] = -1; + out[m - 1].labels[0] = 0; + } + + // build the dependency tree from the chart + // Parse out= new Parse(pos.length); + + // bestSpan.create(out); + + // out.heads[0]=-1; + // out.labels[0]=0; + + timeDecotder += (System.nanoTime() - ts); + + ts = System.nanoTime(); + + if (!projective) + for (Parse p : out) + rearrange(pos, p.heads, p.labels, x, training); + // if (!projective) rearrange(pos, out.heads, out.labels,x,training); + + timeRearrange += (System.nanoTime() - ts); return out; } - /** * This is the parallel non-projective edge re-arranger - * - * @param pos part-of-speech tags - * @param heads parent child relation - * @param labs edge labels - * @param x the data - * @param edges the existing edges defined by part-of-speech tags + * + * @param pos + * part-of-speech tags + * @param heads + * parent child relation + * @param labs + * edge labels + * @param x + * the data + * @param edges + * the existing edges defined by part-of-speech tags * @throws InterruptedException */ - public static void rearrange(short[] pos, short[] heads, short[] labs, DataFES x, boolean training) throws InterruptedException { + public static void rearrange(short[] pos, short[] heads, short[] labs, DataFES x, boolean training) + throws InterruptedException { + + int threads = (pos.length > Parser.THREADS) ? Parser.THREADS : pos.length; - int threads =(pos.length>Parser.THREADS)? Parser.THREADS: pos.length; + // wh what to change, nPar - new parent, nType - new type + short wh = -1, nPar = -1, nType = -1; + ArrayList<ParallelRearrange> pe = new ArrayList<ParallelRearrange>(); - - - // wh what to change, nPar - new parent, nType - new type - short wh = -1, nPar = -1,nType = -1; - ArrayList<ParallelRearrange> pe = new ArrayList<ParallelRearrange>(); - - while(true) { + while (true) { boolean[][] isChild = new boolean[heads.length][heads.length]; - for(int i = 1, l1=1; i < heads.length; i++,l1=i) - while((l1= heads[l1]) != -1) isChild[l1][i] = true; - + for (int i = 1, l1 = 1; i < heads.length; i++, l1 = i) + while ((l1 = heads[l1]) != -1) + isChild[l1][i] = true; + float max = Float.NEGATIVE_INFINITY; float p = Extractor.encode3(pos, heads, labs, x); pe.clear(); - for(int i=0;i<threads;i++) pe.add(new ParallelRearrange( isChild, pos,x,heads,labs)); - - for(int ch = 1; ch < heads.length; ch++) { + for (int i = 0; i < threads; i++) + pe.add(new ParallelRearrange(isChild, pos, x, heads, labs)); - for(short pa = 0; pa < heads.length; pa++) { - if(ch == pa || pa == heads[ch] || isChild[ch][pa]) continue; + for (int ch = 1; ch < heads.length; ch++) { - ParallelRearrange.add(p,(short) ch, pa); - } - } + for (short pa = 0; pa < heads.length; pa++) { + if (ch == pa || pa == heads[ch] || isChild[ch][pa]) + continue; + + ParallelRearrange.add(p, (short) ch, pa); + } + } executerService.invokeAll(pe); - - for(ParallelRearrange.PA rp :ParallelRearrange.order) - if(max < rp.max ) { - max = rp.max; wh = rp.wh; - nPar = rp.nPar; nType = rp.nType ; + + for (ParallelRearrange.PA rp : ParallelRearrange.order) + if (max < rp.max) { + max = rp.max; + wh = rp.wh; + nPar = rp.nPar; + nType = rp.nType; } ParallelRearrange.order.clear(); - - if(max <= NON_PROJECTIVITY_THRESHOLD) break; // bb: changed from 0.0 + + if (max <= NON_PROJECTIVITY_THRESHOLD) + break; // bb: changed from 0.0 heads[wh] = nPar; labs[wh] = nType; @@ -154,8 +237,7 @@ final public class Decoder { public static String getInfo() { - return "Decoder non-projectivity threshold: "+NON_PROJECTIVITY_THRESHOLD; + return "Decoder non-projectivity threshold: " + NON_PROJECTIVITY_THRESHOLD; } - } diff --git a/dependencyParser/mate-tools/src/is2/parser/Edges.java b/dependencyParser/mate-tools/src/is2/parser/Edges.java index 5ad892b..2574583 100644 --- a/dependencyParser/mate-tools/src/is2/parser/Edges.java +++ b/dependencyParser/mate-tools/src/is2/parser/Edges.java @@ -1,10 +1,8 @@ /** - * + * */ package is2.parser; -import is2.data.PipeGen; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -14,103 +12,102 @@ import java.util.Map.Entry; /** * @author Dr. Bernd Bohnet, 13.05.2009; - * - * + * + * */ public final class Edges { - private static short[][][] edges; - private static HashMap<Short,Integer> labelCount = new HashMap<Short,Integer>(); + private static HashMap<Short, Integer> labelCount = new HashMap<Short, Integer>(); - private static HashMap<String,Integer> slabelCount = new HashMap<String,Integer>(); + private static HashMap<String, Integer> slabelCount = new HashMap<String, Integer>(); - static short[] def = new short[1]; - - private Edges () {} - + + private Edges() { + } + /** * @param length */ public static void init(int length) { - edges = new short[length][length][]; + edges = new short[length][length][]; } - - - public static void findDefault(){ - - int best =0; - - - - for(Entry<Short,Integer> e : labelCount.entrySet()) { - - - if (best<e.getValue()) { + + public static void findDefault() { + + int best = 0; + + for (Entry<Short, Integer> e : labelCount.entrySet()) { + + if (best < e.getValue()) { best = e.getValue(); - def[0]=e.getKey(); + def[0] = e.getKey(); } } - - // labelCount=null; - // String[] types = new String[mf.getFeatureCounter().get(PipeGen.REL)]; - // for (Entry<String, Integer> e : MFO.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey(); + // labelCount=null; + // String[] types = new String[mf.getFeatureCounter().get(PipeGen.REL)]; + // for (Entry<String, Integer> e : + // MFO.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] + // = e.getKey(); + + is2.util.DB.println("set default label to " + def[0] + " "); - is2.util.DB.println("set default label to "+def[0]+" " ); + // System.out.println("found default "+def[0]); - // System.out.println("found default "+def[0]); - } - final static public void put(int pos1, int pos2, short label) { - putD(pos1, pos2,label); - // putD(pos2, pos1,!dir, label); + putD(pos1, pos2, label); + // putD(pos2, pos1,!dir, label); } - - + final static public void putD(int pos1, int pos2, short label) { - + Integer lc = labelCount.get(label); - if (lc==null) labelCount.put(label, 1); - else labelCount.put(label, lc+1); + if (lc == null) + labelCount.put(label, 1); + else + labelCount.put(label, lc + 1); - String key = pos1+"-"+pos2+label; + String key = pos1 + "-" + pos2 + label; Integer lcs = slabelCount.get(key); - if (lcs==null) slabelCount.put(key, 1); - else slabelCount.put(key, lcs+1); - - if (edges[pos1][pos2]==null) { - edges[pos1][pos2]=new short[1]; - edges[pos1][pos2][0]=label; - -// edgesh[pos1][pos2][dir?0:1] = new TIntHashSet(2); -// edgesh[pos1][pos2][dir?0:1].add(label); + if (lcs == null) + slabelCount.put(key, 1); + else + slabelCount.put(key, lcs + 1); + + if (edges[pos1][pos2] == null) { + edges[pos1][pos2] = new short[1]; + edges[pos1][pos2][0] = label; + + // edgesh[pos1][pos2][dir?0:1] = new TIntHashSet(2); + // edgesh[pos1][pos2][dir?0:1].add(label); } else { short labels[] = edges[pos1][pos2]; - for(short l : labels) { - //contains label already? - if(l==label) return; + for (short l : labels) { + // contains label already? + if (l == label) + return; } - - short[] nlabels = new short[labels.length+1]; + + short[] nlabels = new short[labels.length + 1]; System.arraycopy(labels, 0, nlabels, 0, labels.length); - nlabels[labels.length]=label; - edges[pos1][pos2]=nlabels; - - // edgesh[pos1][pos2][dir?0:1].add(label); + nlabels[labels.length] = label; + edges[pos1][pos2] = nlabels; + + // edgesh[pos1][pos2][dir?0:1].add(label); } } - + final static public short[] get(int pos1, int pos2) { - - if (pos1<0 || pos2<0 || edges[pos1][pos2]==null) return def; + + if (pos1 < 0 || pos2 < 0 || edges[pos1][pos2] == null) + return def; return edges[pos1][pos2]; } - /** * @param dis */ @@ -119,24 +116,24 @@ public final class Edges { int len = edges.length; d.writeShort(len); - for(int p1 =0;p1<len;p1++) { - for(int p2 =0;p2<len;p2++) { - if (edges[p1][p2]==null) d.writeShort(0); + for (int p1 = 0; p1 < len; p1++) { + for (int p2 = 0; p2 < len; p2++) { + if (edges[p1][p2] == null) + d.writeShort(0); else { d.writeShort(edges[p1][p2].length); - for(int l =0;l<edges[p1][p2].length;l++) { + for (int l = 0; l < edges[p1][p2].length; l++) { d.writeShort(edges[p1][p2][l]); } - + } } } - + d.writeShort(def[0]); } - /** * @param dis */ @@ -144,21 +141,21 @@ public final class Edges { int len = d.readShort(); edges = new short[len][len][]; - for(int p1 =0;p1<len;p1++) { - for(int p2 =0;p2<len;p2++) { + for (int p1 = 0; p1 < len; p1++) { + for (int p2 = 0; p2 < len; p2++) { int ll = d.readShort(); - if (ll==0) { - edges[p1][p2]=null; + if (ll == 0) { + edges[p1][p2] = null; } else { edges[p1][p2] = new short[ll]; - for(int l =0;l<ll;l++) { - edges[p1][p2][l]=d.readShort(); - } + for (int l = 0; l < ll; l++) { + edges[p1][p2][l] = d.readShort(); + } } } } - - def[0]= d.readShort(); + + def[0] = d.readShort(); } @@ -169,38 +166,32 @@ public final class Edges { } String _key; - + public C(String key) { super(); - _key=key; + _key = key; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object) */ @Override public int compare(Short l1, Short l2) { - - // int c1 = labelCount.get(l1); - // int c2 = labelCount.get(l2); - // if (true) return c1==c2?0:c1>c2?-1:1; - - int x1 = slabelCount.get(_key+l1.shortValue()); - int x2 = slabelCount.get(_key+l2.shortValue()); - // System.out.println(x1+" "+x2); - - - return x1==x2?0:x1>x2?-1:1; - - - - } - + // int c1 = labelCount.get(l1); + // int c2 = labelCount.get(l2); + // if (true) return c1==c2?0:c1>c2?-1:1; + + int x1 = slabelCount.get(_key + l1.shortValue()); + int x2 = slabelCount.get(_key + l2.shortValue()); + // System.out.println(x1+" "+x2); + + return x1 == x2 ? 0 : x1 > x2 ? -1 : 1; + + } - - } - - + } diff --git a/dependencyParser/mate-tools/src/is2/parser/Evaluator.java b/dependencyParser/mate-tools/src/is2/parser/Evaluator.java index c764cc6..f0d45ec 100755 --- a/dependencyParser/mate-tools/src/is2/parser/Evaluator.java +++ b/dependencyParser/mate-tools/src/is2/parser/Evaluator.java @@ -3,23 +3,20 @@ package is2.parser; import is2.data.SentenceData09; import is2.io.CONLLReader09; - public class Evaluator { - + public static final String PUNCT = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; - public static final String PUNCT ="!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; - public static class Results { public int total; public int corr; public float las; public float ula; - + } - - public static Results evaluate (String act_file, String pred_file) throws Exception { + + public static Results evaluate(String act_file, String pred_file) throws Exception { CONLLReader09 goldReader = new CONLLReader09(act_file, -1); CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1); @@ -29,12 +26,12 @@ public class Evaluator { SentenceData09 goldInstance = goldReader.getNext(); SentenceData09 predInstance = predictedReader.getNext(); - while(goldInstance != null) { + while (goldInstance != null) { int instanceLength = goldInstance.length(); if (instanceLength != predInstance.length()) - System.out.println("Lengths do not match on sentence "+numsent); + System.out.println("Lengths do not match on sentence " + numsent); int[] goldHeads = goldInstance.heads; String[] goldLabels = goldInstance.labels; @@ -44,51 +41,60 @@ public class Evaluator { boolean whole = true; boolean wholeL = true; - // NOTE: the first item is the root info added during nextInstance(), so we skip it. + // NOTE: the first item is the root info added during + // nextInstance(), so we skip it. - int punc=0; + int punc = 0; for (int i = 1; i < instanceLength; i++) { if (predHeads[i] == goldHeads[i]) { corr++; - if (goldLabels[i].equals(predLabels[i])) corrL++; + if (goldLabels[i].equals(predLabels[i])) + corrL++; else { - // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); + // System.out.println(numsent+" error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); wholeL = false; } - } - else { - // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - whole = false; wholeL = false; + } else { + // System.out.println(numsent+"error gold "+goldLabels[i]+" + // "+predLabels[i]+" head "+goldHeads[i]+" child "+i); + whole = false; + wholeL = false; } } - total += ((instanceLength - 1) - punc); // Subtract one to not score fake root token + total += ((instanceLength - 1) - punc); // Subtract one to not score + // fake root token - if(whole) corrsent++; - if(wholeL) corrsentL++; + if (whole) + corrsent++; + if (wholeL) + corrsentL++; numsent++; goldInstance = goldReader.getNext(); predInstance = predictedReader.getNext(); } - + Results r = new Results(); - + r.total = total; r.corr = corr; - r.las =(float)Math.round(((double)corrL/total)*100000)/1000; - r.ula =(float)Math.round(((double)corr /total)*100000)/1000; - System.out.print("Total: " + total+" \tCorrect: " + corr+" "); - System.out.println("LAS: " + (double)Math.round(((double)corrL/total)*100000)/1000+" \tTotal: " + (double)Math.round(((double)corrsentL/numsent)*100000)/1000+ - " \tULA: " + (double)Math.round(((double)corr /total)*100000)/1000+" \tTotal: " + (double)Math.round(((double)corrsent /numsent)*100000)/1000); - + r.las = (float) Math.round(((double) corrL / total) * 100000) / 1000; + r.ula = (float) Math.round(((double) corr / total) * 100000) / 1000; + System.out.print("Total: " + total + " \tCorrect: " + corr + " "); + System.out.println("LAS: " + (double) Math.round(((double) corrL / total) * 100000) / 1000 + " \tTotal: " + + (double) Math.round(((double) corrsentL / numsent) * 100000) / 1000 + " \tULA: " + + (double) Math.round(((double) corr / total) * 100000) / 1000 + " \tTotal: " + + (double) Math.round(((double) corrsent / numsent) * 100000) / 1000); + return r; } - - - public static float round (double v){ - - return Math.round(v*10000F)/10000F; + + public static float round(double v) { + + return Math.round(v * 10000F) / 10000F; } - + } diff --git a/dependencyParser/mate-tools/src/is2/parser/Extractor.java b/dependencyParser/mate-tools/src/is2/parser/Extractor.java index 35c90f2..3ba9cc9 100755 --- a/dependencyParser/mate-tools/src/is2/parser/Extractor.java +++ b/dependencyParser/mate-tools/src/is2/parser/Extractor.java @@ -1,465 +1,977 @@ package is2.parser; - - -import java.util.concurrent.atomic.AtomicInteger; - import is2.data.Cluster; -import is2.data.D4; import is2.data.D6; import is2.data.DX; - import is2.data.DataFES; import is2.data.F2SF; import is2.data.FV; import is2.data.IFV; import is2.data.Instances; import is2.data.Long2IntInterface; - - +import is2.data.PipeGen; import is2.util.DB; import is2.util.OptionsSuper; - - - final public class Extractor { static final int _SIB = 85; - public static int s_rel,s_word,s_type,s_dir,s_dist,s_feat,s_child,s_spath,s_lpath,s_pos,s_rel1; - public final DX d0 ,dl1,dl2, dwr,dr,dwwp,dw,dwp,dlf,d3lp, d2lp,d2pw,d2pp ; + public static int s_rel, s_word, s_type, s_dir, s_dist, s_feat, s_child, s_spath, s_lpath, s_pos, s_rel1; + public final DX d0, dl1, dl2, dwr, dr, dwwp, dw, dwp, dlf, d3lp, d2lp, d2pw, d2pp; public final Long2IntInterface li; - public boolean s_stack=false; + public boolean s_stack = false; - public Extractor(Long2IntInterface li, boolean stack, int what) { + public Extractor(Long2IntInterface li, boolean stack, int what) { - s_stack=stack; + s_stack = stack; - this.li=li; + this.li = li; - if (what == OptionsSuper.MULTIPLICATIVE) { - d0 = new D6(li);dl1 = new D6(li);dl2 = new D6(li);dwr = new D6(li);dr = new D6(li);dwwp = new D6(li); - dw = new D6(li);dwp = new D6(li);dlf = new D6(li);d3lp = new D6(li); d2lp = new D6(li); d2pw = new D6(li); d2pp = new D6(li); + if (what == OptionsSuper.MULTIPLICATIVE) { + d0 = new D6(li); + dl1 = new D6(li); + dl2 = new D6(li); + dwr = new D6(li); + dr = new D6(li); + dwwp = new D6(li); + dw = new D6(li); + dwp = new D6(li); + dlf = new D6(li); + d3lp = new D6(li); + d2lp = new D6(li); + d2pw = new D6(li); + d2pp = new D6(li); } else { - d0 = new D5(li);dl1 = new D5(li);dl2 = new D5(li);dwr = new D5(li);dr = new D5(li);dwwp = new D5(li); - dw = new D5(li);dwp = new D5(li);dlf = new D5(li);d3lp = new D5(li); d2lp = new D5(li); d2pw = new D5(li); d2pp = new D5(li); + d0 = new D5(li); + dl1 = new D5(li); + dl2 = new D5(li); + dwr = new D5(li); + dr = new D5(li); + dwwp = new D5(li); + dw = new D5(li); + dwp = new D5(li); + dlf = new D5(li); + d3lp = new D5(li); + d2lp = new D5(li); + d2pw = new D5(li); + d2pp = new D5(li); } } - public static void initStat(int what ) { + public static void initStat(int what) { MFO mf = new MFO(); - if (what == OptionsSuper.MULTIPLICATIVE) { - + if (what == OptionsSuper.MULTIPLICATIVE) { + DB.println("mult (d4) "); - - s_rel = mf.getFeatureCounter().get(REL).intValue()*16; - s_rel1 =mf.getFeatureCounter().get(REL).intValue()+1; - s_pos = mf.getFeatureCounter().get(POS).intValue(); + + s_rel = mf.getFeatureCounter().get(REL).intValue() * 16; + s_rel1 = mf.getFeatureCounter().get(REL).intValue() + 1; + s_pos = mf.getFeatureCounter().get(POS).intValue(); s_word = mf.getFeatureCounter().get(WORD).intValue(); s_type = mf.getFeatureCounter().get(TYPE).intValue(); - s_dir = (int)(mf.getFeatureCounter().get(DIR)); + s_dir = (mf.getFeatureCounter().get(DIR)); la = (mf.getValue(DIR, LA)); ra = (mf.getValue(DIR, RA)); - s_dist = (int)(mf.getFeatureCounter().get(DIST));//mf.getFeatureBits(DIST); - s_feat = (int)(mf.getFeatureCounter().get(Pipe.FEAT));//mf.getFeatureBits(Pipe.FEAT); - s_spath = (mf.getFeatureCounter().get(Cluster.SPATH)==null?0:mf.getFeatureCounter().get(Cluster.SPATH));//mf.getFeatureBits(Cluster.SPATH); - s_lpath = (mf.getFeatureCounter().get(Cluster.LPATH)==null?0:mf.getFeatureCounter().get(Cluster.LPATH));//mf.getFeatureBits(Cluster.LPATH); + s_dist = (mf.getFeatureCounter().get(DIST));// mf.getFeatureBits(DIST); + s_feat = (mf.getFeatureCounter().get(PipeGen.FEAT));// mf.getFeatureBits(Pipe.FEAT); + s_spath = (mf.getFeatureCounter().get(Cluster.SPATH) == null ? 0 + : mf.getFeatureCounter().get(Cluster.SPATH));// mf.getFeatureBits(Cluster.SPATH); + s_lpath = (mf.getFeatureCounter().get(Cluster.LPATH) == null ? 0 + : mf.getFeatureCounter().get(Cluster.LPATH));// mf.getFeatureBits(Cluster.LPATH); } else { - - s_rel = mf.getFeatureBits(REL); - s_pos = mf.getFeatureBits(POS); - s_word = mf.getFeatureBits(WORD); - s_type = mf.getFeatureBits(TYPE); - s_dir = mf.getFeatureBits(DIR); + + s_rel = MFO.getFeatureBits(REL); + s_pos = MFO.getFeatureBits(POS); + s_word = MFO.getFeatureBits(WORD); + s_type = MFO.getFeatureBits(TYPE); + s_dir = MFO.getFeatureBits(DIR); la = mf.getValue(DIR, LA); ra = mf.getValue(DIR, RA); - s_dist = mf.getFeatureBits(DIST); - s_feat = mf.getFeatureBits(Pipe.FEAT); - s_spath = mf.getFeatureBits(Cluster.SPATH); - s_lpath = mf.getFeatureBits(Cluster.LPATH); - + s_dist = MFO.getFeatureBits(DIST); + s_feat = MFO.getFeatureBits(PipeGen.FEAT); + s_spath = MFO.getFeatureBits(Cluster.SPATH); + s_lpath = MFO.getFeatureBits(Cluster.LPATH); + DB.println("shift init (d5) "); } - - - - } - public void init(){ - - - - d0.a0 = s_type;d0.a1 = s_pos;d0.a2 = s_pos;d0.a3 = s_pos;d0.a4 = s_pos;d0.a5 = s_pos;d0.a6 = s_pos;d0.a7 = s_pos; d0.fix(); - dl1.a0 = s_type;dl1.a1 = s_rel; dl1.a2 = s_pos;dl1.a3 = s_pos; dl1.a4 = s_pos; dl1.a5 = s_pos; dl1.a6 = s_pos; dl1.a7 = s_pos; dl1.fix(); - dl2.a0 = s_type;dl2.a1 = s_rel;dl2.a2 = s_word;dl2.a3 = s_pos;dl2.a4 = s_pos;dl2.a5 = s_pos;dl2.a6 = s_pos;dl2.a7 = s_pos; dl2.fix(); - dwp.a0 = s_type; dwp.a1 = s_rel; dwp.a2 = s_word; dwp.a3 = s_pos; dwp.a4 = s_pos; dwp.a5 = s_word;dwp.fix(); - dwwp.a0 = s_type; dwwp.a1 = s_rel; dwwp.a2 = s_word; dwwp.a3 = s_word; dwwp.a4 = s_pos; dwwp.a5 = s_word; dwwp.fix(); - dlf.a0 = s_type;dlf.a1 = s_rel; dlf.a2 = s_pos;dlf.a3 = s_pos; dlf.a4 = s_feat; dlf.a5 = s_feat; dlf.a6 = s_pos; dlf.a7 = s_pos; dlf.fix(); - d3lp.a0 = s_type; d3lp.a1 = s_rel; d3lp.a2 = s_lpath; d3lp.a3 = s_lpath; d3lp.a4 = s_lpath; d3lp.a5 = s_word; d3lp.a6 = s_spath; d3lp.a7 = s_spath;d3lp.fix(); - d2lp.a0 = s_type; d2lp.a1 = s_rel; d2lp.a2 = s_lpath; d2lp.a3 = s_lpath; d2lp.a4 = s_word; d2lp.a5 = s_word; d2lp.fix(); //d3lp.a6 = s_spath; d3lp.a7 = s_spath; - d2pw.a0 = s_type; d2pw.a1 = s_rel; d2pw.a2 = s_lpath; d2pw.a3 = s_lpath; d2pw.a4 = s_word; d2pw.a5 = s_word;d2pw.fix(); //d3lp.a6 = s_spath; d3lp.a7 = s_spath; - d2pp.a0 = s_type; d2pp.a1 = s_rel; d2pp.a2 = s_lpath; d2pp.a3 = s_lpath; d2pp.a4 = s_pos; d2pp.a5 = s_pos; d2pp.fix(); //d3lp.a6 = s_spath; d3lp.a7 = s_spath; } + public void init() { + + d0.a0 = s_type; + d0.a1 = s_pos; + d0.a2 = s_pos; + d0.a3 = s_pos; + d0.a4 = s_pos; + d0.a5 = s_pos; + d0.a6 = s_pos; + d0.a7 = s_pos; + d0.fix(); + dl1.a0 = s_type; + dl1.a1 = s_rel; + dl1.a2 = s_pos; + dl1.a3 = s_pos; + dl1.a4 = s_pos; + dl1.a5 = s_pos; + dl1.a6 = s_pos; + dl1.a7 = s_pos; + dl1.fix(); + dl2.a0 = s_type; + dl2.a1 = s_rel; + dl2.a2 = s_word; + dl2.a3 = s_pos; + dl2.a4 = s_pos; + dl2.a5 = s_pos; + dl2.a6 = s_pos; + dl2.a7 = s_pos; + dl2.fix(); + dwp.a0 = s_type; + dwp.a1 = s_rel; + dwp.a2 = s_word; + dwp.a3 = s_pos; + dwp.a4 = s_pos; + dwp.a5 = s_word; + dwp.fix(); + dwwp.a0 = s_type; + dwwp.a1 = s_rel; + dwwp.a2 = s_word; + dwwp.a3 = s_word; + dwwp.a4 = s_pos; + dwwp.a5 = s_word; + dwwp.fix(); + dlf.a0 = s_type; + dlf.a1 = s_rel; + dlf.a2 = s_pos; + dlf.a3 = s_pos; + dlf.a4 = s_feat; + dlf.a5 = s_feat; + dlf.a6 = s_pos; + dlf.a7 = s_pos; + dlf.fix(); + d3lp.a0 = s_type; + d3lp.a1 = s_rel; + d3lp.a2 = s_lpath; + d3lp.a3 = s_lpath; + d3lp.a4 = s_lpath; + d3lp.a5 = s_word; + d3lp.a6 = s_spath; + d3lp.a7 = s_spath; + d3lp.fix(); + d2lp.a0 = s_type; + d2lp.a1 = s_rel; + d2lp.a2 = s_lpath; + d2lp.a3 = s_lpath; + d2lp.a4 = s_word; + d2lp.a5 = s_word; + d2lp.fix(); // d3lp.a6 = s_spath; d3lp.a7 = s_spath; + d2pw.a0 = s_type; + d2pw.a1 = s_rel; + d2pw.a2 = s_lpath; + d2pw.a3 = s_lpath; + d2pw.a4 = s_word; + d2pw.a5 = s_word; + d2pw.fix(); // d3lp.a6 = s_spath; d3lp.a7 = s_spath; + d2pp.a0 = s_type; + d2pp.a1 = s_rel; + d2pp.a2 = s_lpath; + d2pp.a3 = s_lpath; + d2pp.a4 = s_pos; + d2pp.a5 = s_pos; + d2pp.fix(); // d3lp.a6 = s_spath; d3lp.a7 = s_spath; + } - - - public int basic(short[] pposs, int p, int d, IFV f) - { - - d0.clean(); dl1.clean(); dl2.clean(); dwp.clean(); dwwp.clean(); dlf.clean(); d3lp.clean(); - - d3lp.clean(); d2lp.clean();d2pw.clean(); d2pp.clean(); - - int n=1; - int dir= (p < d)? ra:la; - d0.v0= n++; d0.v1=pposs[p]; d0.v2=pposs[d]; //d0.stop=4; - int end= (p >= d ? p : d); + public int basic(short[] pposs, int p, int d, IFV f) { + + d0.clean(); + dl1.clean(); + dl2.clean(); + dwp.clean(); + dwwp.clean(); + dlf.clean(); + d3lp.clean(); + + d3lp.clean(); + d2lp.clean(); + d2pw.clean(); + d2pp.clean(); + + int n = 1; + int dir = (p < d) ? ra : la; + d0.v0 = n++; + d0.v1 = pposs[p]; + d0.v2 = pposs[d]; // d0.stop=4; + int end = (p >= d ? p : d); int start = (p >= d ? d : p) + 1; - for(int i = start ; i <end ; i++) { - d0.v3=pposs[i]; + for (int i = start; i < end; i++) { + d0.v3 = pposs[i]; d0.cz4(); - d0.csa(s_dir,dir,f); + d0.csa(s_dir, dir, f); } return n; } + public int firstm(Instances is, int i, int prnt, int dpnt, int label, Cluster cluster, long[] f) { - public int firstm(Instances is, int i, int prnt, int dpnt, int label, Cluster cluster, long[] f) - { - - - for(int k=0;k<f.length;k++) f[k]=0; + for (int k = 0; k < f.length; k++) + f[k] = 0; short[] pposs = is.pposs[i]; - int[] form =is.forms[i]; + int[] form = is.forms[i]; short[][] feats = is.feats[i]; + int pF = form[prnt], dF = form[dpnt]; + int pL = is.plemmas[i][prnt], dL = is.plemmas[i][dpnt]; + int pP = pposs[prnt], dP = pposs[dpnt]; + + int prntLS = pF == -1 ? -1 : cluster.getLP(pF), chldLS = dF == -1 ? -1 : cluster.getLP(dF); + + // final int dir= (prnt < dpnt)? ra:la; + + if (pF > maxForm) + pF = -1; + if (pL > maxForm) + pL = -1; + + if (dF > maxForm) + dF = -1; + if (dL > maxForm) + dL = -1; + + int n = 3, c = 0; + + dl2.v1 = label; + dl2.v0 = n++; + dl2.v2 = pF; + dl2.v3 = dP; + dl2.cz4(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v2 = dF; + dl2.v3 = pP; + dl2.cz4(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.getVal(); + + dwwp.v1 = label; + dwwp.v0 = n++; + dwwp.v2 = pF; + dwwp.v3 = dF; + dwwp.cz4(); + f[c++] = dwwp.getVal(); + + dl1.v1 = label; + dl1.v0 = n++; + dl1.v2 = dP; + dl1.cz3(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = pP; + dl1.cz3(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v3 = dP; + dl1.cz4(); + f[c++] = dl1.getVal(); + + int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str; + int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1] : s_end, + dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1] : s_end; + + int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str; + int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2] : s_end, + dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2] : s_end; + + int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd; + int pFp1 = prnt < form.length - 1 ? form[prnt + 1] : s_stwrd, + dFp1 = dpnt < form.length - 1 ? form[dpnt + 1] : s_stwrd; + + if (prnt - 1 == dpnt) + pPm1 = -1; + if (prnt == dpnt - 1) + dPm1 = -1; + + if (prnt + 1 == dpnt) + pPp1 = -1; + if (prnt == dpnt + 1) + dPp1 = -1; + + if (prnt - 2 == dpnt) + pPm2 = -1; + if (prnt == dpnt - 2) + dPm2 = -1; + + if (prnt + 2 == dpnt) + pPp2 = -1; + if (prnt == dpnt + 2) + dPp2 = -1; + + dl1.v0 = n++; + dl1.v2 = pP; + dl1.v3 = pPp1; + dl1.v4 = dP; + dl1.v5 = dPp1; + dl1.v6 = (prnt + 1 == dpnt ? 4 : prnt == dpnt + 1 ? 5 : 6); + dl1.cz7(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v5 = dPm1; + dl1.v6 = (prnt + 1 == dpnt ? 4 : prnt == dpnt - 1 ? 5 : 6); + dl1.cz7(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v3 = pPm1; + dl1.v6 = (prnt - 1 == dpnt ? 4 : prnt == dpnt - 1 ? 5 : 6); + dl1.cz7(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v5 = dPp1; + dl1.v6 = (prnt - 1 == dpnt ? 4 : prnt == dpnt + 1 ? 5 : 6); + dl1.cz7(); + f[c++] = dl1.getVal(); + + dl1.v0 = n++; + dl1.v3 = pPm1; + dl1.v5 = (prnt - 1 == dpnt ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v3 = dPm1; + dl1.v5 = (prnt == dpnt - 1 ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v3 = dPp1; + dl1.v5 = (prnt == dpnt + 1 ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v3 = pPp1; + dl1.v5 = (prnt + 1 == dpnt ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + + dl1.v0 = n++; + dl1.v2 = pP; + dl1.v3 = pPp2; + dl1.v4 = dP; + dl1.v5 = dPp2; + dl1.v6 = (prnt + 2 == dpnt ? 4 : prnt == dpnt + 2 ? 5 : 6); + dl1.cz7(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v5 = dPm2; + dl1.v6 = (prnt + 2 == dpnt ? 4 : prnt == dpnt - 2 ? 5 : 6); + dl1.cz7(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v3 = pPm2; + dl1.v6 = (prnt - 2 == dpnt ? 4 : prnt == dpnt - 2 ? 5 : 6); + dl1.cz7(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v5 = dPp2; + dl1.v6 = (prnt - 2 == dpnt ? 4 : prnt == dpnt + 2 ? 5 : 6); + dl1.cz7(); + f[c++] = dl1.getVal(); - int pF = form[prnt],dF = form[dpnt]; - int pL = is.plemmas[i][prnt],dL = is.plemmas[i][dpnt]; - int pP = pposs[prnt],dP = pposs[dpnt]; - - int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF); - - // final int dir= (prnt < dpnt)? ra:la; - - if (pF>maxForm) pF=-1; - if (pL>maxForm) pL=-1; - - if (dF>maxForm) dF=-1; - if (dL>maxForm) dL=-1; - - - int n=3,c=0; - - dl2.v1=label; - dl2.v0= n++; dl2.v2=pF; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.v2=dF; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.getVal(); - - - dwwp.v1=label; - dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.cz4(); f[c++]=dwwp.getVal(); - - dl1.v1=label; - dl1.v0= n++; dl1.v2=dP; dl1.cz3(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v2=pP; dl1.cz3(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v3=dP; dl1.cz4(); f[c++]=dl1.getVal(); - - int pPm1 = prnt > 0 ? pposs[prnt - 1] : s_str, dPm1 = dpnt > 0 ? pposs[dpnt - 1] : s_str; - int pPp1 = prnt < pposs.length - 1 ? pposs[prnt + 1]:s_end, dPp1 = dpnt < pposs.length - 1 ? pposs[dpnt + 1]:s_end; - - int pPm2 = prnt > 1 ? pposs[prnt - 2] : s_str, dPm2 = dpnt > 1 ? pposs[dpnt - 2] : s_str; - int pPp2 = prnt < pposs.length - 2 ? pposs[prnt + 2]:s_end, dPp2 = dpnt < pposs.length - 2 ? pposs[dpnt + 2]:s_end; - - int pFm1 = prnt > 0 ? form[prnt - 1] : s_stwrd, dFm1 = dpnt > 0 ? form[dpnt - 1] : s_stwrd; - int pFp1 = prnt < form.length - 1 ? form[prnt + 1]:s_stwrd, dFp1 = dpnt < form.length - 1 ? form[dpnt + 1]:s_stwrd; - - - if (prnt-1 == dpnt) pPm1 =-1; - if (prnt == dpnt-1) dPm1 =-1; - - if (prnt+1 == dpnt) pPp1 =-1; - if (prnt == dpnt+1) dPp1 =-1; - - if (prnt-2 == dpnt) pPm2 =-1; - if (prnt == dpnt-2) dPm2 =-1; - - if (prnt+2 == dpnt) pPp2 =-1; - if (prnt == dpnt+2) dPp2 =-1; - - - dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp1; dl1.v4=dP;dl1.v5=dPp1; dl1.v6= (prnt+1==dpnt?4:prnt==dpnt+1?5:6) ; dl1.cz7(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v5=dPm1; dl1.v6= (prnt+1==dpnt?4:prnt==dpnt-1?5:6) ; dl1.cz7(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v3=pPm1; dl1.v6= (prnt-1==dpnt?4:prnt==dpnt-1?5:6) ; dl1.cz7(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v5=dPp1; dl1.v6= (prnt-1==dpnt?4:prnt==dpnt+1?5:6) ; dl1.cz7(); f[c++]=dl1.getVal(); - - - dl1.v0= n++; dl1.v3=pPm1; dl1.v5= (prnt-1==dpnt?4:5) ; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v3=dPm1; dl1.v5= (prnt==dpnt-1?4:5) ; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v3=dPp1; dl1.v5= (prnt==dpnt+1?4:5) ; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v3=pPp1; dl1.v5= (prnt+1==dpnt?4:5) ; dl1.cz6(); f[c++]=dl1.getVal(); - - dl1.v0= n++;dl1.v2=pP; dl1.v3=pPp2; dl1.v4=dP;dl1.v5=dPp2; dl1.v6= (prnt+2==dpnt?4:prnt==dpnt+2?5:6) ; dl1.cz7(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v5=dPm2; dl1.v6= (prnt+2==dpnt?4:prnt==dpnt-2?5:6) ; dl1.cz7(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v3=pPm2; dl1.v6= (prnt-2==dpnt?4:prnt==dpnt-2?5:6) ; dl1.cz7(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v5=dPp2; dl1.v6= (prnt-2==dpnt?4:prnt==dpnt+2?5:6) ; dl1.cz7(); f[c++]=dl1.getVal(); - - // remove this again - dl1.v0= n++; dl1.v3=pPm2; dl1.v5= (prnt-2==dpnt?4:5); dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v3=dPm2; dl1.v5= (prnt==dpnt-2?4:5); dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v3=dPp2; dl1.v5= (prnt==dpnt+2?4:5); dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v3=pPp2; dl1.v5= (prnt+2==dpnt?4:5); dl1.cz6(); f[c++]=dl1.getVal(); - - - - dl2.v0= n++; dl2.v3=dFm1; dl2.v3=pPp1;dl2.v4=pP; dl2.v5= (prnt+1==dpnt?4:prnt==dpnt-1?5:6) ; dl2.cz6(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=dFp1; dl2.v3=pPm1; dl2.v5= (prnt-1==dpnt?4:prnt==dpnt+1?5:6) ; dl2.cz6(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFm1; dl2.v3=dPp1;dl2.v4=dP; dl2.v5= (prnt-1==dpnt?4:prnt==dpnt+1?5:6) ; dl2.cz6(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFp1; dl2.v3=dPm1; dl2.v5= (prnt+1==dpnt?4:prnt==dpnt-1?5:6) ; dl2.cz6(); f[c++]=dl2.getVal(); - + dl1.v0 = n++; + dl1.v3 = pPm2; + dl1.v5 = (prnt - 2 == dpnt ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v3 = dPm2; + dl1.v5 = (prnt == dpnt - 2 ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v3 = dPp2; + dl1.v5 = (prnt == dpnt + 2 ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v3 = pPp2; + dl1.v5 = (prnt + 2 == dpnt ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + + dl2.v0 = n++; + dl2.v3 = dFm1; + dl2.v3 = pPp1; + dl2.v4 = pP; + dl2.v5 = (prnt + 1 == dpnt ? 4 : prnt == dpnt - 1 ? 5 : 6); + dl2.cz6(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = dFp1; + dl2.v3 = pPm1; + dl2.v5 = (prnt - 1 == dpnt ? 4 : prnt == dpnt + 1 ? 5 : 6); + dl2.cz6(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFm1; + dl2.v3 = dPp1; + dl2.v4 = dP; + dl2.v5 = (prnt - 1 == dpnt ? 4 : prnt == dpnt + 1 ? 5 : 6); + dl2.cz6(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFp1; + dl2.v3 = dPm1; + dl2.v5 = (prnt + 1 == dpnt ? 4 : prnt == dpnt - 1 ? 5 : 6); + dl2.cz6(); + f[c++] = dl2.getVal(); + + // maybe without dir + dl2.v0 = n++; + dl2.v3 = dFm1; + dl2.v3 = dPm2; + dl2.v4 = pP; + dl2.v5 = (prnt == dpnt - 1 ? 4 : prnt == dpnt - 2 ? 5 : 6); + dl2.cz6(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = dFp1; + dl2.v3 = dPp2; + dl2.v5 = (prnt == dpnt + 1 ? 4 : prnt == dpnt + 2 ? 5 : 6); + dl2.cz6(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFm1; + dl2.v3 = pPm2; + dl2.v4 = dP; + dl2.v5 = (prnt - 1 == dpnt ? 4 : prnt - 2 == dpnt ? 5 : 6); + dl2.cz6(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v3 = pFp1; + dl2.v3 = pPp2; + dl2.v5 = (prnt + 1 == dpnt ? 4 : prnt + 2 == dpnt ? 5 : 6); + dl2.cz6(); + f[c++] = dl2.getVal(); + + dwwp.v0 = n++; + dwwp.v2 = pF; + dwwp.v3 = dF; + dwwp.v4 = dP; + dwwp.cz5(); + f[c++] = dwwp.getVal(); + dwwp.v0 = n++; + dwwp.v2 = pF; + dwwp.v3 = dF; + dwwp.v4 = pP; + dwwp.cz5(); + f[c++] = dwwp.getVal(); + // dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; + // dwwp.cz6(); f[c++]=dwwp.getVal(); - // maybe without dir - dl2.v0= n++; dl2.v3=dFm1; dl2.v3=dPm2;dl2.v4=pP; dl2.v5= (prnt==dpnt-1?4:prnt==dpnt-2?5:6) ; dl2.cz6(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=dFp1; dl2.v3=dPp2; dl2.v5= (prnt==dpnt+1?4:prnt==dpnt+2?5:6) ; dl2.cz6(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFm1; dl2.v3=pPm2;dl2.v4=dP; dl2.v5= (prnt-1==dpnt?4:prnt-2==dpnt?5:6) ; dl2.cz6(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.v3=pFp1; dl2.v3=pPp2; dl2.v5= (prnt+1==dpnt?4:prnt+2==dpnt?5:6) ; dl2.cz6(); f[c++]=dl2.getVal(); - - - dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.getVal(); - dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=dF; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.getVal(); -// dwwp.v0= n++; dwwp.v2=dF; dwwp.v3=pF; dwwp.v4=pP; dwwp.v4=dP; dwwp.cz6(); f[c++]=dwwp.getVal(); - - // until here - // lemmas - dl2.v1=label; - dl2.v0= n++; dl2.v2=pL; dl2.v3=dP; dl2.cz4(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.v2=dL; dl2.v3=pP; dl2.cz4(); f[c++]=dl2.getVal(); - dl2.v0= n++; dl2.cz3(); f[c++]=dl2.getVal(); - - - dwwp.v1=label; - dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.cz4(); f[c++]=dwwp.getVal(); - - dwp.v1= label; - dwp.v0=n++;dwp.v2=dL; dwp.v3=pP;dwp.v4=dP;dwp.v5=pL; //dwp.cz6(); f[c++]=dwp.getVal(); - - dwp.v0=n++;dwp.v2=pL; dwp.v3=pP;dwp.v4=dP; dwp.v0=n++;dwp.cz5(); f[c++]=dwp.getVal(); - - - - dwp.v0=n++;dwp.v2=pL; dwp.cz5(); f[c++]=dwp.getVal(); - dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=dL; dwwp.v4=dP; dwwp.cz5(); f[c++]=dwwp.getVal(); - dwwp.v0= n++; dwwp.v4=pP; dwwp.cz5(); f[c++]=dwwp.getVal(); - + dl2.v1 = label; + dl2.v0 = n++; + dl2.v2 = pL; + dl2.v3 = dP; + dl2.cz4(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v2 = dL; + dl2.v3 = pP; + dl2.cz4(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.cz3(); + f[c++] = dl2.getVal(); + + dwwp.v1 = label; + dwwp.v0 = n++; + dwwp.v2 = pL; + dwwp.v3 = dL; + dwwp.cz4(); + f[c++] = dwwp.getVal(); + + dwp.v1 = label; + dwp.v0 = n++; + dwp.v2 = dL; + dwp.v3 = pP; + dwp.v4 = dP; + dwp.v5 = pL; // dwp.cz6(); f[c++]=dwp.getVal(); + + dwp.v0 = n++; + dwp.v2 = pL; + dwp.v3 = pP; + dwp.v4 = dP; + dwp.v0 = n++; + dwp.cz5(); + f[c++] = dwp.getVal(); + + dwp.v0 = n++; + dwp.v2 = pL; + dwp.cz5(); + f[c++] = dwp.getVal(); + dwwp.v0 = n++; + dwwp.v2 = pL; + dwwp.v3 = dL; + dwwp.v4 = dP; + dwwp.cz5(); + f[c++] = dwwp.getVal(); + dwwp.v0 = n++; + dwwp.v4 = pP; + dwwp.cz5(); + f[c++] = dwwp.getVal(); // cluster - if (cluster.size()>10) { - d2pw.v1=label; - d2pw.v0=n++; d2pw.v2=prntLS; d2pw.v3=chldLS; d2pw.cz4(); f[c++]=d2pw.getVal(); - d2pw.v0=n++; d2pw.v4=pF; d2pw.cz5(); f[c++]=d2pw.getVal(); - d2pw.v0=n++; d2pw.v4=dF; d2pw.cz5(); f[c++]=d2pw.getVal(); - // d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.getVal(); - - - d2pp.v1=label; - d2pp.v0=n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.cz4(); f[c++]=d2pp.getVal(); - d2pp.v0=n++; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.getVal(); - d2pp.v0=n++; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.getVal(); - d2pp.v0=n++; d2pp.v5=pP; d2pp.cz6(); f[c++]=d2pp.getVal(); + if (cluster.size() > 10) { + d2pw.v1 = label; + d2pw.v0 = n++; + d2pw.v2 = prntLS; + d2pw.v3 = chldLS; + d2pw.cz4(); + f[c++] = d2pw.getVal(); + d2pw.v0 = n++; + d2pw.v4 = pF; + d2pw.cz5(); + f[c++] = d2pw.getVal(); + d2pw.v0 = n++; + d2pw.v4 = dF; + d2pw.cz5(); + f[c++] = d2pw.getVal(); + // d2pw.v0=n++; d2pw.v5=pF; d2pw.cz6(); f[c++]=d2pw.getVal(); + + d2pp.v1 = label; + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = chldLS; + d2pp.cz4(); + f[c++] = d2pp.getVal(); + d2pp.v0 = n++; + d2pp.v4 = pP; + d2pp.cz5(); + f[c++] = d2pp.getVal(); + d2pp.v0 = n++; + d2pp.v4 = dP; + d2pp.cz5(); + f[c++] = d2pp.getVal(); + d2pp.v0 = n++; + d2pp.v5 = pP; + d2pp.cz6(); + f[c++] = d2pp.getVal(); } - if (s_stack) { + if (s_stack) { short[] prel = is.plabels[i]; short[] phead = is.pheads[i]; - //take those in for stacking - dl2.v1=label; - dl2.v0= n++;dl2.v2=prel[dpnt];dl2.v3=pP;dl2.v4=dP; dl2.v5=prnt==phead[dpnt]?1:2; dl2.cz6(); f[c++]=dl2.getVal(); - dl2.v0= n++;dl2.v2=pP;dl2.v3=dP; dl2.v4=prnt==phead[dpnt]?1:2; dl2.cz5(); f[c++]=dl2.getVal(); + // take those in for stacking + dl2.v1 = label; + dl2.v0 = n++; + dl2.v2 = prel[dpnt]; + dl2.v3 = pP; + dl2.v4 = dP; + dl2.v5 = prnt == phead[dpnt] ? 1 : 2; + dl2.cz6(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v2 = pP; + dl2.v3 = dP; + dl2.v4 = prnt == phead[dpnt] ? 1 : 2; + dl2.cz5(); + f[c++] = dl2.getVal(); } - - - if (feats==null) return c; - - short[] featsP =feats[prnt], featsD =feats[dpnt]; - dlf.v0= n++; dlf.v1=label; dlf.v2=pP; dlf.v3=dP; - c =extractFeat(f, c, featsP, featsD); + if (feats == null) + return c; + short[] featsP = feats[prnt], featsD = feats[dpnt]; + dlf.v0 = n++; + dlf.v1 = label; + dlf.v2 = pP; + dlf.v3 = dP; + c = extractFeat(f, c, featsP, featsD); return c; } + public int second(Instances is, int i, int p, int d, int x, int label, Cluster cluster, long[] f) { + + // for(int k=0;k<f.length;k++) f[k]=0; - public int second(Instances is , int i,int p, int d, int x, int label, Cluster cluster, long[] f) - { + dl1.clean(); + dwp.clean(); + dlf.clean(); + dwwp.clean(); - //for(int k=0;k<f.length;k++) f[k]=0; + short[] pos = is.pposs[i]; + int[] forms = is.forms[i], lemmas = is.plemmas[i]; - dl1.clean(); dwp.clean();dlf.clean(); dwwp.clean(); - - short[] pos= is.pposs[i]; - int[] forms=is.forms[i],lemmas=is.plemmas[i]; - - int pP = pos[p], dP = pos[d]; - int pF = forms[p],dF = forms[d]; + int pF = forms[p], dF = forms[d]; int pL = lemmas[p], cL = lemmas[d]; - int sP = x!=-1 ? pos[x] : s_str, sF = x!=-1 ? forms[x] : s_stwrd, sL = x!=-1 ? lemmas[x] : s_stwrd; - - int n=_SIB; - if (pF>maxForm) pF=-1; - if (pL>maxForm) pL=-1; + int sP = x != -1 ? pos[x] : s_str, sF = x != -1 ? forms[x] : s_stwrd, sL = x != -1 ? lemmas[x] : s_stwrd; + + int n = _SIB; + if (pF > maxForm) + pF = -1; + if (pL > maxForm) + pL = -1; + + if (dF > maxForm) + dF = -1; + if (cL > maxForm) + cL = -1; + + if (sF > maxForm) + sF = -1; + if (sL > maxForm) + sL = -1; + + int c = 0; + + dl1.v1 = label; + dwwp.v1 = label; + dwp.v1 = label; + + dl1.v0 = n++; + dl1.v2 = pP; + dl1.v3 = dP; + dl1.v4 = sP; + dl1.cz5(); + f[c++] = dl1.getVal(); // f[c++]=dl1.csa(s_dist,dist); + dl1.v0 = n++; + dl1.v3 = sP; + dl1.cz4(); + f[c++] = dl1.getVal(); // f[c++]=dl1.csa(s_dist,dist); + dl1.v0 = n++; + dl1.v2 = dP; + dl1.cz4(); + f[c++] = dl1.getVal(); // f[c++]=dl1.csa(s_dist,dist); - if (dF>maxForm) dF=-1; - if (cL>maxForm) cL=-1; - - if (sF>maxForm) sF=-1; - if (sL>maxForm) sL=-1; - - int c =0; + // sibling only could be tried - dl1.v1=label;dwwp.v1=label;dwp.v1=label; - - dl1.v0= n++;dl1.v2=pP; dl1.v3=dP;dl1.v4=sP; dl1.cz5(); f[c++]=dl1.getVal() ; // f[c++]=dl1.csa(s_dist,dist); - dl1.v0= n++; dl1.v3=sP; dl1.cz4(); f[c++]=dl1.getVal(); //f[c++]=dl1.csa(s_dist,dist); - dl1.v0= n++; dl1.v2=dP; dl1.cz4(); f[c++]=dl1.getVal(); //f[c++]=dl1.csa(s_dist,dist); + dwwp.v0 = n++; + dwwp.v2 = pF; + dwwp.v3 = sF; + dwwp.cz4(); + f[c++] = dwwp.getVal(); // f[c++]=dwwp.csa(s_dist,dist); + dwwp.v0 = n++; + dwwp.v2 = dF; + dwwp.cz4(); + f[c++] = dwwp.getVal(); // f[c++]=dwwp.csa(s_dist,dist); - // sibling only could be tried - - dwwp.v0= n++; dwwp.v2=pF; dwwp.v3=sF; dwwp.cz4(); f[c++]=dwwp.getVal(); //f[c++]=dwwp.csa(s_dist,dist); - dwwp.v0= n++; dwwp.v2=dF; dwwp.cz4(); f[c++]=dwwp.getVal(); //f[c++]=dwwp.csa(s_dist,dist); - // 154 - dwp.v0= n++; dwp.v2=sF; dwp.v3=pP; dwp.cz4(); f[c++]=dwp.getVal(); //f[c++]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label; */dwp.v3=dP; dwp.cz4(); f[c++]=dwp.getVal(); //f[c++]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=pF; dwp.v3=sP; dwp.cz4(); f[c++]=dwp.getVal(); //f[c++]=dwp.csa(s_dist,dist); - dwp.v0= n++; /*dwp.v1=label;*/ dwp.v2=dF; dwp.cz4(); f[c++]=dwp.getVal();// f[c++]=dwp.csa(s_dist,dist); + dwp.v0 = n++; + dwp.v2 = sF; + dwp.v3 = pP; + dwp.cz4(); + f[c++] = dwp.getVal(); // f[c++]=dwp.csa(s_dist,dist); + dwp.v0 = n++; + /* dwp.v1=label; */dwp.v3 = dP; + dwp.cz4(); + f[c++] = dwp.getVal(); // f[c++]=dwp.csa(s_dist,dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = pF; + dwp.v3 = sP; + dwp.cz4(); + f[c++] = dwp.getVal(); // f[c++]=dwp.csa(s_dist,dist); + dwp.v0 = n++; + /* dwp.v1=label; */ dwp.v2 = dF; + dwp.cz4(); + f[c++] = dwp.getVal();// f[c++]=dwp.csa(s_dist,dist); // 158 - //lemmas - - dwwp.v0= n++; dwwp.v2=pL; dwwp.v3=sL; dwwp.cz4(); f[c++]=dwwp.getVal(); - dwwp.v0= n++; dwwp.v2=cL; dwwp.cz4(); f[c++]=dwwp.getVal(); //f[c++]=dwwp.csa(s_dist,dist); - dwp.v0= n++; dwp.v2=sL; dwp.v3=pP; dwp.cz4(); f[c++]=dwp.getVal();// f[c++]=dwp.csa(s_dist,dist); - dwp.v0= n++; dwp.v3=dP; dwp.cz4(); f[c++]=dwp.getVal(); // f[c++]=dwp.csa(s_dist,dist); - + // lemmas + + dwwp.v0 = n++; + dwwp.v2 = pL; + dwwp.v3 = sL; + dwwp.cz4(); + f[c++] = dwwp.getVal(); + dwwp.v0 = n++; + dwwp.v2 = cL; + dwwp.cz4(); + f[c++] = dwwp.getVal(); // f[c++]=dwwp.csa(s_dist,dist); + dwp.v0 = n++; + dwp.v2 = sL; + dwp.v3 = pP; + dwp.cz4(); + f[c++] = dwp.getVal();// f[c++]=dwp.csa(s_dist,dist); + dwp.v0 = n++; + dwp.v3 = dP; + dwp.cz4(); + f[c++] = dwp.getVal(); // f[c++]=dwp.csa(s_dist,dist); + // 162 - dwp.v0= n++; dwp.v2=pL; dwp.v3=sP; dwp.cz4(); f[c++]=dwp.getVal(); //f[c++]=dwp.csa(s_dist,dist); - dwp.v0= n++; dwp.v2=cL; dwp.cz4(); f[c++]=dwp.getVal();// f[c++]=dwp.csa(s_dist,dist); + dwp.v0 = n++; + dwp.v2 = pL; + dwp.v3 = sP; + dwp.cz4(); + f[c++] = dwp.getVal(); // f[c++]=dwp.csa(s_dist,dist); + dwp.v0 = n++; + dwp.v2 = cL; + dwp.cz4(); + f[c++] = dwp.getVal();// f[c++]=dwp.csa(s_dist,dist); // clusters - if (cluster.size()>10) { - + if (cluster.size() > 10) { + } - int pPm1 = p!=0 ? pos[p-1] : s_str; - int chldPm1 = d-1>=0 ? pos[d-1] : s_str; - int prntPp1 = p!=pos.length-1 ? pos[p+1] : s_end; - int chldPp1 = d!=pos.length-1 ? pos[d+1] : s_end; + int pPm1 = p != 0 ? pos[p - 1] : s_str; + int chldPm1 = d - 1 >= 0 ? pos[d - 1] : s_str; + int prntPp1 = p != pos.length - 1 ? pos[p + 1] : s_end; + int chldPp1 = d != pos.length - 1 ? pos[d + 1] : s_end; // sibling part of speech minus and plus 1 - int sPm1 = x>0 ? pos[x-1]:s_str; - int sPp1 = x<pos.length-1 ? pos[x + 1]:s_end; - - if (x+1==x|| x+1==p || x+1==d) sPp1=-1; - if (p+1==x|| p+1==p || p+1==d) prntPp1=-1; - if (d+1==x|| d+1==p || d+1==d) chldPp1=-1; - - if (x-1==x|| x-1==p || x-1==d) sPm1=-1; - if (d-1==x|| d-1==p || d-1==d) chldPm1=-1; - if (p-1==x|| p-1==p || p-1==d) pPm1=-1; - - - dl1.v0=n++; dl1.v2=sP; dl1.v3=sPp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.getVal(); + int sPm1 = x > 0 ? pos[x - 1] : s_str; + int sPp1 = x < pos.length - 1 ? pos[x + 1] : s_end; + + if (x + 1 == x || x + 1 == p || x + 1 == d) + sPp1 = -1; + if (p + 1 == x || p + 1 == p || p + 1 == d) + prntPp1 = -1; + if (d + 1 == x || d + 1 == p || d + 1 == d) + chldPp1 = -1; + + if (x - 1 == x || x - 1 == p || x - 1 == d) + sPm1 = -1; + if (d - 1 == x || d - 1 == p || d - 1 == d) + chldPm1 = -1; + if (p - 1 == x || p - 1 == p || p - 1 == d) + pPm1 = -1; + + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sPp1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.getVal(); // 165 - dl1.v0=n++; dl1.v2=sP; dl1.v3=sPm1;dl1.v4=pP; dl1.v5= (x-1==p?3:x-1==d?4:5); dl1.cz6(); f[c++]= dl1.getVal(); //dl1.getVal();// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=prntPp1;dl1.v5= (x==p+1?3:4); dl1.cz6(); f[c++]=dl1.getVal();// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pPm1;dl1.v5= (x==p-1?3:4); dl1.cz6(); f[c++]=dl1.getVal();// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sPp1;dl1.v4=pPm1;dl1.v5=pP;dl1.v6= (x==p-1?3:x==p+1?4:5); dl1.cz7(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sPm1; dl1.v3=sP;dl1.v4=pPm1;dl1.v5=pP;dl1.v6= (x==p-1?3:x-1==p?4:5); dl1.cz7(); f[c++]=dl1.getVal();// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sPp1;dl1.v4=pP;dl1.v5=prntPp1;dl1.v6= (x+1==p?3:x==p+1?4:5); dl1.cz7(); f[c++]=dl1.getVal();// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sPm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=prntPp1;dl1.v6= (x==p-1?3:x==p+1?4:5); dl1.cz7(); f[c++]=dl1.getVal();// f.add(li.l2i(l)); - - dl1.v0=n++; dl1.v2=sP; dl1.v3=sPp1;dl1.v4=dP; dl1.v5= (x+1==d?3:x+1==p?4:5); dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sPm1;dl1.v4=dP; dl1.v5= (x-1==d?3:x-1==p?4:5); dl1.cz6(); f[c++]=dl1.getVal(); - - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPp1;dl1.v5= (x==d+1?3:d+1==p?4:5); dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=chldPm1; dl1.v5= (x==d-1?3:d-1==p?4:5); dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sPp1;dl1.v4=chldPm1;dl1.v5=dP; dl1.v6= (x==d-1?3:x+1==d?4:5); dl1.cz7(); f[c++]=dl1.getVal();// f.add(li.l2i(l)); - dl1.v0=n++; dl1.v2=sPm1; dl1.v3=sP;dl1.v4=chldPm1;dl1.v5=dP; dl1.v6= (x-1==d?3:d-1==x?4:5); dl1.cz7(); f[c++]=dl1.getVal(); - dl1.v0= n++;dl1.v2=sP; dl1.v3=sPp1;dl1.v4=dP;dl1.v5=chldPp1;dl1.v6= (x==d+1?3:x+1==d?4:5); dl1.cz7();f[c++]=dl1.getVal();// f.add(li.l2i(l)); - dl1.v0= n++; dl1.v2=sPm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=chldPp1;dl1.v6= (x-1==d?3:d+1==x?4:5);dl1.cz7(); f[c++]=dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sPm1; + dl1.v4 = pP; + dl1.v5 = (x - 1 == p ? 3 : x - 1 == d ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); // dl1.getVal();// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = prntPp1; + dl1.v5 = (x == p + 1 ? 3 : 4); + dl1.cz6(); + f[c++] = dl1.getVal();// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pPm1; + dl1.v5 = (x == p - 1 ? 3 : 4); + dl1.cz6(); + f[c++] = dl1.getVal();// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sPp1; + dl1.v4 = pPm1; + dl1.v5 = pP; + dl1.v6 = (x == p - 1 ? 3 : x == p + 1 ? 4 : 5); + dl1.cz7(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sPm1; + dl1.v3 = sP; + dl1.v4 = pPm1; + dl1.v5 = pP; + dl1.v6 = (x == p - 1 ? 3 : x - 1 == p ? 4 : 5); + dl1.cz7(); + f[c++] = dl1.getVal();// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sPp1; + dl1.v4 = pP; + dl1.v5 = prntPp1; + dl1.v6 = (x + 1 == p ? 3 : x == p + 1 ? 4 : 5); + dl1.cz7(); + f[c++] = dl1.getVal();// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sPm1; + dl1.v3 = sP; + dl1.v4 = pP; + dl1.v5 = prntPp1; + dl1.v6 = (x == p - 1 ? 3 : x == p + 1 ? 4 : 5); + dl1.cz7(); + f[c++] = dl1.getVal();// f.add(li.l2i(l)); + + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sPp1; + dl1.v4 = dP; + dl1.v5 = (x + 1 == d ? 3 : x + 1 == p ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sPm1; + dl1.v4 = dP; + dl1.v5 = (x - 1 == d ? 3 : x - 1 == p ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = chldPp1; + dl1.v5 = (x == d + 1 ? 3 : d + 1 == p ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = chldPm1; + dl1.v5 = (x == d - 1 ? 3 : d - 1 == p ? 4 : 5); + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sPp1; + dl1.v4 = chldPm1; + dl1.v5 = dP; + dl1.v6 = (x == d - 1 ? 3 : x + 1 == d ? 4 : 5); + dl1.cz7(); + f[c++] = dl1.getVal();// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sPm1; + dl1.v3 = sP; + dl1.v4 = chldPm1; + dl1.v5 = dP; + dl1.v6 = (x - 1 == d ? 3 : d - 1 == x ? 4 : 5); + dl1.cz7(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sPp1; + dl1.v4 = dP; + dl1.v5 = chldPp1; + dl1.v6 = (x == d + 1 ? 3 : x + 1 == d ? 4 : 5); + dl1.cz7(); + f[c++] = dl1.getVal();// f.add(li.l2i(l)); + dl1.v0 = n++; + dl1.v2 = sPm1; + dl1.v3 = sP; + dl1.v4 = dP; + dl1.v5 = chldPp1; + dl1.v6 = (x - 1 == d ? 3 : d + 1 == x ? 4 : 5); + dl1.cz7(); + f[c++] = dl1.getVal(); // c=61; /* - if (cluster.size()>10) { - AtomicInteger N = new AtomicInteger(n); - c = addClusterFeatures(d, p, x, pos, forms, cluster, N, c, f,label); - n = N.get(); - } - */ + * if (cluster.size()>10) { AtomicInteger N = new AtomicInteger(n); c = + * addClusterFeatures(d, p, x, pos, forms, cluster, N, c, f,label); n = + * N.get(); } + */ // take those in for stacking if (s_stack) { - short[] prel = is.plabels[i],phead=is.pheads[i]; + short[] prel = is.plabels[i], phead = is.pheads[i]; - int g = p==phead[d]?1:2 ; - if (x>=0) g += p==phead[x]?4:8; + int g = p == phead[d] ? 1 : 2; + if (x >= 0) + g += p == phead[x] ? 4 : 8; - int gr = x==-1?s_relend:prel[x]; - - - dl2.v1 = label; - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.getVal(); - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.getVal(); - dl2.v0= n++;dl2.v2=prel[d];dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.getVal(); + int gr = x == -1 ? s_relend : prel[x]; - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=dP;dl2.cz6();f[c++]=dl2.getVal(); - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.cz6();f[c++]=dl2.getVal(); - dl2.v0= n++;dl2.v2=gr;dl2.v3=g;dl2.v4=sP;dl2.v5=pP;dl2.v6=dP;dl2.cz7();f[c++]=dl2.getVal(); + dl2.v1 = label; + dl2.v0 = n++; + dl2.v2 = prel[d]; + dl2.v3 = g; + dl2.v4 = sP; + dl2.v5 = dP; + dl2.cz6(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v2 = prel[d]; + dl2.v3 = g; + dl2.v4 = sP; + dl2.v5 = pP; + dl2.cz6(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v2 = prel[d]; + dl2.v3 = g; + dl2.v4 = sP; + dl2.v5 = pP; + dl2.v6 = dP; + dl2.cz7(); + f[c++] = dl2.getVal(); + + dl2.v0 = n++; + dl2.v2 = gr; + dl2.v3 = g; + dl2.v4 = sP; + dl2.v5 = dP; + dl2.cz6(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v2 = gr; + dl2.v3 = g; + dl2.v4 = sP; + dl2.v5 = pP; + dl2.cz6(); + f[c++] = dl2.getVal(); + dl2.v0 = n++; + dl2.v2 = gr; + dl2.v3 = g; + dl2.v4 = sP; + dl2.v5 = pP; + dl2.v6 = dP; + dl2.cz7(); + f[c++] = dl2.getVal(); } - - short[][] feats=is.feats[i]; - if (feats==null) return c; + short[][] feats = is.feats[i]; + if (feats == null) + return c; - short[] featsP =feats[d]; - short[] featsSbl =x!=-1?feats[x]:null; - dlf.v1=label; - dlf.v0= n++; dlf.v2=sP; dlf.v3=dP; - c = extractFeat(f, c ,featsP, featsSbl); + short[] featsP = feats[d]; + short[] featsSbl = x != -1 ? feats[x] : null; + dlf.v1 = label; + dlf.v0 = n++; + dlf.v2 = sP; + dlf.v3 = dP; + c = extractFeat(f, c, featsP, featsSbl); - featsP =feats[p]; - + featsP = feats[p]; - dlf.v0= n++; dlf.v1=label; dlf.v2=sP; dlf.v3=pP; - c = extractFeat(f, c ,featsP, featsSbl); - + dlf.v0 = n++; + dlf.v1 = label; + dlf.v2 = sP; + dlf.v3 = pP; + c = extractFeat(f, c, featsP, featsSbl); return c; } - /** * Separated this method to speed up parsing + * * @param d * @param p * @param x @@ -471,146 +983,369 @@ final public class Extractor { * @param f * @return */ - int addClusterFeatures(Instances is, int i, int d, int p, int x, Cluster cluster, int c, long[] f, int label) { - - // int n= N.get(); - - short[] pos= is.pposs[i]; - int[] forms=is.forms[i]; - - int n=190; + int addClusterFeatures(Instances is, int i, int d, int p, int x, Cluster cluster, int c, long[] f, int label) { + + // int n= N.get(); + + short[] pos = is.pposs[i]; + int[] forms = is.forms[i]; + + int n = 190; int pP = pos[p], dP = pos[d]; - int sP = x!=-1 ? pos[x] : s_str; - - - int pLSp1 = p != pos.length - 1 ? forms[p + 1]==-1?-1:cluster.getLP(forms[p + 1]): _cend; - int cLSp1 = d != pos.length - 1 ? forms[d + 1] ==-1?-1:cluster.getLP(forms[d + 1]):_cend; - int sLSp1 = x < pos.length -1 ? forms[x + 1] ==-1?-1:cluster.getLP(forms[x + 1]) : _cend; - - int pLSm1 = p!=0 ? forms[p - 1]==-1?-1:cluster.getLP(forms[p - 1]): _cstr; - int cLSm1 = d-1>=0 ? forms[d - 1] ==-1?-1:cluster.getLP(forms[d - 1]):_cstr; - int sLSm1 = x>0 ? forms[x - 1] ==-1?-1:cluster.getLP(forms[x - 1]):_cstr; - - //int c=61; - int pF = forms[p],dF = forms[d], sF = x!=-1 ? forms[x] : s_stwrd; - int prntLS = pF==-1?-1:cluster.getLP(pF), chldLS = dF==-1?-1:cluster.getLP(dF); - - int sblLS = (x != -1)&&(sF!=-1) ? cluster.getLP(sF) : s_stwrd; - - - d2lp.v1=label; - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.cz4(); f[c++]=d2lp.getVal(); - d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.cz4(); f[c++]=d2lp.getVal();// f[c++]=d2lp.csa(s_dist,dist); - - d3lp.v1= label; - d3lp.v0= n++; d3lp.v2=prntLS; d3lp.v3=chldLS; d3lp.v4=sblLS;d3lp.cz5(); f[c++]=d3lp.getVal(); - - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=chldLS; d2lp.v4=sF; d2lp.cz5(); f[c++]=d2lp.getVal(); //f[c++]=d2lp.csa(s_dist,dist); - d2lp.v0= n++; d2lp.v2=prntLS; d2lp.v3=sblLS; d2lp.v4=dF; d2lp.cz5(); f[c++]=d2lp.getVal(); //f[c++]=d2lp.csa(s_dist,dist); - d2lp.v0= n++; d2lp.v2=chldLS; d2lp.v3=sblLS; d2lp.v4=pF; d2lp.cz5(); f[c++]=d2lp.getVal(); //f[c++]=d2lp.csa(s_dist,dist); - - d2pp.v1=label; - d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=chldLS; d2pp.v4=sP; d2pp.cz5(); f[c++]=d2pp.getVal(); //f[c++]=d2pp.csa(s_dist,dist); - d2pp.v0= n++; d2pp.v2=prntLS; d2pp.v3=sblLS; d2pp.v4=dP; d2pp.cz5(); f[c++]=d2pp.getVal(); //f[c++]=d2pp.csa(s_dist,dist); - d2pp.v0= n++; d2pp.v2=chldLS; d2pp.v3=sblLS; d2pp.v4=pP; d2pp.cz5(); f[c++]=d2pp.getVal(); //f[c++]=d2pp.csa(s_dist,dist); - - - if (x+1==x|| x+1==p || x+1==d) sLSp1=-1; - if (p+1==x|| p+1==p || p+1==d) pLSp1=-1; - if (d+1==x|| d+1==p || d+1==d) cLSp1=-1; - - if (x-1==x|| x-1==p || x-1==d) sLSm1=-1; - if (d-1==x|| d-1==p || d-1==d) cLSm1=-1; - if (p-1==x|| p-1==p || p-1==d) pLSm1=-1; - - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.getVal(); - - - - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=pP; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSp1;dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=pP;dl1.v4=pLSm1; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=pLSm1;dl1.v5=pP; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP; dl1.v4=pP;dl1.v5=pLSp1; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=dP; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSp1; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++;dl1.v2=sP; dl1.v3=dP;dl1.v4=cLSm1; dl1.cz5(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sP; dl1.v3=sLSm1;dl1.v4=cLSm1;dl1.v5=dP; dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0=n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=cLSm1;dl1.v5=dP;dl1.cz6(); f[c++]=dl1.getVal(); - dl1.v0= n++;dl1.v2=sP; dl1.v3=sLSp1;dl1.v4=dP;dl1.v5=cLSp1;dl1.cz6();f[c++]=dl1.getVal(); - dl1.v0= n++; dl1.v2=sLSm1; dl1.v3=sP;dl1.v4=dP;dl1.v5=cLSp1; dl1.cz6(); f[c++]=dl1.getVal(); - - + int sP = x != -1 ? pos[x] : s_str; + + int pLSp1 = p != pos.length - 1 ? forms[p + 1] == -1 ? -1 : cluster.getLP(forms[p + 1]) : _cend; + int cLSp1 = d != pos.length - 1 ? forms[d + 1] == -1 ? -1 : cluster.getLP(forms[d + 1]) : _cend; + int sLSp1 = x < pos.length - 1 ? forms[x + 1] == -1 ? -1 : cluster.getLP(forms[x + 1]) : _cend; + + int pLSm1 = p != 0 ? forms[p - 1] == -1 ? -1 : cluster.getLP(forms[p - 1]) : _cstr; + int cLSm1 = d - 1 >= 0 ? forms[d - 1] == -1 ? -1 : cluster.getLP(forms[d - 1]) : _cstr; + int sLSm1 = x > 0 ? forms[x - 1] == -1 ? -1 : cluster.getLP(forms[x - 1]) : _cstr; + + // int c=61; + int pF = forms[p], dF = forms[d], sF = x != -1 ? forms[x] : s_stwrd; + int prntLS = pF == -1 ? -1 : cluster.getLP(pF), chldLS = dF == -1 ? -1 : cluster.getLP(dF); + + int sblLS = (x != -1) && (sF != -1) ? cluster.getLP(sF) : s_stwrd; + + d2lp.v1 = label; + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = sblLS; + d2lp.cz4(); + f[c++] = d2lp.getVal(); + d2lp.v0 = n++; + d2lp.v2 = chldLS; + d2lp.v3 = sblLS; + d2lp.cz4(); + f[c++] = d2lp.getVal();// f[c++]=d2lp.csa(s_dist,dist); + + d3lp.v1 = label; + d3lp.v0 = n++; + d3lp.v2 = prntLS; + d3lp.v3 = chldLS; + d3lp.v4 = sblLS; + d3lp.cz5(); + f[c++] = d3lp.getVal(); + + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = chldLS; + d2lp.v4 = sF; + d2lp.cz5(); + f[c++] = d2lp.getVal(); // f[c++]=d2lp.csa(s_dist,dist); + d2lp.v0 = n++; + d2lp.v2 = prntLS; + d2lp.v3 = sblLS; + d2lp.v4 = dF; + d2lp.cz5(); + f[c++] = d2lp.getVal(); // f[c++]=d2lp.csa(s_dist,dist); + d2lp.v0 = n++; + d2lp.v2 = chldLS; + d2lp.v3 = sblLS; + d2lp.v4 = pF; + d2lp.cz5(); + f[c++] = d2lp.getVal(); // f[c++]=d2lp.csa(s_dist,dist); + + d2pp.v1 = label; + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = chldLS; + d2pp.v4 = sP; + d2pp.cz5(); + f[c++] = d2pp.getVal(); // f[c++]=d2pp.csa(s_dist,dist); + d2pp.v0 = n++; + d2pp.v2 = prntLS; + d2pp.v3 = sblLS; + d2pp.v4 = dP; + d2pp.cz5(); + f[c++] = d2pp.getVal(); // f[c++]=d2pp.csa(s_dist,dist); + d2pp.v0 = n++; + d2pp.v2 = chldLS; + d2pp.v3 = sblLS; + d2pp.v4 = pP; + d2pp.cz5(); + f[c++] = d2pp.getVal(); // f[c++]=d2pp.csa(s_dist,dist); + + if (x + 1 == x || x + 1 == p || x + 1 == d) + sLSp1 = -1; + if (p + 1 == x || p + 1 == p || p + 1 == d) + pLSp1 = -1; + if (d + 1 == x || d + 1 == p || d + 1 == d) + cLSp1 = -1; + + if (x - 1 == x || x - 1 == p || x - 1 == d) + sLSm1 = -1; + if (d - 1 == x || d - 1 == p || d - 1 == d) + cLSm1 = -1; + if (p - 1 == x || p - 1 == p || p - 1 == d) + pLSm1 = -1; + + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSp1; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSm1; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSp1; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSm1; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.getVal(); + + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = pP; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSp1; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = pP; + dl1.v4 = pLSm1; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pLSm1; + dl1.v5 = pP; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = pP; + dl1.v5 = pLSp1; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = dP; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSp1; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = dP; + dl1.v4 = cLSm1; + dl1.cz5(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSm1; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = cLSm1; + dl1.v5 = dP; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sP; + dl1.v3 = sLSp1; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.getVal(); + dl1.v0 = n++; + dl1.v2 = sLSm1; + dl1.v3 = sP; + dl1.v4 = dP; + dl1.v5 = cLSp1; + dl1.cz6(); + f[c++] = dl1.getVal(); + return c; } private int extractFeat(long[] f, int cnt, short[] featsP, short[] featsD) { - if (featsP!=null && featsD!=null) { - for(short i1=0;i1<featsP.length;i1++) { - for(short i2=0;i2<featsD.length;i2++) { - dlf.v4=featsP[i1]; dlf.v5=featsD[i2]; - dlf.cz6(); f[cnt++]=dlf.getVal(); + if (featsP != null && featsD != null) { + for (short i1 = 0; i1 < featsP.length; i1++) { + for (short i2 = 0; i2 < featsD.length; i2++) { + dlf.v4 = featsP[i1]; + dlf.v5 = featsD[i2]; + dlf.cz6(); + f[cnt++] = dlf.getVal(); } - } - } else if (featsP==null && featsD!=null) { + } + } else if (featsP == null && featsD != null) { - for(short i2=0;i2<featsD.length;i2++) { - dlf.v4=nofeat; dlf.v5=featsD[i2]; - dlf.cz6(); f[cnt++]=dlf.getVal(); + for (short i2 = 0; i2 < featsD.length; i2++) { + dlf.v4 = nofeat; + dlf.v5 = featsD[i2]; + dlf.cz6(); + f[cnt++] = dlf.getVal(); - } - } else if (featsP!=null && featsD==null) { + } + } else if (featsP != null && featsD == null) { - for(short i1=0;i1<featsP.length;i1++) { - dlf.v4=featsP[i1]; dlf.v5=nofeat; - dlf.cz6(); f[cnt++]=dlf.getVal(); + for (short i1 = 0; i1 < featsP.length; i1++) { + dlf.v4 = featsP[i1]; + dlf.v5 = nofeat; + dlf.cz6(); + f[cnt++] = dlf.getVal(); - } + } } return cnt; } + public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, + short feats[][], Cluster cluster, FV f) { - - public FV encodeCat(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, FV f) { - - - long[] svs = new long[250]; + long[] svs = new long[250]; for (int i = 1; i < heads.length; i++) { + basic(pposs, heads[i], i, f); - basic(pposs, heads[i], i, f); - - int w1 = heads[i]<i?heads[i]:i; - int w2 = heads[i]<i?i:heads[i]; - - int dir =heads[i]<i?0:s_rel1; + int w1 = heads[i] < i ? heads[i] : i; + int w2 = heads[i] < i ? i : heads[i]; + + int dir = heads[i] < i ? 0 : s_rel1; int label = types[i] + dir; - - int c = firstm(is, ic, w1, w2, label, cluster,svs); - for(int k=0;k<c;k++) dl1.map(f,svs[k]); - int ch,cmi,cmo; + int c = firstm(is, ic, w1, w2, label, cluster, svs); + for (int k = 0; k < c; k++) + dl1.map(f, svs[k]); + + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); @@ -622,67 +1357,73 @@ final public class Extractor { cmo = leftmostLeft(heads, i, 0); } - int lx =types[i] + s_rel1*((heads[i]<i?0:1) +8); - c =second(is,ic,w1, w2,ch, lx, cluster, svs); - for(int k=0;k<c;k++) dl1.map(f,svs[k]); - c = addClusterFeatures(is,ic, w1, w2, ch, cluster, c, svs,lx); - for(int k=0;k<c;k++) dl1.map(f,svs[k]); - - lx =types[i]+s_rel1*((heads[i]<i?0:1) + ((cmi < i)?0:2) ); - c =second(is, ic,w1,w2,cmi, lx, cluster, svs); - for(int k=0;k<c;k++) dl1.map(f,svs[k]); - - c = addClusterFeatures(is,ic, w1, w2, cmi, cluster, c, svs,lx); - for(int k=0;k<c;k++) dl1.map(f,svs[k]); - - lx =types[i]+s_rel1*((heads[i]<i?0:1) + ((cmo < i)?0:2) ); - c =second(is, ic, w1,w2,cmo, lx, cluster, svs); - for(int k=0;k<c;k++) dl1.map(f,svs[k]); - - c = addClusterFeatures(is,ic, w1, w2, cmo, cluster, c, svs,lx); - for(int k=0;k<c;k++) dl1.map(f,svs[k]); + int lx = types[i] + s_rel1 * ((heads[i] < i ? 0 : 1) + 8); + c = second(is, ic, w1, w2, ch, lx, cluster, svs); + for (int k = 0; k < c; k++) + dl1.map(f, svs[k]); + c = addClusterFeatures(is, ic, w1, w2, ch, cluster, c, svs, lx); + for (int k = 0; k < c; k++) + dl1.map(f, svs[k]); + + lx = types[i] + s_rel1 * ((heads[i] < i ? 0 : 1) + ((cmi < i) ? 0 : 2)); + c = second(is, ic, w1, w2, cmi, lx, cluster, svs); + for (int k = 0; k < c; k++) + dl1.map(f, svs[k]); + + c = addClusterFeatures(is, ic, w1, w2, cmi, cluster, c, svs, lx); + for (int k = 0; k < c; k++) + dl1.map(f, svs[k]); + + lx = types[i] + s_rel1 * ((heads[i] < i ? 0 : 1) + ((cmo < i) ? 0 : 2)); + c = second(is, ic, w1, w2, cmo, lx, cluster, svs); + for (int k = 0; k < c; k++) + dl1.map(f, svs[k]); + + c = addClusterFeatures(is, ic, w1, w2, cmo, cluster, c, svs, lx); + for (int k = 0; k < c; k++) + dl1.map(f, svs[k]); } return f; } - public void compare(Instances is, int ic, short pos[], short[] heads, short[] types, Cluster cluster, F2SF f, DataFES x) { + public void compare(Instances is, int ic, short pos[], short[] heads, short[] types, Cluster cluster, F2SF f, + DataFES x) { + long[] svs = new long[250]; - long[] svs = new long[250]; - - float fx =0.0F; - - for (int i = 1; i < heads.length; i++) { f.clear(); - basic(pos, heads[i], i, f); - + basic(pos, heads[i], i, f); + if (x.pl[heads[i]][i] != f.getScore()) { - DB.println("basic diff "+x.pl[heads[i]][i] +" fg "+f.getScore()); + DB.println("basic diff " + x.pl[heads[i]][i] + " fg " + f.getScore()); } - - int w1 = heads[i]<i?heads[i]:i; - int w2 = heads[i]<i?i:heads[i]; - - int dir =heads[i]<i?0:s_rel1; + + int w1 = heads[i] < i ? heads[i] : i; + int w2 = heads[i] < i ? i : heads[i]; + + int dir = heads[i] < i ? 0 : s_rel1; int label = types[i] + dir; f.clear(); - int c = firstm(is, ic, w1, w2, label, cluster,svs); - for(int k=0;k<c;k++) dl1.map(f,svs[k]); - + int c = firstm(is, ic, w1, w2, label, cluster, svs); + for (int k = 0; k < c; k++) + dl1.map(f, svs[k]); + if (x.lab[heads[i]][i][types[i]] != f.getScore()) { - DB.println("first diff "+x.lab[heads[i]][i][types[i]] +" fg "+f.getScore()); + DB.println("first diff " + x.lab[heads[i]][i][types[i]] + " fg " + f.getScore()); } - + short[] labels = Edges.get(pos[heads[i]], pos[i]); - int lid=-1; - for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} - - + int lid = -1; + for (int k = 0; k < labels.length; k++) + if (types[i] == labels[k]) { + lid = k; + break; + } - int ch,cmi,cmo; + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); @@ -695,67 +1436,69 @@ final public class Extractor { } f.clear(); - - - int lx =types[i] + s_rel1*((heads[i]<i?0:1) +8); - c =second(is,ic,w1, w2,ch, lx, cluster, svs); - for(int k=0;k<c;k++) dl1.map(f,svs[k]); - - if (x.sib[heads[i]][i][ch==-1?heads[i]:ch][lid] != f.getScore()) { - DB.println("sib diff "+x.sib[heads[i]][i][ch==-1?i:ch][lid] +" fg "+f.getScore()); + + int lx = types[i] + s_rel1 * ((heads[i] < i ? 0 : 1) + 8); + c = second(is, ic, w1, w2, ch, lx, cluster, svs); + for (int k = 0; k < c; k++) + dl1.map(f, svs[k]); + + if (x.sib[heads[i]][i][ch == -1 ? heads[i] : ch][lid] != f.getScore()) { + DB.println("sib diff " + x.sib[heads[i]][i][ch == -1 ? i : ch][lid] + " fg " + f.getScore()); } - + f.clear(); - - - lx =types[i]+s_rel1*((heads[i]<i?0:1) + ((cmi < i)?0:2) ); - c =second(is, ic,w1,w2,cmi, lx, cluster, svs); - for(int k=0;k<c;k++) dl1.map(f,svs[k]); - - if (x.gra[heads[i]][i][cmi==-1?i:cmi][lid] != f.getScore() ) { - DB.println("gcm diff "+x.gra[heads[i]][i][cmi==-1?i:cmi][lid] +" fg "+f.getScore()+" cmi "+cmi+" i "+i+ - " head "+heads[i]+" w1 "+w1+" w2 "+w2+" label "+lx+" "+((heads[i]<i?0:1) + ((cmi < i)?0:2) ) ); - - System.out.println("w1 "+w1+" w2 "+w2+" cmi "+cmi+" label "+label+" "); - - for (long k : svs) System.out.print(k+" "); + + lx = types[i] + s_rel1 * ((heads[i] < i ? 0 : 1) + ((cmi < i) ? 0 : 2)); + c = second(is, ic, w1, w2, cmi, lx, cluster, svs); + for (int k = 0; k < c; k++) + dl1.map(f, svs[k]); + + if (x.gra[heads[i]][i][cmi == -1 ? i : cmi][lid] != f.getScore()) { + DB.println("gcm diff " + x.gra[heads[i]][i][cmi == -1 ? i : cmi][lid] + " fg " + f.getScore() + " cmi " + + cmi + " i " + i + " head " + heads[i] + " w1 " + w1 + " w2 " + w2 + " label " + lx + " " + + ((heads[i] < i ? 0 : 1) + ((cmi < i) ? 0 : 2))); + + System.out.println("w1 " + w1 + " w2 " + w2 + " cmi " + cmi + " label " + label + " "); + + for (long k : svs) + System.out.print(k + " "); System.out.println(); - + } f.clear(); - lx =types[i]+s_rel1*((heads[i]<i?0:1) + ((cmo < i)?0:2) ); - c =second(is, ic, w1,w2,cmo, lx, cluster, svs); - for(int k=0;k<c;k++) dl1.map(f,svs[k]); - - if (x.gra[heads[i]][i][cmo==-1?i:cmo][lid] != f.getScore() ) { - DB.println("gcm diff "+x.gra[heads[i]][i][cmo==-1?i:cmo][lid] +" fg "+f.getScore()+" cmo "+cmo+" i "+i+ - " head "+heads[i]+" w1 "+w1+" w2 "+w2+" label "+lx+" "+((heads[i]<i?0:1) + ((cmi < i)?0:2) ) ); + lx = types[i] + s_rel1 * ((heads[i] < i ? 0 : 1) + ((cmo < i) ? 0 : 2)); + c = second(is, ic, w1, w2, cmo, lx, cluster, svs); + for (int k = 0; k < c; k++) + dl1.map(f, svs[k]); + + if (x.gra[heads[i]][i][cmo == -1 ? i : cmo][lid] != f.getScore()) { + DB.println("gcm diff " + x.gra[heads[i]][i][cmo == -1 ? i : cmo][lid] + " fg " + f.getScore() + " cmo " + + cmo + " i " + i + " head " + heads[i] + " w1 " + w1 + " w2 " + w2 + " label " + lx + " " + + ((heads[i] < i ? 0 : 1) + ((cmi < i) ? 0 : 2))); - System.out.println("w1 "+w1+" w2 "+w2+" cmi "+cmi+" label "+label+" "); + System.out.println("w1 " + w1 + " w2 " + w2 + " cmi " + cmi + " label " + label + " "); - for (long k : svs) System.out.print(k+" "); + for (long k : svs) + System.out.print(k + " "); System.out.println(); - + } } - } - - - public short[] searchLabel(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, short[] types, short feats[][], Cluster cluster, IFV f) { + public short[] searchLabel(Instances is, int ic, short pposs[], int forms[], int[] lemmas, short[] heads, + short[] types, short feats[][], Cluster cluster, IFV f) { - long[] svs = new long[250]; + long[] svs = new long[250]; short[] newLabels = new short[types.length]; for (int i = 1; i < heads.length; i++) { + // int n =basic(pposs, forms, heads[i], i, cluster, f); - // int n =basic(pposs, forms, heads[i], i, cluster, f); - - int ch,cmi,cmo; + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); @@ -767,75 +1510,85 @@ final public class Extractor { cmo = leftmostLeft(heads, i, 0); } - - short labels[] = Edges.get(pposs[is.heads[ic][i]],pposs[i]); + short labels[] = Edges.get(pposs[is.heads[ic][i]], pposs[i]); float best = -1000; short bestL = 0; - for(int j=0; j< labels.length;j++) { + for (short label : labels) { f.clear(); - firstm(is, ic, heads[i], i, labels[j], cluster,svs); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); + firstm(is, ic, heads[i], i, label, cluster, svs); + for (long sv : svs) + dl1.map(f, sv); - second(is,ic,heads[i], i,ch, labels[j], cluster, svs); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); + second(is, ic, heads[i], i, ch, label, cluster, svs); + for (long sv : svs) + dl1.map(f, sv); - second(is, ic,heads[i],i,cmi, labels[j], cluster, svs); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); + second(is, ic, heads[i], i, cmi, label, cluster, svs); + for (long sv : svs) + dl1.map(f, sv); - second(is, ic, heads[i],i,cmo, labels[j], cluster, svs); - for(int k=0;k<svs.length;k++) dl1.map(f,svs[k]); + second(is, ic, heads[i], i, cmo, label, cluster, svs); + for (long sv : svs) + dl1.map(f, sv); if (best < f.getScore()) { - best= (float)f.getScore(); - bestL= labels[j]; - newLabels[i]=bestL; + best = (float) f.getScore(); + bestL = label; + newLabels[i] = bestL; } - } } return newLabels; - //return f; + // return f; } - - public static float encode3(short[] pos, short heads[] , short[] types, DataFES d2) { - + public static float encode3(short[] pos, short heads[], short[] types, DataFES d2) { float v = 0F; for (int i = 1; i < heads.length; i++) { - // int dir= (heads[i] < i)? 0:1; + // int dir= (heads[i] < i)? 0:1; v += d2.pl[heads[i]][i]; v += d2.lab[heads[i]][i][types[i]]; - // boolean left = i<heads[i]; + // boolean left = i<heads[i]; short[] labels = Edges.get(pos[heads[i]], pos[i]); - int lid=-1; - for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} + int lid = -1; + for (int k = 0; k < labels.length; k++) + if (types[i] == labels[k]) { + lid = k; + break; + } - int ch,cmi,cmo; + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); cmo = rightmostRight(heads, i, heads.length); - if (ch==-1) ch=heads[i]; - if (cmi==-1) cmi=heads[i]; - if (cmo==-1) cmo=heads[i]; + if (ch == -1) + ch = heads[i]; + if (cmi == -1) + cmi = heads[i]; + if (cmo == -1) + cmo = heads[i]; } else { ch = leftmostLeft(heads, heads[i], i); cmi = rightmostRight(heads, i, heads[i]); cmo = leftmostLeft(heads, i, 0); - if (ch==-1) ch=i; - if (cmi==-1) cmi=i; - if (cmo==-1) cmo=i; + if (ch == -1) + ch = i; + if (cmi == -1) + cmi = i; + if (cmo == -1) + cmo = i; } v += d2.sib[heads[i]][i][ch][lid]; v += d2.gra[heads[i]][i][cmi][lid]; @@ -843,38 +1596,47 @@ final public class Extractor { } return v; } - - public static float encode3(short[] pos, short heads[] , short[] types, DataFES d2, float[] scores) { + + public static float encode3(short[] pos, short heads[], short[] types, DataFES d2, float[] scores) { float v = 0F; for (int i = 1; i < heads.length; i++) { - - scores[i]= d2.pl[heads[i]][i]; + scores[i] = d2.pl[heads[i]][i]; scores[i] += d2.lab[heads[i]][i][types[i]]; short[] labels = Edges.get(pos[heads[i]], pos[i]); - int lid=-1; - for(int k=0;k<labels.length;k++) if (types[i]== labels[k]) {lid= k;break;} + int lid = -1; + for (int k = 0; k < labels.length; k++) + if (types[i] == labels[k]) { + lid = k; + break; + } - int ch,cmi,cmo; + int ch, cmi, cmo; if (heads[i] < i) { ch = rightmostRight(heads, heads[i], i); cmi = leftmostLeft(heads, i, heads[i]); cmo = rightmostRight(heads, i, heads.length); - if (ch==-1) ch=heads[i]; - if (cmi==-1) cmi=heads[i]; - if (cmo==-1) cmo=heads[i]; + if (ch == -1) + ch = heads[i]; + if (cmi == -1) + cmi = heads[i]; + if (cmo == -1) + cmo = heads[i]; } else { ch = leftmostLeft(heads, heads[i], i); cmi = rightmostRight(heads, i, heads[i]); cmo = leftmostLeft(heads, i, 0); - if (ch==-1) ch=i; - if (cmi==-1) cmi=i; - if (cmo==-1) cmo=i; + if (ch == -1) + ch = i; + if (cmi == -1) + cmi = i; + if (cmo == -1) + cmo = i; } scores[i] += d2.sib[heads[i]][i][ch][lid]; scores[i] += d2.gra[heads[i]][i][cmi][lid]; @@ -883,52 +1645,48 @@ final public class Extractor { return v; } - public static int rightmostRight(short[] heads, int head, int max) { int rightmost = -1; - for (int i = head + 1; i < max; i++) if (heads[i] == head) rightmost = i; + for (int i = head + 1; i < max; i++) + if (heads[i] == head) + rightmost = i; return rightmost; } public static int leftmostLeft(short[] heads, int head, int min) { int leftmost = -1; - for (int i = head - 1; i > min; i--) if (heads[i] == head) leftmost = i; + for (int i = head - 1; i > min; i--) + if (heads[i] == head) + leftmost = i; return leftmost; } - public static final String REL = "REL",END = "END",STR = "STR",LA = "LA",RA = "RA"; + public static final String REL = "REL", END = "END", STR = "STR", LA = "LA", RA = "RA"; - private static int ra,la; + private static int ra, la; private static int s_str; - private static int s_end, _cend,_cstr, s_stwrd,s_relend; + private static int s_end, _cend, _cstr, s_stwrd, s_relend; - protected static final String TYPE = "TYPE",DIR = "D"; + protected static final String TYPE = "TYPE", DIR = "D"; public static final String POS = "POS"; - protected static final String DIST = "DIST",MID = "MID"; - - private static final String _0 = "0",_4 = "4",_3 = "3", _2 = "2",_1 = "1",_5 = "5",_10 = "10"; - - private static int di0, d4,d3,d2,d1,d5,d10; - - - private static final String WORD = "WORD",STWRD = "STWRD", STPOS = "STPOS"; + protected static final String DIST = "DIST", MID = "MID"; + private static final String _0 = "0", _4 = "4", _3 = "3", _2 = "2", _1 = "1", _5 = "5", _10 = "10"; + private static final String WORD = "WORD", STWRD = "STWRD", STPOS = "STPOS"; private static int nofeat; - public static int maxForm; - /** * Initialize the features. + * * @param maxFeatures */ static public void initFeatures() { - MFO mf = new MFO(); mf.register(POS, MID); s_str = mf.register(POS, STR); @@ -936,38 +1694,33 @@ final public class Extractor { s_relend = mf.register(REL, END); - _cstr= mf.register(Cluster.SPATH,STR); - _cend=mf.register(Cluster.SPATH,END); - + _cstr = mf.register(Cluster.SPATH, STR); + _cend = mf.register(Cluster.SPATH, END); mf.register(TYPE, POS); - s_stwrd=mf.register(WORD,STWRD); - mf.register(POS,STPOS); + s_stwrd = mf.register(WORD, STWRD); + mf.register(POS, STPOS); la = mf.register(DIR, LA); ra = mf.register(DIR, RA); - // mf.register(TYPE, CHAR); - - mf.register(TYPE, Pipe.FEAT); - nofeat=mf.register(Pipe.FEAT, "NOFEAT"); + // mf.register(TYPE, CHAR); - for(int k=0;k<150;k++) mf.register(TYPE, "F"+k); + mf.register(TYPE, PipeGen.FEAT); + nofeat = mf.register(PipeGen.FEAT, "NOFEAT"); + for (int k = 0; k < 150; k++) + mf.register(TYPE, "F" + k); - di0=mf.register(DIST, _0); - d1=mf.register(DIST, _1); - d2=mf.register(DIST, _2); - d3=mf.register(DIST, _3); - d4=mf.register(DIST, _4); - d5=mf.register(DIST, _5); - // d5l=mf.register(DIST, _5l); - d10=mf.register(DIST, _10); - + mf.register(DIST, _0); + mf.register(DIST, _1); + mf.register(DIST, _2); + mf.register(DIST, _3); + mf.register(DIST, _4); + mf.register(DIST, _5); + mf.register(DIST, _10); } - - } diff --git a/dependencyParser/mate-tools/src/is2/parser/MFO.java b/dependencyParser/mate-tools/src/is2/parser/MFO.java index 519ea06..3157ef9 100755 --- a/dependencyParser/mate-tools/src/is2/parser/MFO.java +++ b/dependencyParser/mate-tools/src/is2/parser/MFO.java @@ -1,257 +1,246 @@ package is2.parser; - -import is2.data.IEncoder; -import is2.data.IEncoderPlus; -import is2.data.IFV; -import is2.data.Long2IntInterface; -import is2.util.DB; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.Map.Entry; +import is2.data.IEncoderPlus; +import is2.util.DB; + /** * Map Features, do not map long to integer - * + * * @author Bernd Bohnet, 20.09.2009 */ -final public class MFO implements IEncoderPlus { - +final public class MFO implements IEncoderPlus { + /** The features and its values */ - static private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>(); + static private final HashMap<String, HashMap<String, Integer>> m_featureSets = new HashMap<String, HashMap<String, Integer>>(); /** The feature class and the number of values */ - static private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>(); + static private final HashMap<String, Integer> m_featureCounters = new HashMap<String, Integer>(); /** The number of bits needed to encode a feature */ - static final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>(); - + static final HashMap<String, Integer> m_featureBits = new HashMap<String, Integer>(); + /** Integer counter for long2int */ - static private int count=0; - + static private int count = 0; + /** Stop growing */ - public boolean stop=false; - - final public static String NONE="<None>"; - - - - - - - - public MFO () {} - - - public int size() {return count;} - - - + public boolean stop = false; + + final public static String NONE = "<None>"; + + public MFO() { + } + + public int size() { + return count; + } + /** * Register an attribute class, if it not exists and add a possible value + * * @param type * @param type2 */ - final public int register(String a, String v) { + @Override + final public int register(String a, String v) { - HashMap<String,Integer> fs = getFeatureSet().get(a); - if (fs==null) { - fs = new HashMap<String,Integer>(); + HashMap<String, Integer> fs = getFeatureSet().get(a); + if (fs == null) { + fs = new HashMap<String, Integer>(); getFeatureSet().put(a, fs); fs.put(NONE, 0); getFeatureCounter().put(a, 1); } Integer c = getFeatureCounter().get(a); - + Integer i = fs.get(v); - if (i==null) { + if (i == null) { fs.put(v, c); c++; - getFeatureCounter().put(a,c); - return c-1; - } else return i; + getFeatureCounter().put(a, c); + return c - 1; + } else + return i; } - + /** * Calculates the number of bits needed to encode a feature */ - public void calculateBits() { - - int total=0; - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2))); + public void calculateBits() { + + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + int bits = (int) Math.ceil((Math.log(e.getValue() + 1) / Math.log(2))); m_featureBits.put(e.getKey(), bits); - total+=bits; - // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1)); } - -// System.out.println("total number of needed bits "+total); + + // System.out.println("total number of needed bits "+total); } - - - - public String toString() { - + + @Override + public String toString() { + StringBuffer content = new StringBuffer(); - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - content.append(e.getKey()+" "+e.getValue()); - content.append(':'); - // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); - content.append(getFeatureBits(e.getKey())); - - /*if (vs.size()<120) - for(Entry<String,Integer> e2 : vs.entrySet()) { - content.append(e2.getKey()+" ("+e2.getValue()+") "); - }*/ - content.append('\n'); - + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + content.append(e.getKey() + " " + e.getValue()); + content.append(':'); + // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); + content.append(getFeatureBits(e.getKey())); + + /* + * if (vs.size()<120) for(Entry<String,Integer> e2 : vs.entrySet()) + * { content.append(e2.getKey()+" ("+e2.getValue()+") "); } + */ + content.append('\n'); + } return content.toString(); } - - - + static final public short getFeatureBits(String a) { - if(m_featureBits.get(a)==null) return 0; - return (short)m_featureBits.get(a).intValue(); + if (m_featureBits.get(a) == null) + return 0; + return (short) m_featureBits.get(a).intValue(); } - - /** * Get the integer place holder of the string value v of the type a - * - * @param t the type - * @param v the value + * + * @param t + * the type + * @param v + * the value * @return the integer place holder of v */ - final public int getValue(String t, String v) { - - if (m_featureSets.get(t)==null) return -1; + @Override + final public int getValue(String t, String v) { + + if (m_featureSets.get(t) == null) + return -1; Integer vi = m_featureSets.get(t).get(v); - if (vi==null) return -1; //stop && + if (vi == null) + return -1; // stop && return vi.intValue(); } - /** - * Static version of getValue - * @see getValue - */ + /** + * Static version of getValue + * + * @see getValue + */ static final public int getValueS(String a, String v) { - - if (m_featureSets.get(a)==null) return -1; - Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; //stop && - return vi.intValue(); - } - - public int hasValue(String a, String v) { - + + if (m_featureSets.get(a) == null) + return -1; Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; + if (vi == null) + return -1; // stop && return vi.intValue(); } - - + + public int hasValue(String a, String v) { + + Integer vi = m_featureSets.get(a).get(v); + if (vi == null) + return -1; + return vi.intValue(); + } + public static String printBits(int k) { StringBuffer s = new StringBuffer(); - for(int i =0;i<31;i++) { - s.append((k&0x00000001)==1?'1':'0'); - k=k>>1; - + for (int i = 0; i < 31; i++) { + s.append((k & 0x00000001) == 1 ? '1' : '0'); + k = k >> 1; + } s.reverse(); return s.toString(); } - - - - - - - - /** - * Maps a long to a integer value. This is very useful to save memory for sparse data long values + + /** + * Maps a long to a integer value. This is very useful to save memory for + * sparse data long values + * * @param l * @return the integer */ - static public int misses = 0; - static public int good = 0; - + static public int misses = 0; + static public int good = 0; - - /** * Write the data + * * @param dos * @throws IOException */ - static public void writeData(DataOutputStream dos) throws IOException { - dos.writeInt(getFeatureSet().size()); - // DB.println("write"+getFeatureSet().size()); - for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) { - dos.writeUTF(e.getKey()); - dos.writeInt(e.getValue().size()); - - for(Entry<String,Integer> e2 : e.getValue().entrySet()) { - - if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey()); - dos.writeUTF(e2.getKey()); - dos.writeInt(e2.getValue()); - - } - - } - } - public void read(DataInputStream din) throws IOException { - + static public void writeData(DataOutputStream dos) throws IOException { + dos.writeInt(getFeatureSet().size()); + // DB.println("write"+getFeatureSet().size()); + for (Entry<String, HashMap<String, Integer>> e : getFeatureSet().entrySet()) { + dos.writeUTF(e.getKey()); + dos.writeInt(e.getValue().size()); + + for (Entry<String, Integer> e2 : e.getValue().entrySet()) { + + if (e2.getKey() == null) + DB.println("key " + e2.getKey() + " value " + e2.getValue() + " e -key " + e.getKey()); + dos.writeUTF(e2.getKey()); + dos.writeInt(e2.getValue()); + + } + + } + } + + public void read(DataInputStream din) throws IOException { + int size = din.readInt(); - for(int i=0; i<size;i++) { + for (int i = 0; i < size; i++) { String k = din.readUTF(); int size2 = din.readInt(); - - HashMap<String,Integer> h = new HashMap<String,Integer>(); - getFeatureSet().put(k,h); - for(int j = 0;j<size2;j++) { + + HashMap<String, Integer> h = new HashMap<String, Integer>(); + getFeatureSet().put(k, h); + for (int j = 0; j < size2; j++) { h.put(din.readUTF(), din.readInt()); } getFeatureCounter().put(k, size2); } - count =size; - // stop(); + count = size; + // stop(); calculateBits(); } - - /** + /** * Clear the data */ - static public void clearData() { - getFeatureSet().clear(); - m_featureBits.clear(); - getFeatureSet().clear(); - } + static public void clearData() { + getFeatureSet().clear(); + m_featureBits.clear(); + getFeatureSet().clear(); + } - public HashMap<String,Integer> getFeatureCounter() { + @Override + public HashMap<String, Integer> getFeatureCounter() { return m_featureCounters; } - static public HashMap<String,HashMap<String,Integer>> getFeatureSet() { + static public HashMap<String, HashMap<String, Integer>> getFeatureSet() { return m_featureSets; } - - static public String[] reverse(HashMap<String,Integer> v){ + + static public String[] reverse(HashMap<String, Integer> v) { String[] set = new String[v.size()]; - for(Entry<String,Integer> e : v.entrySet()) { - set[e.getValue()]=e.getKey(); + for (Entry<String, Integer> e : v.entrySet()) { + set[e.getValue()] = e.getKey(); } return set; } - } diff --git a/dependencyParser/mate-tools/src/is2/parser/Open.java b/dependencyParser/mate-tools/src/is2/parser/Open.java index 35f14a7..2f68e07 100755 --- a/dependencyParser/mate-tools/src/is2/parser/Open.java +++ b/dependencyParser/mate-tools/src/is2/parser/Open.java @@ -2,8 +2,7 @@ package is2.parser; import is2.data.Parse; - -final public class Open { +final public class Open { public float p; short s, e, label; @@ -12,27 +11,30 @@ final public class Open { Closed left; Closed right; - public Open(short s, short t, short dir, short label,Closed left, Closed right, float p) { + public Open(short s, short t, short dir, short label, Closed left, Closed right, float p) { this.s = s; this.e = t; this.label = label; - this.dir = (byte)dir; - this.left =left; - this.right=right; - this.p=p; + this.dir = (byte) dir; + this.left = left; + this.right = right; + this.p = p; } - void create(Parse parse) { if (dir == 0) { parse.heads[s] = e; - if (label != -1) parse.labels[s] = label; + if (label != -1) + parse.labels[s] = label; } else { parse.heads[e] = s; - if (label != -1) parse.labels[e] = label; + if (label != -1) + parse.labels[e] = label; } - if (left != null) left.create(parse); - if (right != null) right.create(parse); + if (left != null) + left.create(parse); + if (right != null) + right.create(parse); } - + } diff --git a/dependencyParser/mate-tools/src/is2/parser/Options.java b/dependencyParser/mate-tools/src/is2/parser/Options.java index 3c8b551..bd550ec 100755 --- a/dependencyParser/mate-tools/src/is2/parser/Options.java +++ b/dependencyParser/mate-tools/src/is2/parser/Options.java @@ -2,42 +2,44 @@ package is2.parser; import is2.util.OptionsSuper; - public final class Options extends OptionsSuper { - - public Options (String[] args) { - + public Options(String[] args) { + + for (int i = 0; i < args.length; i++) { - - for(int i = 0; i < args.length; i++) { + if (args[i].equals("--help")) + explain(); - if (args[i].equals("--help")) explain(); - if (args[i].equals("-decode")) { - decodeProjective = args[i+1].equals("proj"); i++; - } else if (args[i].equals("-decodeTH")) { - decodeTH = Double.parseDouble(args[i+1]); i++; + decodeProjective = args[i + 1].equals("proj"); + i++; + } else if (args[i].equals("-decodeTH")) { + decodeTH = Double.parseDouble(args[i + 1]); + i++; } else if (args[i].equals("-nonormalize")) { - normalize=false; + normalize = false; } else if (args[i].equals("-features")) { - features= args[i+1]; i++; + features = args[i + 1]; + i++; } else if (args[i].equals("-hsize")) { - hsize= Integer.parseInt(args[i+1]); i++; + hsize = Integer.parseInt(args[i + 1]); + i++; } else if (args[i].equals("-len")) { - maxLen= Integer.parseInt(args[i+1]); i++; + maxLen = Integer.parseInt(args[i + 1]); + i++; } else if (args[i].equals("-cores")) { - cores= Integer.parseInt(args[i+1]); i++; + cores = Integer.parseInt(args[i + 1]); + i++; } else if (args[i].equals("-no2nd")) { - no2nd= true; + no2nd = true; } else if (args[i].equals("-few2nd")) { - few2nd= true; - } else super.addOption(args, i); - - } + few2nd = true; + } else + super.addOption(args, i); + } - } private void explain() { @@ -45,19 +47,24 @@ public final class Options extends OptionsSuper { System.out.println("java -class mate.jar is2.parser.Parser [Options]"); System.out.println(); System.out.println("Example: "); - System.out.println(" java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6"); + System.out.println( + " java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6"); System.out.println(""); System.out.println("Options:"); System.out.println(""); - System.out.println(" -train <file> the corpus a model is trained on; default "+this.trainfile); - System.out.println(" -test <file> the input corpus for testing; default "+this.testfile); - System.out.println(" -out <file> the output corpus (result) of a test run; default "+this.outfile); + System.out.println(" -train <file> the corpus a model is trained on; default " + this.trainfile); + System.out.println(" -test <file> the input corpus for testing; default " + this.testfile); + System.out.println(" -out <file> the output corpus (result) of a test run; default " + this.outfile); System.out.println(" -model <file> the parsing model for traing the model is stored in the files"); - System.out.println(" and for parsing the model is load from this file; default "+this.modelName); - System.out.println(" -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters); - System.out.println(" -count <number> the n first sentences of the corpus are take for the training default "+this.count); - System.out.println(" -format <number> conll format of the year 8 or 9; default "+this.formatTask); - + System.out.println( + " and for parsing the model is load from this file; default " + this.modelName); + System.out.println( + " -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default " + + this.numIters); + System.out.println(" -count <number> the n first sentences of the corpus are take for the training default " + + this.count); + System.out.println(" -format <number> conll format of the year 8 or 9; default " + this.formatTask); + System.exit(0); } } diff --git a/dependencyParser/mate-tools/src/is2/parser/ParallelDecoder.java b/dependencyParser/mate-tools/src/is2/parser/ParallelDecoder.java index dd18f5f..ca508fd 100755 --- a/dependencyParser/mate-tools/src/is2/parser/ParallelDecoder.java +++ b/dependencyParser/mate-tools/src/is2/parser/ParallelDecoder.java @@ -1,151 +1,174 @@ package is2.parser; - -import is2.data.DataFES; - - import java.util.ArrayList; import java.util.concurrent.Callable; +import is2.data.DataFES; + /** * @author Bernd Bohnet, 30.08.2009 - * - * This class implements a parallel feature extractor. + * + * This class implements a parallel feature extractor. */ -final public class ParallelDecoder implements Callable<Object> -{ +final public class ParallelDecoder implements Callable<Object> { // some constants private static final float INIT_BEST = (-1.0F / 0.0F); - private static final boolean[] DIR ={false,true}; + private static final boolean[] DIR = { false, true }; - // the data space of the weights for a dependency tree + // the data space of the weights for a dependency tree final private DataFES x; private short[] pos; private Open O[][][][]; - private Closed C[][][][] ; + private Closed C[][][][]; private int length; - boolean done=false; - public boolean waiting =false; + boolean done = false; + public boolean waiting = false; /** * Initialize the parallel decoder. - * - * @param pos part-of-speech - * @param d data - * @param edges part-of-speech edge mapping - * @param o open spans - * @param c closed spans - * @param length number of words + * + * @param pos + * part-of-speech + * @param d + * data + * @param edges + * part-of-speech edge mapping + * @param o + * open spans + * @param c + * closed spans + * @param length + * number of words */ public ParallelDecoder(short[] pos, DataFES d, Open o[][][][], Closed c[][][][], int length) { - this.pos =pos; - this.x =d; + this.pos = pos; + this.x = d; - this.O=o; - this.C=c; - this.length=length; + this.O = o; + this.C = c; + this.length = length; } - - private static class DSet { short w1,w2;} + private static class DSet { + short w1, w2; + } @Override public Object call() { - try { - - while (true){ - - DSet set = get(); -// if (done && set==null) break; - - if (set ==null) return null; - - short s=set.w1, t=set.w2; - - for(short dir =0;dir<2;dir++) { - - short[] labs = (dir==1) ? Edges.get(pos[s],pos[t]):Edges.get(pos[t],pos[s]); - - O[s][t][dir] = new Open[labs.length]; - - for (int l = 0; l <labs.length; l++) { - - - double tRP = INIT_BEST; - - Closed tL = null, tR = null; - - for (int r = s; r < t; r++) { - - if (s == 0 && r != 0) continue; - - double tLPr = INIT_BEST,tRPr = INIT_BEST; - Closed tLCld = null, tRCld = null; - - if (r == s) tLPr = dir==1 ? x.sib[s][t][s][l] : - x.gra[t][s][s][l]; - else - for (int i = s + 1; i <= r; i++) - if (((dir==1 ? x.sib[s][t][i][l] : x.gra[t][s][i][l]) + C[s][r][1][i].p) > tLPr) { - tLPr = ((dir==1 ? x.sib[s][t][i][l] : x.gra[t][s][i][l]) + C[s][r][1][i].p);tLCld = C[s][r][1][i];} - - if (r == t-1) tRPr = dir==1 ? x.gra[s][t][s][l] : x.sib[t][s][s][l]; - else - for (int i = r + 1; i < t; i++) - if (((dir == 1 ? x.gra[s][t][i][l] : - x.sib[t][s][i][l]) + - C[r+1][t][0][i].p) > tRPr) { - tRPr = ((dir==1?x.gra[s][t][i][l]:x.sib[t][s][i][l]) + C[r+1][t][0][i].p); tRCld=C[r + 1][t][0][i]; - } - - if (tLPr + tRPr > tRP) {tRP = tLPr + tRPr; tL = tLCld;tR = tRCld;} + + while (true) { + + DSet set = get(); + // if (done && set==null) break; + + if (set == null) + return null; + + short s = set.w1, t = set.w2; + + for (short dir = 0; dir < 2; dir++) { + + short[] labs = (dir == 1) ? Edges.get(pos[s], pos[t]) : Edges.get(pos[t], pos[s]); + + O[s][t][dir] = new Open[labs.length]; + + for (int l = 0; l < labs.length; l++) { + + double tRP = INIT_BEST; + + Closed tL = null, tR = null; + + for (int r = s; r < t; r++) { + + if (s == 0 && r != 0) + continue; + + double tLPr = INIT_BEST, tRPr = INIT_BEST; + Closed tLCld = null, tRCld = null; + + if (r == s) + tLPr = dir == 1 ? x.sib[s][t][s][l] : x.gra[t][s][s][l]; + else + for (int i = s + 1; i <= r; i++) + if (((dir == 1 ? x.sib[s][t][i][l] : x.gra[t][s][i][l]) + C[s][r][1][i].p) > tLPr) { + tLPr = ((dir == 1 ? x.sib[s][t][i][l] : x.gra[t][s][i][l]) + C[s][r][1][i].p); + tLCld = C[s][r][1][i]; + } + + if (r == t - 1) + tRPr = dir == 1 ? x.gra[s][t][s][l] : x.sib[t][s][s][l]; + else + for (int i = r + 1; i < t; i++) + if (((dir == 1 ? x.gra[s][t][i][l] : x.sib[t][s][i][l]) + + C[r + 1][t][0][i].p) > tRPr) { + tRPr = ((dir == 1 ? x.gra[s][t][i][l] : x.sib[t][s][i][l]) + + C[r + 1][t][0][i].p); + tRCld = C[r + 1][t][0][i]; + } + + if (tLPr + tRPr > tRP) { + tRP = tLPr + tRPr; + tL = tLCld; + tR = tRCld; + } + } + O[s][t][dir][l] = new Open(s, t, dir, labs[l], tL, tR, + (float) (tRP + ((dir == 1) ? x.pl[s][t] : x.pl[t][s]) + + ((dir == 1) ? x.lab[s][t][labs[l]] : x.lab[t][s][labs[l]]))); } - O[s][t][dir][l] = new Open(s, t, dir, labs[l],tL, tR, - (float) ( tRP+((dir==1)?x.pl[s][t]: x.pl[t][s]) + ((dir==1)? x.lab[s][t][labs[l]]:x.lab[t][s][labs[l]]))); } - } - C[s][t][1] = new Closed[length]; C[s][t][0] = new Closed[length]; - - for (int m = s ; m <= t; m++) { - for(boolean d : DIR) { - if ((d && m!=s)||!d && (m!=t && s!=0)) { - - // create closed structure - - double top = INIT_BEST; - - Open tU = null; Closed tL = null; - int numLabels =O[(d ? s : m)][(d ? m : t)][d?1:0].length; - - //for (int l = numLabels-1; l >=0; l--) { - for (int l = 0; l < numLabels; l++) { - - Open hi = O[(d ? s : m)][(d ? m : t)][d?1:0][l]; - for (int amb = m + (d?1:-1); amb != (d?t:s) + (d?1:-1); amb += (d?1:-1)) { - - if ((hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][l]) > top) { - top = (hi.p + C[d?m:s][d?t:m][d?1:0][amb].p +x.gra[d?s:t][m][amb][l]); tU = hi; tL=C[d?m:s][d?t:m][d?1:0][amb];} - + C[s][t][1] = new Closed[length]; + C[s][t][0] = new Closed[length]; + + for (int m = s; m <= t; m++) { + for (boolean d : DIR) { + if ((d && m != s) || !d && (m != t && s != 0)) { + + // create closed structure + + double top = INIT_BEST; + + Open tU = null; + Closed tL = null; + int numLabels = O[(d ? s : m)][(d ? m : t)][d ? 1 : 0].length; + + // for (int l = numLabels-1; l >=0; l--) { + for (int l = 0; l < numLabels; l++) { + + Open hi = O[(d ? s : m)][(d ? m : t)][d ? 1 : 0][l]; + for (int amb = m + (d ? 1 : -1); amb != (d ? t : s) + + (d ? 1 : -1); amb += (d ? 1 : -1)) { + + if ((hi.p + C[d ? m : s][d ? t : m][d ? 1 : 0][amb].p + + x.gra[d ? s : t][m][amb][l]) > top) { + top = (hi.p + C[d ? m : s][d ? t : m][d ? 1 : 0][amb].p + + x.gra[d ? s : t][m][amb][l]); + tU = hi; + tL = C[d ? m : s][d ? t : m][d ? 1 : 0][amb]; + } + + } + + if ((m == (d ? t : s)) && (hi.p + x.gra[d ? s : t][d ? t : s][m][l]) > top) { + top = (hi.p + x.gra[d ? s : t][d ? t : s][m][l]); + tU = hi; + tL = null; + } } - - if ((m == (d ? t : s)) && (hi.p + x.gra[d?s:t][d?t:s][m][l]) > top) { - top = (hi.p + x.gra[d ? s : t][d?t:s][m][l]); tU = hi; tL = null;} + C[s][t][d ? 1 : 0][m] = new Closed(s, t, m, d ? 1 : 0, tU, tL, (float) top); + } - C[s][t][d?1:0][m] = new Closed(s, t, m, d?1:0,tU,tL,(float) top); - - } - } + } } - } - } catch (Exception e ) { + } catch (Exception e) { e.printStackTrace(); System.exit(0); } @@ -156,15 +179,16 @@ final public class ParallelDecoder implements Callable<Object> static synchronized private DSet get() { synchronized (sets) { - if (sets.size()==0) return null; - return sets.remove(sets.size()-1); + if (sets.size() == 0) + return null; + return sets.remove(sets.size() - 1); } } - public static void add(short w1, short w2){ - DSet ds =new DSet(); - ds.w1=w1; - ds.w2=w2; + public static void add(short w1, short w2) { + DSet ds = new DSet(); + ds.w1 = w1; + ds.w2 = w2; sets.add(ds); } } diff --git a/dependencyParser/mate-tools/src/is2/parser/ParallelExtract.java b/dependencyParser/mate-tools/src/is2/parser/ParallelExtract.java index 4313bfd..ca85711 100755 --- a/dependencyParser/mate-tools/src/is2/parser/ParallelExtract.java +++ b/dependencyParser/mate-tools/src/is2/parser/ParallelExtract.java @@ -1,30 +1,21 @@ package is2.parser; +import java.util.ArrayList; +import java.util.concurrent.Callable; + import is2.data.Cluster; import is2.data.DataFES; import is2.data.F2SF; -import is2.data.FV; import is2.data.Instances; import is2.data.Long2IntInterface; -import is2.util.DB; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.concurrent.Callable; - - - - /** * @author Bernd Bohnet, 30.08.2009 - * - * This class implements a parallel feature extractor. + * + * This class implements a parallel feature extractor. */ -final public class ParallelExtract implements Callable<Object> -{ - // the data space of the weights for a dependency tree +final public class ParallelExtract implements Callable<Object> { + // the data space of the weights for a dependency tree final DataFES d; // the data extractor does the actual work @@ -37,53 +28,50 @@ final public class ParallelExtract implements Callable<Object> private Cluster cluster; + public ParallelExtract(Extractor e, Instances is, int i, DataFES d, F2SF para, Cluster cluster) { - public ParallelExtract(Extractor e, Instances is, int i, DataFES d, F2SF para,Cluster cluster) { - - this.is =is; - extractor=e; - this.d =d; - this.i=i; - this.para=para; + this.is = is; + extractor = e; + this.d = d; + this.i = i; + this.para = para; this.cluster = cluster; } - public static class DSet { - int w1,w2; + int w1, w2; } + @Override public Object call() { try { - F2SF f= para; - + F2SF f = para; - short[] pos=is.pposs[i]; + short[] pos = is.pposs[i]; int length = pos.length; - long[] gvs = new long[50]; - long[] svs = new long[220]; + long[] gvs = new long[50]; + long[] svs = new long[220]; while (true) { DSet set = get(); - if (set ==null) break; - - int w1=set.w1; - int w2=set.w2; + if (set == null) + break; + int w1 = set.w1; + int w2 = set.w2; f.clear(); extractor.basic(pos, w1, w2, f); - d.pl[w1][w2]=f.getScoreF(); - - + d.pl[w1][w2] = f.getScoreF(); + f.clear(); extractor.basic(pos, w2, w1, f); - d.pl[w2][w1]=f.getScoreF(); + d.pl[w2][w1] = f.getScoreF(); short[] labels = Edges.get(pos[w1], pos[w2]); float[] lab = d.lab[w1][w2]; @@ -92,155 +80,169 @@ final public class ParallelExtract implements Callable<Object> int c = extractor.firstm(is, i, w1, w2, 0, cluster, svs); - for (int l = 0; l <lab.length ; l++) lab[l]=-100 ; + for (int l = 0; l < lab.length; l++) + lab[l] = -100; - for (int l = 0; l <labels.length ; l++) { - short label = labels[l]; + for (short label2 : labels) { + short label = label2; f.clear(); - int lv = extractor.d0.computeLabeValue(label,Extractor.s_type); - for(int k=0;k<c;k++)if (svs[k]>0) f.add(li.l2i(svs[k]+lv)); - + int lv = extractor.d0.computeLabeValue(label, Extractor.s_type); + for (int k = 0; k < c; k++) + if (svs[k] > 0) + f.add(li.l2i(svs[k] + lv)); - lab[label]=f.getScoreF(); + lab[label] = f.getScoreF(); } labels = Edges.get(pos[w2], pos[w1]); lab = d.lab[w2][w1]; - for (int l = 0; l <lab.length ; l++) lab[l]=-100 ; + for (int l = 0; l < lab.length; l++) + lab[l] = -100; - - for (int l = 0; l <labels.length ; l++) { - int label = labels[l]; + for (short label2 : labels) { + int label = label2; f.clear(); - int lv = extractor.d0.computeLabeValue(label + Extractor.s_rel1 ,Extractor.s_type); - for(int k=0;k<c;k++)if (svs[k]>0) f.add(li.l2i(svs[k]+lv)); + int lv = extractor.d0.computeLabeValue(label + Extractor.s_rel1, Extractor.s_type); + for (int k = 0; k < c; k++) + if (svs[k] > 0) + f.add(li.l2i(svs[k] + lv)); - lab[label]=f.getScoreF(); + lab[label] = f.getScoreF(); } - int s = w1<w2 ? w1 : w2; - int e = w1<w2 ? w2 : w1; + int s = w1 < w2 ? w1 : w2; + int e = w1 < w2 ? w2 : w1; + for (int m = 0; m < length; m++) { - for(int m=0;m<length;m++) { + int g = (m == s || e == m) ? -1 : m; - int g = (m==s||e==m) ? -1 : m; - - int cn =extractor.second(is, i, w1,w2,g, 0, cluster, svs); - int cc = extractor.addClusterFeatures(is,i, w1, w2, g, cluster, 0, gvs,0); - //for(int k=0;k<c;k++) dl1.map(f,svs[k]); - + int cn = extractor.second(is, i, w1, w2, g, 0, cluster, svs); + int cc = extractor.addClusterFeatures(is, i, w1, w2, g, cluster, 0, gvs, 0); + // for(int k=0;k<c;k++) dl1.map(f,svs[k]); - if(m>=w1) { + if (m >= w1) { labels = Edges.get(pos[w1], pos[w2]); float[] lab2 = new float[labels.length]; - for (int l = 0; l <labels.length ; l++) { + for (int l = 0; l < labels.length; l++) { short label = labels[l]; - int lx =label+Extractor.s_rel1*( g < w2?0:2 ); + int lx = label + Extractor.s_rel1 * (g < w2 ? 0 : 2); f.clear(); - int lv = extractor.d0.computeLabeValue(lx,Extractor.s_type); - for(int k=0;k<cn;k++)if (svs[k]>0) f.add(li.l2i(svs[k]+lv)); - for(int k=0;k<cc;k++)if (gvs[k]>0) f.add(li.l2i(gvs[k]+lv)); + int lv = extractor.d0.computeLabeValue(lx, Extractor.s_type); + for (int k = 0; k < cn; k++) + if (svs[k] > 0) + f.add(li.l2i(svs[k] + lv)); + for (int k = 0; k < cc; k++) + if (gvs[k] > 0) + f.add(li.l2i(gvs[k] + lv)); lab2[l] = f.getScoreF(); } - d.gra[w1][w2][m] =lab2; + d.gra[w1][w2][m] = lab2; } - - if (m<=w2) { + if (m <= w2) { labels = Edges.get(pos[w2], pos[w1]); float lab2[]; d.gra[w2][w1][m] = lab2 = new float[labels.length]; - for (int l = 0; l <labels.length ; l++) { + for (int l = 0; l < labels.length; l++) { - int label = labels[l] ; - int lx =label+Extractor.s_rel1*(1 + (g < w1?0:2) ); + int label = labels[l]; + int lx = label + Extractor.s_rel1 * (1 + (g < w1 ? 0 : 2)); f.clear(); - int lv = extractor.d0.computeLabeValue(lx,Extractor.s_type); - for(int k=0;k<cn;k++)if (svs[k]>0) f.add(li.l2i(svs[k]+lv)); - for(int k=0;k<cc;k++)if (gvs[k]>0) f.add(li.l2i(gvs[k]+lv)); - + int lv = extractor.d0.computeLabeValue(lx, Extractor.s_type); + for (int k = 0; k < cn; k++) + if (svs[k] > 0) + f.add(li.l2i(svs[k] + lv)); + for (int k = 0; k < cc; k++) + if (gvs[k] > 0) + f.add(li.l2i(gvs[k] + lv)); + lab2[l] = f.getScoreF(); - + } } + g = (m == s || e == m) ? -1 : m; - g = (m==s||e==m) ? -1 : m; - - // int cn = extractor.second(is,i,w1,w2,g,0, cluster, svs,Extractor._SIB); - if (m >=w1 && m<=w2) { + // int cn = extractor.second(is,i,w1,w2,g,0, cluster, + // svs,Extractor._SIB); + if (m >= w1 && m <= w2) { labels = Edges.get(pos[w1], pos[w2]); - float lab2[]= new float[labels.length]; + float lab2[] = new float[labels.length]; d.sib[w1][w2][m] = lab2; - for (int l = 0; l <labels.length ; l++) { + for (int l = 0; l < labels.length; l++) { short label = labels[l]; - int lx =label+Extractor.s_rel1*( 8); + int lx = label + Extractor.s_rel1 * (8); f.clear(); - int lv = extractor.d0.computeLabeValue(lx,Extractor.s_type); - for(int k=0;k<cn;k++) if (svs[k]>0) f.add(li.l2i(svs[k]+lv)); - for(int k=0;k<cc;k++) if (gvs[k]>0) f.add(li.l2i(gvs[k]+lv)); - - - lab2[l] = (float)f.score;//f.getScoreF(); + int lv = extractor.d0.computeLabeValue(lx, Extractor.s_type); + for (int k = 0; k < cn; k++) + if (svs[k] > 0) + f.add(li.l2i(svs[k] + lv)); + for (int k = 0; k < cc; k++) + if (gvs[k] > 0) + f.add(li.l2i(gvs[k] + lv)); + + lab2[l] = f.score;// f.getScoreF(); } } - if (m >=w1 && m <=w2) { + if (m >= w1 && m <= w2) { labels = Edges.get(pos[w2], pos[w1]); - float[] lab2 = new float[labels.length]; - d.sib[w2][w1][m]=lab2; - for (int l = 0; l <labels.length ; l++) { + float[] lab2 = new float[labels.length]; + d.sib[w2][w1][m] = lab2; + for (int l = 0; l < labels.length; l++) { - int label = labels[l] ; + int label = labels[l]; - int lx =label+Extractor.s_rel1*(9); + int lx = label + Extractor.s_rel1 * (9); f.clear(); - int lv = extractor.d0.computeLabeValue(lx,Extractor.s_type); - for(int k=0;k<cn;k++) if (svs[k]>0) f.add(li.l2i(svs[k]+lv)); - for(int k=0;k<cc;k++) if (gvs[k]>0) f.add(li.l2i(gvs[k]+lv)); - - lab2[l] = f.score;//f.getScoreF(); + int lv = extractor.d0.computeLabeValue(lx, Extractor.s_type); + for (int k = 0; k < cn; k++) + if (svs[k] > 0) + f.add(li.l2i(svs[k] + lv)); + for (int k = 0; k < cc; k++) + if (gvs[k] > 0) + f.add(li.l2i(gvs[k] + lv)); + + lab2[l] = f.score;// f.getScoreF(); } } } } - } catch(Exception e ) { + } catch (Exception e) { e.printStackTrace(); } return null; } - static ArrayList<DSet> sets = new ArrayList<DSet>(); - private DSet get() { + private DSet get() { synchronized (sets) { - if (sets.size()==0) return null; - return sets.remove(sets.size()-1); + if (sets.size() == 0) + return null; + return sets.remove(sets.size() - 1); } } - static public void add(int w1, int w2){ - DSet ds =new DSet(); - ds.w1=w1; - ds.w2=w2; + + static public void add(int w1, int w2) { + DSet ds = new DSet(); + ds.w1 = w1; + ds.w2 = w2; sets.add(ds); } - - - } diff --git a/dependencyParser/mate-tools/src/is2/parser/ParallelRearrange.java b/dependencyParser/mate-tools/src/is2/parser/ParallelRearrange.java index dfd995a..83dcdaa 100755 --- a/dependencyParser/mate-tools/src/is2/parser/ParallelRearrange.java +++ b/dependencyParser/mate-tools/src/is2/parser/ParallelRearrange.java @@ -1,15 +1,16 @@ package is2.parser; -import is2.data.DataFES; - import java.util.ArrayList; import java.util.concurrent.Callable; +import is2.data.DataFES; + /** * @author Dr. Bernd Bohnet, 30.08.2009 - * - * This class implements a parallel edge rearrangement for non-projective parsing; - * The linear method was first suggest by Rayn McDonald et. al. 2005. + * + * This class implements a parallel edge rearrangement for + * non-projective parsing; The linear method was first suggest by Rayn + * McDonald et. al. 2005. */ final public class ParallelRearrange implements Callable<Object> { @@ -21,7 +22,12 @@ final public class ParallelRearrange implements Callable<Object> { public short wh; public short nPar; public short nType; - public PA(float p2, short ch2, short pa2) { p=p2; ch=ch2;pa=pa2;} + + public PA(float p2, short ch2, short pa2) { + p = p2; + ch = ch2; + pa = pa2; + } } // list of parent child combinations @@ -31,97 +37,110 @@ final public class ParallelRearrange implements Callable<Object> { public float max; // some data from the dependency tree - //private EdgesC edges; + // private EdgesC edges; private short[] pos; private DataFES x; - private boolean[][] isChild ; - public short[] heads,types; - + private boolean[][] isChild; + public short[] heads, types; + // child, new parent, new label - public short wh,nPar,nType; - + public short wh, nPar, nType; + /** * Initialize the parallel rearrange thread - * - * @param isChild2 is a child - * @param edgesC the part-of-speech edge mapping - * @param pos the part-of-speech - * @param x the data - * @param s the heads - * @param ts the types + * + * @param isChild2 + * is a child + * @param edgesC + * the part-of-speech edge mapping + * @param pos + * the part-of-speech + * @param x + * the data + * @param s + * the heads + * @param ts + * the types */ - public ParallelRearrange(boolean[][] isChild2,short[] pos, DataFES x, short[] s, short[] ts) { - - heads =new short[s.length]; - System.arraycopy(s, 0, heads, 0, s.length); - - types =new short[ts.length]; - System.arraycopy(ts, 0, types, 0, ts.length); - - isChild=isChild2; - //edges = edgesC; - this.pos =pos; - this.x=x; - } + public ParallelRearrange(boolean[][] isChild2, short[] pos, DataFES x, short[] s, short[] ts) { + heads = new short[s.length]; + System.arraycopy(s, 0, heads, 0, s.length); + + types = new short[ts.length]; + System.arraycopy(ts, 0, types, 0, ts.length); + + isChild = isChild2; + // edges = edgesC; + this.pos = pos; + this.x = x; + } @Override public Object call() { - - // check the list of new possible parents and children for a better combination - while(true) { + + // check the list of new possible parents and children for a better + // combination + while (true) { PA px = getPA(); - if (px==null) break; + if (px == null) + break; + + float max = 0; + short pa = px.pa, ch = px.ch; - float max=0; - short pa =px.pa, ch =px.ch; - - if(ch == pa || pa == heads[ch] || isChild[ch][pa]) continue; + if (ch == pa || pa == heads[ch] || isChild[ch][pa]) + continue; - short oldP = heads[ch], oldT = types[ch]; + short oldP = heads[ch], oldT = types[ch]; - heads[ch]=pa; + heads[ch] = pa; short[] labels = Edges.get(pos[pa], pos[ch]); - for(int l=0;l<labels.length;l++) { + for (short label : labels) { - types[ch]=labels[l]; + types[ch] = label; float p_new = Extractor.encode3(pos, heads, types, x); - if(max < p_new-px.p ) { - max = p_new-px.p; wh = ch; nPar = pa; nType = labels[l] ; - px.max=max; - px.wh=ch; + if (max < p_new - px.p) { + max = p_new - px.p; + wh = ch; + nPar = pa; + nType = label; + px.max = max; + px.wh = ch; px.nPar = pa; - px.nType =labels[l]; + px.nType = label; } } - heads[ch]= oldP; types[ch]=oldT; + heads[ch] = oldP; + types[ch] = oldT; } return null; } /** - * Add a child-parent combination which are latter explored for rearrangement - * + * Add a child-parent combination which are latter explored for + * rearrangement + * * @param p2 * @param ch2 * @param pa */ static public void add(float p2, short ch2, short pa) { - PA px = new PA(p2,ch2,pa); + PA px = new PA(p2, ch2, pa); parents.add(px); order.add(px); } static private PA getPA() { synchronized (parents) { - if (parents.size()==0) return null; - return parents.remove(parents.size()-1); + if (parents.size() == 0) + return null; + return parents.remove(parents.size() - 1); } } - } diff --git a/dependencyParser/mate-tools/src/is2/parser/Parameters.java b/dependencyParser/mate-tools/src/is2/parser/Parameters.java index cb13a69..baba7b6 100755 --- a/dependencyParser/mate-tools/src/is2/parser/Parameters.java +++ b/dependencyParser/mate-tools/src/is2/parser/Parameters.java @@ -1,5 +1,5 @@ /** - * + * */ package is2.parser; @@ -14,25 +14,24 @@ import is2.data.Parse; /** * @author Bernd Bohnet, 31.08.2009 - * - * + * + * */ public abstract class Parameters { - public abstract void average(double avVal); - + public abstract void update(FV act, FV pred, Instances isd, int instc, Parse d, double upd, double e); - + public abstract void write(DataOutputStream dos) throws IOException; - - public abstract void read(DataInputStream dis ) throws IOException; - + + public abstract void read(DataInputStream dis) throws IOException; + public abstract int size(); /** * @return */ - public abstract IFV getFV() ; - + public abstract IFV getFV(); + } diff --git a/dependencyParser/mate-tools/src/is2/parser/ParametersFloat.java b/dependencyParser/mate-tools/src/is2/parser/ParametersFloat.java index c2cbe93..faf795d 100755 --- a/dependencyParser/mate-tools/src/is2/parser/ParametersFloat.java +++ b/dependencyParser/mate-tools/src/is2/parser/ParametersFloat.java @@ -1,26 +1,24 @@ package is2.parser; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + import is2.data.F2SF; import is2.data.FV; import is2.data.Instances; import is2.data.Parse; import is2.util.DB; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; - - - -final public class ParametersFloat extends Parameters { +final public class ParametersFloat extends Parameters { public float[] parameters; public float[] total; public ParametersFloat(int size) { - parameters = new float[size]; + parameters = new float[size]; total = new float[size]; - for(int i = 0; i < parameters.length; i++) { + for (int i = 0; i < parameters.length; i++) { parameters[i] = 0F; total[i] = 0F; } @@ -30,92 +28,96 @@ final public class ParametersFloat extends Parameters { * @param parameters2 */ public ParametersFloat(float[] p) { - parameters =p; + parameters = p; } - @Override public void average(double avVal) { - for(int j = 0; j < total.length; j++) { - parameters[j] = total[j]/((float)avVal); + for (int j = 0; j < total.length; j++) { + parameters[j] = total[j] / ((float) avVal); } - total =null; + total = null; } public ParametersFloat average2(double avVal) { float[] px = new float[this.parameters.length]; - for(int j = 0; j < total.length; j++) { - px[j] = total[j]/((float)avVal); + for (int j = 0; j < total.length; j++) { + px[j] = total[j] / ((float) avVal); } ParametersFloat pf = new ParametersFloat(px); return pf; } - + @Override public void update(FV act, FV pred, Instances isd, int instc, Parse d, double upd, double e) { e++; - + float lam_dist = getScore(act) - getScore(pred); - - float b = (float)e-lam_dist; - + + float b = (float) e - lam_dist; + FV dist = act.getDistVector(pred); - - dist.update(parameters, total, hildreth(dist,b), upd,false); + + dist.update(parameters, total, hildreth(dist, b), upd, false); } protected double hildreth(FV a, double b) { double A = a.dotProduct(a); - if (A<=0.0000000000000000001) return 0.0; - return b/A; + if (A <= 0.0000000000000000001) + return 0.0; + return b / A; } - public float getScore(FV fv) { - if (fv ==null) return 0.0F; - return fv.getScore(parameters,false); + if (fv == null) + return 0.0F; + return fv.getScore(parameters, false); } @Override - final public void write(DataOutputStream dos) throws IOException{ + final public void write(DataOutputStream dos) throws IOException { dos.writeInt(parameters.length); - for(float d : parameters) dos.writeFloat(d); + for (float d : parameters) + dos.writeFloat(d); } @Override - public void read(DataInputStream dis ) throws IOException{ + public void read(DataInputStream dis) throws IOException { parameters = new float[dis.readInt()]; - int notZero=0; - for(int i=0;i<parameters.length;i++) { - parameters[i]=dis.readFloat(); - if (parameters[i]!=0.0F) notZero++; + int notZero = 0; + for (int i = 0; i < parameters.length; i++) { + parameters[i] = dis.readFloat(); + if (parameters[i] != 0.0F) + notZero++; } - - - DB.println("read parameters "+parameters.length+" not zero "+notZero); + + DB.println("read parameters " + parameters.length + " not zero " + notZero); } - + public int countNZ() { - int notZero=0; - for(int i=0;i<parameters.length;i++) { - if (parameters[i]!=0.0F) notZero++; + int notZero = 0; + for (float parameter : parameters) { + if (parameter != 0.0F) + notZero++; } return notZero; - - // DB.println("read parameters "+parameters.length+" not zero "+notZero); - } + // DB.println("read parameters "+parameters.length+" not zero + // "+notZero); + } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.sp09k99995.Parameters#getFV() */ @Override @@ -123,8 +125,9 @@ final public class ParametersFloat extends Parameters { return new F2SF(parameters); } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.sp09k99999.Parameters#size() */ @Override @@ -132,6 +135,4 @@ final public class ParametersFloat extends Parameters { return parameters.length; } - - } diff --git a/dependencyParser/mate-tools/src/is2/parser/Parser.java b/dependencyParser/mate-tools/src/is2/parser/Parser.java index d6ba6d6..7e6ac5f 100755 --- a/dependencyParser/mate-tools/src/is2/parser/Parser.java +++ b/dependencyParser/mate-tools/src/is2/parser/Parser.java @@ -1,8 +1,21 @@ package is2.parser; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Map.Entry; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import java.util.zip.ZipOutputStream; import is2.data.Cluster; -import is2.data.DataF; import is2.data.DataFES; import is2.data.F2SF; import is2.data.FV; @@ -20,51 +33,34 @@ import is2.util.DB; import is2.util.OptionsSuper; import is2.util.ParserEvaluator; -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.Map.Entry; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; -import java.util.zip.ZipOutputStream; - - - public class Parser implements Tool, Retrainable { - // output evaluation info + // output evaluation info private static final boolean MAX_INFO = true; - public static int THREADS =4; + public static int THREADS = 4; public Long2IntInterface l2i; public ParametersFloat params; public Pipe pipe; public OptionsSuper options; - // keep some of the parsing information for later evaluation public Instances is; DataFES d2; - public Parse d= null; + public Parse d = null; /** * Initialize the parser + * * @param options */ - public Parser (OptionsSuper options) { + public Parser(OptionsSuper options) { - this.options=options; + this.options = options; pipe = new Pipe(options); - params = new ParametersFloat(0); + params = new ParametersFloat(0); // load the model try { @@ -75,89 +71,79 @@ public class Parser implements Tool, Retrainable { } - /** - * @param modelFileName The file name of the parsing model + * @param modelFileName + * The file name of the parsing model */ public Parser(String modelFileName) { - this(new Options(new String[]{"-model",modelFileName})); + this(new Options(new String[] { "-model", modelFileName })); } - /** - * + * */ public Parser() { // TODO Auto-generated constructor stub } - - public static void main (String[] args) throws Exception - { - - - - + public static void main(String[] args) throws Exception { long start = System.currentTimeMillis(); OptionsSuper options = new Options(args); - Runtime runtime = Runtime.getRuntime(); THREADS = runtime.availableProcessors(); - if (options.cores<THREADS&&options.cores>0) THREADS =options.cores; - DB.println("Found " + runtime.availableProcessors()+" cores use "+THREADS); - - + if (options.cores < THREADS && options.cores > 0) + THREADS = options.cores; + DB.println("Found " + runtime.availableProcessors() + " cores use " + THREADS); if (options.train) { - Parser p =new Parser(); - p.options=options; + Parser p = new Parser(); + p.options = options; p.l2i = new Long2Int(options.hsize); - p.pipe = new Pipe (options); + p.pipe = new Pipe(options); Instances is = new Instances(); Extractor.initFeatures(); p.pipe.extractor = new Extractor[THREADS]; - DB.println("hsize "+options.hsize); + DB.println("hsize " + options.hsize); - DB.println("Use "+(options.featureCreation==OptionsSuper.MULTIPLICATIVE?"multiplication":"shift")+"-based feature creation function"); - for (int t=0;t<THREADS;t++) p.pipe.extractor[t]=new Extractor(p.l2i, options.stack, options.featureCreation); + DB.println("Use " + (options.featureCreation == OptionsSuper.MULTIPLICATIVE ? "multiplication" : "shift") + + "-based feature creation function"); + for (int t = 0; t < THREADS; t++) + p.pipe.extractor[t] = new Extractor(p.l2i, options.stack, options.featureCreation); - DB.println("Stacking "+options.stack); + DB.println("Stacking " + options.stack); - p.pipe.createInstances(options.trainfile,is); + p.pipe.createInstances(options.trainfile, is); p.params = new ParametersFloat(p.l2i.size()); - p.train(options, p.pipe,p.params,is,p.pipe.cl); + p.train(options, p.pipe, p.params, is, p.pipe.cl); - p.writeModell(options, p.params, null,p.pipe.cl); + p.writeModell(options, p.params, null, p.pipe.cl); } if (options.test) { - - // Parser p = new Parser(); + // Parser p = new Parser(); Parser p = new Parser(options); - // p. pipe = new Pipe(options); - // p. params = new ParametersFloat(0); // total should be zero and the parameters are later read + // p. pipe = new Pipe(options); + // p. params = new ParametersFloat(0); // total should be zero and + // the parameters are later read // load the model - // p.readModel(options, p.pipe, p.params); + // p.readModel(options, p.pipe, p.params); - DB.println("label only? "+options.label); + DB.println("label only? " + options.label); - p.out(options, p.pipe, p.params, !MAX_INFO, options.label); + p.outAll(options, p.pipe, p.params, !MAX_INFO, options.label); } - - - System.out.println(); if (options.eval) { @@ -166,24 +152,22 @@ public class Parser implements Tool, Retrainable { } long end = System.currentTimeMillis(); - System.out.println("used time "+((float)((end-start)/100)/10)); + System.out.println("used time " + ((float) ((end - start) / 100) / 10)); - Decoder.executerService.shutdown(); + Decoder.executerService.shutdown(); Pipe.executerService.shutdown(); System.out.println("end."); - - } /** * Read the models and mapping + * * @param options * @param pipe * @param params * @throws IOException */ - public void readModel(OptionsSuper options, Pipe pipe, Parameters params) throws IOException { - + public void readModel(OptionsSuper options, Pipe pipe, Parameters params) throws IOException { DB.println("Reading data started"); @@ -198,23 +182,23 @@ public class Parser implements Tool, Retrainable { params.read(dis); this.l2i = new Long2Int(params.size()); - DB.println("parsing -- li size "+l2i.size()); - + DB.println("parsing -- li size " + l2i.size()); pipe.extractor = new Extractor[THREADS]; boolean stack = dis.readBoolean(); - options.featureCreation=dis.readInt(); + options.featureCreation = dis.readInt(); - for (int t=0;t<THREADS;t++) pipe.extractor[t]=new Extractor(l2i, stack,options.featureCreation); - DB.println("Stacking "+stack); + for (int t = 0; t < THREADS; t++) + pipe.extractor[t] = new Extractor(l2i, stack, options.featureCreation); + DB.println("Stacking " + stack); Extractor.initFeatures(); Extractor.initStat(options.featureCreation); - - for (int t=0;t<THREADS;t++) pipe.extractor[t].init(); + for (int t = 0; t < THREADS; t++) + pipe.extractor[t].init(); Edges.read(dis); @@ -222,162 +206,167 @@ public class Parser implements Tool, Retrainable { Extractor.maxForm = dis.readInt(); - boolean foundInfo =false; + boolean foundInfo = false; try { - String info =null; + String info = null; int icnt = dis.readInt(); - for(int i=0;i<icnt;i++) { + for (int i = 0; i < icnt; i++) { info = dis.readUTF(); System.out.println(info); } } catch (Exception e) { - if (!foundInfo) System.out.println("no info about training"); + if (!foundInfo) + System.out.println("no info about training"); } - dis.close(); DB.println("Reading data finnished"); - Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH; + Decoder.NON_PROJECTIVITY_THRESHOLD = (float) options.decodeTH; Extractor.initStat(options.featureCreation); } - - /** * Do the training + * * @param instanceLengths * @param options * @param pipe * @param params - * @param is - * @param cluster + * @param is + * @param cluster * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ - public void train(OptionsSuper options, Pipe pipe, ParametersFloat params, Instances is, Cluster cluster) + public void train(OptionsSuper options, Pipe pipe, ParametersFloat params, Instances is, Cluster cluster) throws IOException, InterruptedException, ClassNotFoundException { - DB.println("\nTraining Information "); DB.println("-------------------- "); + Decoder.NON_PROJECTIVITY_THRESHOLD = (float) options.decodeTH; - Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH; - - if (options.decodeProjective) System.out.println("Decoding: "+(options.decodeProjective?"projective":"non-projective")); - else System.out.println(""+Decoder.getInfo()); + if (options.decodeProjective) + System.out.println("Decoding: " + (options.decodeProjective ? "projective" : "non-projective")); + else + System.out.println("" + Decoder.getInfo()); int numInstances = is.size(); - int maxLenInstances =0; - for(int i=0;i<numInstances;i++) if (maxLenInstances<is.length(i)) maxLenInstances=is.length(i); + int maxLenInstances = 0; + for (int i = 0; i < numInstances; i++) + if (maxLenInstances < is.length(i)) + maxLenInstances = is.length(i); DataFES data = new DataFES(maxLenInstances, pipe.mf.getFeatureCounter().get(PipeGen.REL).shortValue()); int iter = 0; - int del=0; - float error =0; - float f1=0; + int del = 0; + float error = 0; + float f1 = 0; FV pred = new FV(); FV act = new FV(); - double upd = (double)(numInstances*options.numIters)+1; + double upd = (double) (numInstances * options.numIters) + 1; - for(; iter < options.numIters; iter++) { + for (; iter < options.numIters; iter++) { - System.out.print("Iteration "+iter+": "); + System.out.print("Iteration " + iter + ": "); long start = System.currentTimeMillis(); - long last= System.currentTimeMillis(); - error=0; - f1=0; - for(int n = 0; n < numInstances; n++) { + long last = System.currentTimeMillis(); + error = 0; + f1 = 0; + for (int n = 0; n < numInstances; n++) { upd--; - if (is.labels[n].length>options.maxLen) continue; + if (is.labels[n].length > options.maxLen) + continue; - String info = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F) - +" te "+((Pipe.timeExtract)/1000000F); + String info = " td " + ((Decoder.timeDecotder) / 1000000F) + " tr " + + ((Decoder.timeRearrange) / 1000000F) + " te " + ((Pipe.timeExtract) / 1000000F); - if((n+1) %500 == 0) del= PipeGen.outValueErr(n+1, error,f1/n,del, last, upd,info); + if ((n + 1) % 500 == 0) + del = PipeGen.outValueErr(n + 1, error, f1 / n, del, last, upd, info); short pos[] = is.pposs[n]; - data = pipe.fillVector((F2SF)params.getFV(), is, n, data, cluster); + data = pipe.fillVector(params.getFV(), is, n, data, cluster); - Parse d = Decoder.decode(pos, data, options.decodeProjective, Decoder.TRAINING); + Parse d = Decoder.decode(pos, data, options.decodeProjective, Decoder.TRAINING); - double e= pipe.errors(is, n ,d); + double e = pipe.errors(is, n, d); - if (d.f1>0)f1+=d.f1; + if (d.f1 > 0) + f1 += d.f1; - if (e<=0) continue; + if (e <= 0) + continue; pred.clear(); - pipe.extractor[0].encodeCat(is,n,pos,is.forms[n],is.plemmas[n],d.heads, d.labels, is.feats[n],pipe.cl, pred); + pipe.extractor[0].encodeCat(is, n, pos, is.forms[n], is.plemmas[n], d.heads, d.labels, is.feats[n], + pipe.cl, pred); error += e; params.getFV(); - act.clear(); - pipe.extractor[0].encodeCat(is,n,pos,is.forms[n],is.plemmas[n],is.heads[n], is.labels[n], is.feats[n],pipe.cl, act); + pipe.extractor[0].encodeCat(is, n, pos, is.forms[n], is.plemmas[n], is.heads[n], is.labels[n], + is.feats[n], pipe.cl, act); - params.update(act, pred, is, n, d, upd,e); + params.update(act, pred, is, n, d, upd, e); } - String info = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F) - +" te "+((Pipe.timeExtract)/1000000F)+" nz "+params.countNZ(); - PipeGen.outValueErr(numInstances, error,f1/numInstances,del,last, upd,info); - del=0; + String info = " td " + ((Decoder.timeDecotder) / 1000000F) + " tr " + ((Decoder.timeRearrange) / 1000000F) + + " te " + ((Pipe.timeExtract) / 1000000F) + " nz " + params.countNZ(); + PipeGen.outValueErr(numInstances, error, f1 / numInstances, del, last, upd, info); + del = 0; long end = System.currentTimeMillis(); - System.out.println(" time:"+(end-start)); - + System.out.println(" time:" + (end - start)); - ParametersFloat pf = params.average2((iter+1)*is.size()); + ParametersFloat pf = params.average2((iter + 1) * is.size()); try { - if (options.testfile!=null && options.goldfile!=null) { - out (options, pipe, pf, ! MAX_INFO,false); + if (options.testfile != null && options.goldfile != null) { + out(options, pipe, pf, !MAX_INFO, false); ParserEvaluator.evaluate(options.goldfile, options.outfile); - // writeModell(options, pf, ""+(iter+1),pipe.cl); + // writeModell(options, pf, ""+(iter+1),pipe.cl); } - } catch (Exception e) { e.printStackTrace(); - } + } - if (error==0) { + if (error == 0) { DB.println("stopped because learned all lessons"); break; } - Decoder.timeDecotder=0;Decoder.timeRearrange=0; Pipe.timeExtract=0; - + Decoder.timeDecotder = 0; + Decoder.timeRearrange = 0; + Pipe.timeExtract = 0; } - if (options.average)params.average(iter*is.size()); - } - + if (options.average) + params.average(iter * is.size()); + } /** * Do the parsing job - * + * * @param options * @param pipe * @param params * @throws IOException */ - private void out (OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo, boolean labelOnly) + private void out(OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo, boolean labelOnly) throws Exception { long start = System.currentTimeMillis(); @@ -386,107 +375,247 @@ public class Parser implements Tool, Retrainable { CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask); int cnt = 0; - int del=0; + int del = 0; long last = System.currentTimeMillis(); - if (maxInfo) System.out.println("\nParsing Information "); - if (maxInfo) System.out.println("------------------- "); + if (maxInfo) + System.out.println("\nParsing Information "); + if (maxInfo) + System.out.println("------------------- "); - if (maxInfo && !options.decodeProjective) System.out.println(""+Decoder.getInfo()); + if (maxInfo && !options.decodeProjective) + System.out.println("" + Decoder.getInfo()); System.out.print("Processing Sentence: "); - while(true) { + while (true) { - // Instances is = new Instances(); - // is.init(1, new MFO(),options.formatTask); + // Instances is = new Instances(); + // is.init(1, new MFO(),options.formatTask); - // SentenceData09 instance = pipe.nextInstance(is, depReader); + // SentenceData09 instance = pipe.nextInstance(is, depReader); SentenceData09 instance = depReader.getNext(); - if (instance==null) break; + if (instance == null) + break; cnt++; - SentenceData09 i09 = this.parse(instance,params, labelOnly,options); + SentenceData09 i09 = this.parse(instance, params, labelOnly, options); depWriter.write(i09); - del=PipeGen.outValue(cnt, del,last); + del = PipeGen.outValue(cnt, del, last); + + } + // pipe.close(); + depWriter.finishWriting(); + long end = System.currentTimeMillis(); + // DB.println("errors "+error); + if (maxInfo) + System.out.println("Used time " + (end - start)); + if (maxInfo) + System.out.println("forms count " + Instances.m_count + " unkown " + Instances.m_unkown); + + } + + private void outAll(OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo, boolean labelOnly) + throws Exception { + + long start = System.currentTimeMillis(); + + CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask); + CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask); + + int cnt = 0; + int del = 0; + long last = System.currentTimeMillis(); + + if (maxInfo) + System.out.println("\nParsing Information "); + if (maxInfo) + System.out.println("------------------- "); + + if (maxInfo && !options.decodeProjective) + System.out.println("" + Decoder.getInfo()); + + System.out.print("Processing Sentence: "); + + while (true) { + + // Instances is = new Instances(); + // is.init(1, new MFO(),options.formatTask); + + // SentenceData09 instance = pipe.nextInstance(is, depReader); + + SentenceData09 instance = depReader.getNext(); + if (instance == null) + break; + cnt++; + + SentenceData09[] i09 = this.parseAll(instance, params, labelOnly, options); + + for (SentenceData09 sd : i09) + depWriter.write(sd); + del = PipeGen.outValue(cnt, del, last); } - //pipe.close(); + // pipe.close(); depWriter.finishWriting(); long end = System.currentTimeMillis(); - // DB.println("errors "+error); - if (maxInfo) System.out.println("Used time " + (end-start)); - if (maxInfo) System.out.println("forms count "+Instances.m_count+" unkown "+Instances.m_unkown); + // DB.println("errors "+error); + if (maxInfo) + System.out.println("Used time " + (end - start)); + if (maxInfo) + System.out.println("forms count " + Instances.m_count + " unkown " + Instances.m_unkown); } /** * Parse a single sentence - * + * * @param instance * @param params * @param labelOnly * @param options * @return */ - public SentenceData09 parse (SentenceData09 instance, ParametersFloat params, boolean labelOnly, OptionsSuper options) { + public SentenceData09 parse(SentenceData09 instance, ParametersFloat params, boolean labelOnly, + OptionsSuper options) { String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)]; - for (Entry<String, Integer> e : MFO.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey(); + for (Entry<String, Integer> e : MFO.getFeatureSet().get(PipeGen.REL).entrySet()) + types[e.getValue()] = e.getKey(); + + is = new Instances(); + is.init(1, new MFO(), options.formatTask); + new CONLLReader09().insert(is, instance); - is = new Instances(); - is.init(1, new MFO(),options.formatTask); - new CONLLReader09().insert(is, instance); + // use for the training ppos - // use for the training ppos + SentenceData09 i09 = new SentenceData09(instance); + i09.createSemantic(instance); - SentenceData09 i09 = new SentenceData09(instance); - i09.createSemantic(instance); + if (labelOnly) { + F2SF f2s = params.getFV(); - if (labelOnly) { - F2SF f2s =params.getFV(); + // repair pheads - // repair pheads + is.pheads[0] = is.heads[0]; - is.pheads[0]= is.heads[0]; + for (int l = 0; l < is.pheads[0].length; l++) { + if (is.pheads[0][l] < 0) + is.pheads[0][l] = 0; + } - for(int l=0;l<is.pheads[0].length;l++) { - if (is.pheads[0][l]<0)is.pheads[0][l]=0; - } + short[] labels = pipe.extractor[0].searchLabel(is, 0, is.pposs[0], is.forms[0], is.plemmas[0], is.pheads[0], + is.plabels[0], is.feats[0], pipe.cl, f2s); - short[] labels = pipe.extractor[0].searchLabel(is, 0, is.pposs[0], is.forms[0], is.plemmas[0], is.pheads[0], is.plabels[0], is.feats[0], pipe.cl, f2s); + for (int j = 0; j < instance.forms.length - 1; j++) { + i09.plabels[j] = types[labels[j + 1]]; + i09.pheads[j] = is.pheads[0][j + 1]; + } + return i09; + } - for(int j = 0; j < instance.forms.length-1; j++) { - i09.plabels[j] = types[labels[j+1]]; - i09.pheads[j] = is.pheads[0][j+1]; - } - return i09; - } + if (options.maxLength > instance.length() && options.minLength <= instance.length()) { + try { + // System.out.println("prs "+instance.forms[0]); + // System.out.println("prs "+instance.toString()); + d2 = pipe.fillVector(params.getFV(), is, 0, null, pipe.cl);// cnt-1 + d = Decoder.decode(is.pposs[0], d2, options.decodeProjective, !Decoder.TRAINING); // cnt-1 + + } catch (Exception e) { + e.printStackTrace(); + } + + for (int j = 0; j < instance.forms.length - 1; j++) { + i09.plabels[j] = types[d.labels[j + 1]]; + i09.pheads[j] = d.heads[j + 1]; + } + } + return i09; + + } + + public SentenceData09[] parseAll(SentenceData09 instance, ParametersFloat params, boolean labelOnly, + OptionsSuper options) { + + String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)]; + for (Entry<String, Integer> e : MFO.getFeatureSet().get(PipeGen.REL).entrySet()) + types[e.getValue()] = e.getKey(); + + is = new Instances(); + is.init(1, new MFO(), options.formatTask); + new CONLLReader09().insert(is, instance); + + // use for the training ppos + + SentenceData09[] i09;// = new SentenceData09(instance); + // i09.createSemantic(instance); - if (options.maxLength > instance.length() && options.minLength <= instance.length()) { - try { - // System.out.println("prs "+instance.forms[0]); - // System.out.println("prs "+instance.toString()); - d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl);//cnt-1 - d =Decoder.decode(is.pposs[0],d2,options.decodeProjective, !Decoder.TRAINING); //cnt-1 - - }catch (Exception e) { - e.printStackTrace(); - } - - for(int j = 0; j < instance.forms.length-1; j++) { - i09.plabels[j] = types[d.labels[j+1]]; - i09.pheads[j] = d.heads[j+1]; - } + if (labelOnly) { + F2SF f2s = params.getFV(); + + // repair pheads + + is.pheads[0] = is.heads[0]; + + for (int l = 0; l < is.pheads[0].length; l++) { + if (is.pheads[0][l] < 0) + is.pheads[0][l] = 0; + } + + short[] labels = pipe.extractor[0].searchLabel(is, 0, is.pposs[0], is.forms[0], is.plemmas[0], is.pheads[0], + is.plabels[0], is.feats[0], pipe.cl, f2s); + + i09 = new SentenceData09[1]; + i09[0] = new SentenceData09(instance); + i09[0].createSemantic(instance); + + for (int j = 0; j < instance.forms.length - 1; j++) { + i09[0].plabels[j] = types[labels[j + 1]]; + i09[0].pheads[j] = is.pheads[0][j + 1]; + } + return i09; + } + + if (options.maxLength > instance.length() && options.minLength <= instance.length()) { + Parse[] ds = null; + + try { + // System.out.println("prs "+instance.forms[0]); + // System.out.println("prs "+instance.toString()); + d2 = pipe.fillVector(params.getFV(), is, 0, null, pipe.cl);// cnt-1 + ds = Decoder.decodeAll(is.pposs[0], d2, options.decodeProjective, !Decoder.TRAINING); // cnt-1 + + } catch (Exception e) { + e.printStackTrace(); + } + + i09 = new SentenceData09[ds.length]; + for (int i = 0; i < ds.length; ++i) { + i09[i] = new SentenceData09(instance); + i09[i].createSemantic(instance); + for (int j = 0; j < instance.forms.length - 1; j++) { + i09[i].plabels[j] = types[ds[i].labels[j + 1]]; + i09[i].pheads[j] = ds[i].heads[j + 1]; } - return i09; + } + + return i09; + } else { + i09 = new SentenceData09[1]; + i09[0] = new SentenceData09(instance); + i09[0].createSemantic(instance); + return i09; + } } is2.io.CONLLReader09 reader = new is2.io.CONLLReader09(true); - /* (non-Javadoc) + + /* + * (non-Javadoc) + * * @see is2.tools.Tool#apply(is2.data.SentenceData09) */ @Override @@ -495,23 +624,21 @@ public class Parser implements Tool, Retrainable { SentenceData09 it = new SentenceData09(); it.createWithRoot(snt09); - SentenceData09 out=null; + SentenceData09 out = null; try { + // for(int k=0;k<it.length();k++) { + // it.forms[k] = reader.normalize(it.forms[k]); + // it.plemmas[k] = reader.normalize(it.plemmas[k]); + // } - // for(int k=0;k<it.length();k++) { - // it.forms[k] = reader.normalize(it.forms[k]); - // it.plemmas[k] = reader.normalize(it.plemmas[k]); - // } - - out = parse(it,this.params,false,options); + out = parse(it, this.params, false, options); - - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); } - Decoder.executerService.shutdown(); + Decoder.executerService.shutdown(); Pipe.executerService.shutdown(); return out; @@ -519,34 +646,34 @@ public class Parser implements Tool, Retrainable { /** * Get the edge scores of the last parse. + * * @return the scores */ public float[] getInfo() { - float[] scores = new float[is.length(0)]; - Extractor.encode3(is.pposs[0], d.heads, d.labels, d2,scores); + Extractor.encode3(is.pposs[0], d.heads, d.labels, d2, scores); return scores; } - /** * Write the parsing model - * + * * @param options * @param params * @param extension * @throws FileNotFoundException * @throws IOException */ - private void writeModell(OptionsSuper options, ParametersFloat params, String extension, Cluster cs) throws FileNotFoundException, IOException { + private void writeModell(OptionsSuper options, ParametersFloat params, String extension, Cluster cs) + throws FileNotFoundException, IOException { - String name = extension==null?options.modelName:options.modelName+extension; - // System.out.println("Writting model: "+name); + String name = extension == null ? options.modelName : options.modelName + extension; + // System.out.println("Writting model: "+name); ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(name))); - zos.putNextEntry(new ZipEntry("data")); - DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos)); + zos.putNextEntry(new ZipEntry("data")); + DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos)); MFO.writeData(dos); cs.write(dos); @@ -556,109 +683,106 @@ public class Parser implements Tool, Retrainable { dos.writeBoolean(options.stack); dos.writeInt(options.featureCreation); - Edges.write(dos); dos.writeBoolean(options.decodeProjective); dos.writeInt(Extractor.maxForm); - dos.writeInt(5); // Info count - dos.writeUTF("Used parser "+Parser.class.toString()); - dos.writeUTF("Creation date "+(new SimpleDateFormat("yyyy.MM.dd HH:mm:ss")).format(new Date())); - dos.writeUTF("Training data "+options.trainfile); - dos.writeUTF("Iterations "+options.numIters+" Used sentences "+options.count); - dos.writeUTF("Cluster "+options.clusterFile); + dos.writeInt(5); // Info count + dos.writeUTF("Used parser " + Parser.class.toString()); + dos.writeUTF("Creation date " + (new SimpleDateFormat("yyyy.MM.dd HH:mm:ss")).format(new Date())); + dos.writeUTF("Training data " + options.trainfile); + dos.writeUTF("Iterations " + options.numIters + " Used sentences " + options.count); + dos.writeUTF("Cluster " + options.clusterFile); dos.flush(); dos.close(); } - @Override public boolean retrain(SentenceData09 sentence, float upd, int iterations) { params.total = params.parameters; - boolean done=false; + boolean done = false; - for(int k=0;k<iterations;k++) { + for (int k = 0; k < iterations; k++) { try { // create the data structure - DataFES data = new DataFES(sentence.length(), pipe.mf.getFeatureCounter().get(PipeGen.REL).shortValue()); - + DataFES data = new DataFES(sentence.length(), + pipe.mf.getFeatureCounter().get(PipeGen.REL).shortValue()); Instances is = new Instances(); - is.m_encoder =pipe.mf; - + is.m_encoder = pipe.mf; + is.init(1, pipe.mf, options.formatTask); + new CONLLReader09().insert(is, sentence); - is.init(1, pipe.mf,options.formatTask); - new CONLLReader09().insert(is, sentence); + // String list[] = + // ((MFO)is.m_encoder).reverse(((MFO)is.m_encoder).getFeatureSet().get(Pipe.POS)); + // for(String s :list) { + // System.out.println(s+" "); + // } - // String list[] = ((MFO)is.m_encoder).reverse(((MFO)is.m_encoder).getFeatureSet().get(Pipe.POS)); - // for(String s :list) { - // System.out.println(s+" "); - // } + // for(int i=0;i<is.length(0);i++) { - // for(int i=0;i<is.length(0);i++) { + // System.out.printf("%d\t %d\t %d + // \n",i,is.forms[0][i],is.pposs[0][i] ); + // System.out.printf("%s\t form:%s + // pos:%s\n",i,sentence.forms[i],sentence.ppos[i]); - // System.out.printf("%d\t %d\t %d \n",i,is.forms[0][i],is.pposs[0][i] ); - // System.out.printf("%s\t form:%s pos:%s\n",i,sentence.forms[i],sentence.ppos[i]); - - // } + // } SentenceData09 i09 = new SentenceData09(sentence); i09.createSemantic(sentence); - - - // create the weights - data = pipe.fillVector((F2SF)params.getFV(), is, 0, data, pipe.cl); + // create the weights + data = pipe.fillVector(params.getFV(), is, 0, data, pipe.cl); short[] pos = is.pposs[0]; // parse the sentence - Parse d = Decoder.decode(pos, data, options.decodeProjective, Decoder.TRAINING); + Parse d = Decoder.decode(pos, data, options.decodeProjective, Decoder.TRAINING); // training successful? - double e= pipe.errors(is, 0 ,d); - // System.out.println("errors "+e); - if (e==0) { - + double e = pipe.errors(is, 0, d); + // System.out.println("errors "+e); + if (e == 0) { - done= true; + done = true; break; } // update the weight vector FV pred = new FV(); - pipe.extractor[0].encodeCat(is,0,pos,is.forms[0],is.plemmas[0],d.heads, d.labels, is.feats[0],pipe.cl, pred); + pipe.extractor[0].encodeCat(is, 0, pos, is.forms[0], is.plemmas[0], d.heads, d.labels, is.feats[0], + pipe.cl, pred); params.getFV(); FV act = new FV(); - pipe.extractor[0].encodeCat(is,0,pos,is.forms[0],is.plemmas[0],is.heads[0], is.labels[0], is.feats[0],pipe.cl, act); + pipe.extractor[0].encodeCat(is, 0, pos, is.forms[0], is.plemmas[0], is.heads[0], is.labels[0], + is.feats[0], pipe.cl, act); - params.update(act, pred, is, 0, d, upd,e); + params.update(act, pred, is, 0, d, upd, e); - if (upd >0)upd--; + if (upd > 0) + upd--; - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); } } - Decoder.executerService.shutdown(); + Decoder.executerService.shutdown(); Pipe.executerService.shutdown(); - return done; } - @Override public boolean retrain(SentenceData09 sentence, float upd, int iterations, boolean print) { // TODO Auto-generated method stub - return retrain( sentence, upd, iterations); + return retrain(sentence, upd, iterations); } } \ No newline at end of file diff --git a/dependencyParser/mate-tools/src/is2/parser/Pipe.java b/dependencyParser/mate-tools/src/is2/parser/Pipe.java index 13e9389..f7f3782 100755 --- a/dependencyParser/mate-tools/src/is2/parser/Pipe.java +++ b/dependencyParser/mate-tools/src/is2/parser/Pipe.java @@ -1,30 +1,26 @@ package is2.parser; +import java.io.IOException; +import java.util.ArrayList; +import java.util.concurrent.ExecutorService; + import is2.data.Cluster; -import is2.data.DataF; import is2.data.DataFES; import is2.data.F2SF; import is2.data.Instances; - import is2.data.Parse; import is2.data.PipeGen; import is2.data.SentenceData09; import is2.io.CONLLReader09; - import is2.util.OptionsSuper; -import java.io.IOException; -import java.util.ArrayList; -import java.util.concurrent.ExecutorService; - final public class Pipe extends PipeGen { public Extractor[] extractor; final public MFO mf = new MFO(); public Cluster cl; - - + private OptionsSuper options; public static long timeExtract; @@ -32,58 +28,62 @@ final public class Pipe extends PipeGen { options = o; } - public void createInstances(String file, Instances is) - throws Exception { + public void createInstances(String file, Instances is) throws Exception { CONLLReader09 depReader = new CONLLReader09(file); - mf.register(REL,"<root-type>"); + mf.register(REL, "<root-type>"); - // register at least one predicate since the parsing data might not contain predicates as in + // register at least one predicate since the parsing data might not + // contain predicates as in // the Japaness corpus but the development sets contains some - long sl=0; - System.out.print("Registering feature parts of sentence: "); int ic = 0; int del = 0; while (true) { SentenceData09 instance = depReader.getNext(); - if (instance == null) break; + if (instance == null) + break; ic++; - sl+=instance.labels.length; - if (ic % 1000 == 0) { del = outValue(ic, del); } String[] labs1 = instance.labels; - for (int i1 = 0; i1 < labs1.length; i1++) mf.register(REL, labs1[i1]); + for (String element : labs1) + mf.register(REL, element); String[] w = instance.forms; - for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1])); + for (String element : w) + mf.register(WORD, depReader.normalize(element)); w = instance.plemmas; - for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1])); - + for (String element : w) + mf.register(WORD, depReader.normalize(element)); w = instance.ppos; - for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); w = instance.gpos; - for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); - if (instance.feats !=null) { + if (instance.feats != null) { String fs[][] = instance.feats; - for (int i1 = 0; i1 < fs.length; i1++){ - w =fs[i1]; - if (w==null) continue; - for (int i2 = 0; i2 < w.length; i2++) mf.register(FEAT, w[i2]); + for (String[] element : fs) { + w = element; + if (w == null) + continue; + for (String element2 : w) + mf.register(FEAT, element2); } } - if ((ic-1)>options.count) break; + if ((ic - 1) > options.count) + break; } del = outValue(ic, del); @@ -91,48 +91,52 @@ final public class Pipe extends PipeGen { Extractor.initFeatures(); Extractor.maxForm = mf.getFeatureCounter().get(WORD); - - if (options.clusterFile==null)cl = new Cluster(); - else cl= new Cluster(options.clusterFile, mf,6); - - + + if (options.clusterFile == null) + cl = new Cluster(); + else + cl = new Cluster(options.clusterFile, mf, 6); mf.calculateBits(); Extractor.initStat(options.featureCreation); - - System.out.println(""+mf.toString()); - - for(Extractor e : extractor) e.init(); + + System.out.println("" + mf.toString()); + + for (Extractor e : extractor) + e.init(); depReader.startReading(file); int num1 = 0; - + is.init(ic, new MFO()); Edges.init(mf.getFeatureCounter().get(POS)); - - + System.out.print("Creating edge filters and read corpus: "); del = 0; while (true) { - if (num1 % 100 == 0) del = outValue(num1, del); + if (num1 % 100 == 0) + del = outValue(num1, del); SentenceData09 instance1 = depReader.getNext(is); - if (instance1 == null) break; + if (instance1 == null) + break; int last = is.size() - 1; - short[] pos =is.pposs[last]; + short[] pos = is.pposs[last]; for (int k = 0; k < is.length(last); k++) { - if (is.heads[last][k] < 0) continue; - Edges.put(pos[is.heads[last][k]],pos[k], is.labels[last][k]); -// Edges.put(pos[k],pos[is.heads[last][k]], is.labels[last][k]); + if (is.heads[last][k] < 0) + continue; + Edges.put(pos[is.heads[last][k]], pos[k], is.labels[last][k]); + // Edges.put(pos[k],pos[is.heads[last][k]], is.labels[last][k]); } - if (!options.allFeatures && num1 > options.count) break; + if (!options.allFeatures && num1 > options.count) + break; num1++; @@ -142,10 +146,9 @@ final public class Pipe extends PipeGen { Edges.findDefault(); } - /** * Creates an instance for outputParses - * + * * @param is * @return * @throws IOException @@ -153,68 +156,68 @@ final public class Pipe extends PipeGen { protected final SentenceData09 nextInstance(Instances is, CONLLReader09 depReader) throws Exception { SentenceData09 instance = depReader.getNext(is); - if (instance == null || instance.forms == null) return null; + if (instance == null || instance.forms == null) + return null; return instance; } - public static ExecutorService executerService =java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS); - + public static ExecutorService executerService = java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS); - public DataFES fillVector(F2SF params, Instances is,int inst, DataFES d, Cluster cluster) throws InterruptedException { + public DataFES fillVector(F2SF params, Instances is, int inst, DataFES d, Cluster cluster) + throws InterruptedException { long ts = System.nanoTime(); - if (executerService.isShutdown()) executerService =java.util.concurrent.Executors.newCachedThreadPool(); - - + if (executerService.isShutdown()) + executerService = java.util.concurrent.Executors.newCachedThreadPool(); + final int length = is.length(inst); - if (d ==null || d.len<length)d = new DataFES(length,mf.getFeatureCounter().get(PipeGen.REL).shortValue()); + if (d == null || d.len < length) + d = new DataFES(length, mf.getFeatureCounter().get(PipeGen.REL).shortValue()); ArrayList<ParallelExtract> pe = new ArrayList<ParallelExtract>(); - for(int i=0;i<Parser.THREADS;i++) pe.add(new ParallelExtract(extractor[i],is, inst, d, (F2SF)params.clone(), cluster)); + for (int i = 0; i < Parser.THREADS; i++) + pe.add(new ParallelExtract(extractor[i], is, inst, d, (F2SF) params.clone(), cluster)); for (int w1 = 0; w1 < length; w1++) { - for (int w2 =w1+1; w2 < length; w2++) { + for (int w2 = w1 + 1; w2 < length; w2++) { - if (w1==w2) continue; + if (w1 == w2) + continue; ParallelExtract.add(w1, w2); - } } -// for(int i=0;i<efp.length;i++) efp[i].start(); -// for(int i=0;i<efp.length;i++) efp[i].join(); - executerService.invokeAll( pe); - - timeExtract += (System.nanoTime()-ts); + // for(int i=0;i<efp.length;i++) efp[i].start(); + // for(int i=0;i<efp.length;i++) efp[i].join(); + executerService.invokeAll(pe); - - + timeExtract += (System.nanoTime() - ts); - - return d; } - public double errors( Instances is, int ic, Parse p) { + public double errors(Instances is, int ic, Parse p) { short[] act = is.heads[ic]; double correct = 0; // do not count root - for(int i = 1; i < act.length; i++) { - - // if (is.ppos[ic] ==null ) System.out.println("mf null"+is.ppos[ic][i]); - if (p.heads[i]==act[i] ){ - correct+=0.5; - if (p.labels[i]==is.labels[ic][i] ) correct+=0.5; + for (int i = 1; i < act.length; i++) { + + // if (is.ppos[ic] ==null ) System.out.println("mf + // null"+is.ppos[ic][i]); + if (p.heads[i] == act[i]) { + correct += 0.5; + if (p.labels[i] == is.labels[ic][i]) + correct += 0.5; } } - double x = ((double)act.length- 1 - correct ); + double x = ((double) act.length - 1 - correct); - p.f1 = (double)correct / (double)(act.length-1); + p.f1 = correct / (act.length - 1); return x; } diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Decoder.java b/dependencyParser/mate-tools/src/is2/parserR2/Decoder.java index 1f0424e..2ba175f 100755 --- a/dependencyParser/mate-tools/src/is2/parserR2/Decoder.java +++ b/dependencyParser/mate-tools/src/is2/parserR2/Decoder.java @@ -10,28 +10,24 @@ import decoder.ParallelDecoder; import decoder.ParallelRearrangeNBest; import decoder.ParallelRearrangeNBest2; import extractors.Extractor; - - import is2.data.Closed; import is2.data.DataF; import is2.data.Instances; import is2.data.Open; import is2.data.Parse; import is2.data.ParseNBest; -import is2.util.DB; - /** * @author Bernd Bohnet, 01.09.2009 - * - * This methods do the actual work and they build the dependency trees. + * + * This methods do the actual work and they build the dependency trees. */ -final public class Decoder { +final public class Decoder { public static final boolean TRAINING = true; public static long timeDecotder; public static long timeRearrange; - + public static final boolean LAS = true; /** @@ -39,49 +35,57 @@ final public class Decoder { */ public static float NON_PROJECTIVITY_THRESHOLD = 0.3F; - public static ExecutorService executerService =java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS); + public static ExecutorService executerService = java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS); - // do not initialize - private Decoder() {}; - - + private Decoder() { + }; + /** * Build a dependency tree based on the data - * @param pos part-of-speech tags - * @param x the data - * @param projective projective or non-projective - * @param edges the edges + * + * @param pos + * part-of-speech tags + * @param x + * the data + * @param projective + * projective or non-projective + * @param edges + * the edges * @return a parse tree * @throws InterruptedException */ - public static List<ParseNBest> decode(short[] pos, DataF x, boolean projective, Extractor extractor) throws InterruptedException { + public static List<ParseNBest> decode(short[] pos, DataF x, boolean projective, Extractor extractor) + throws InterruptedException { long ts = System.nanoTime(); - - if (executerService.isShutdown()) executerService = java.util.concurrent.Executors.newCachedThreadPool(); + + if (executerService.isShutdown()) + executerService = java.util.concurrent.Executors.newCachedThreadPool(); final int n = pos.length; final Open O[][][][] = new Open[n][n][2][]; final Closed C[][][][] = new Closed[n][n][2][]; - ArrayList<ParallelDecoder> pe = new ArrayList<ParallelDecoder>(); + ArrayList<ParallelDecoder> pe = new ArrayList<ParallelDecoder>(); + + for (int i = 0; i < Parser.THREADS; i++) + pe.add(new ParallelDecoder(pos, x, O, C, n)); - for(int i=0;i<Parser.THREADS ;i++) pe.add(new ParallelDecoder(pos, x, O, C, n)); - for (short k = 1; k < n; k++) { // provide the threads the data for (short s = 0; s < n; s++) { short t = (short) (s + k); - if (t >= n) break; - - ParallelDecoder.add(s,t); + if (t >= n) + break; + + ParallelDecoder.add(s, t); } - + executerService.invokeAll(pe); } - + double bestSpanScore = (-1.0F / 0.0F); Closed bestSpan = null; for (int m = 1; m < n; m++) @@ -90,128 +94,138 @@ final public class Decoder { bestSpan = C[0][n - 1][1][m]; } - // build the dependency tree from the chart - ParseNBest out= new ParseNBest(pos.length); + // build the dependency tree from the chart + ParseNBest out = new ParseNBest(pos.length); bestSpan.create(out); - out.heads[0]=-1; - out.labels[0]=0; - bestProj=out; + out.heads[0] = -1; + out.labels[0] = 0; + bestProj = out; + + timeDecotder += (System.nanoTime() - ts); + // DB.println(""+out); - timeDecotder += (System.nanoTime()-ts); - // DB.println(""+out); - ts = System.nanoTime(); List<ParseNBest> parses; - + if (!projective) { - - // if (training) - // rearrange(pos, out.heads, out.types,x,training); - //else { - // DB.println("bestSpan score "+(float)bestSpan.p+" comp score "+Extractor.encode3(pos, out.heads, out.types, x)); - // System.out.println(); - // Parse best = new Parse(out.heads,out.types,Extractor.encode3(pos, out.heads, out.types, x)); - parses = rearrangeNBest(pos, out.heads, out.labels,x,extractor); - // DB.println("1best "+parses.get(0).f1); - // DB.println(""+parses.get(0).toString()); - - - // for(ParseNBest p :parses) if (p.heads==null) p.signature2parse(p.signature()); - - /// if (parses.get(0).f1>(best.f1+NON_PROJECTIVITY_THRESHOLD)) out = parses.get(0); - // else out =best; - - // } + + // if (training) + // rearrange(pos, out.heads, out.types,x,training); + // else { + // DB.println("bestSpan score "+(float)bestSpan.p+" comp score + // "+Extractor.encode3(pos, out.heads, out.types, x)); + // System.out.println(); + // Parse best = new Parse(out.heads,out.types,Extractor.encode3(pos, + // out.heads, out.types, x)); + parses = rearrangeNBest(pos, out.heads, out.labels, x, extractor); + // DB.println("1best "+parses.get(0).f1); + // DB.println(""+parses.get(0).toString()); + + // for(ParseNBest p :parses) if (p.heads==null) + // p.signature2parse(p.signature()); + + /// if (parses.get(0).f1>(best.f1+NON_PROJECTIVITY_THRESHOLD)) out = + /// parses.get(0); + // else out =best; + + // } } else { parses = new ArrayList<ParseNBest>(); parses.add(out); } - timeRearrange += (System.nanoTime()-ts); + timeRearrange += (System.nanoTime() - ts); return parses; } static Parse bestProj = null; - - - /** * This is the parallel non-projective edge re-arranger - * - * @param pos part-of-speech tags - * @param heads parent child relation - * @param labs edge labels - * @param x the data - * @param edges the existing edges defined by part-of-speech tags + * + * @param pos + * part-of-speech tags + * @param heads + * parent child relation + * @param labs + * edge labels + * @param x + * the data + * @param edges + * the existing edges defined by part-of-speech tags * @throws InterruptedException */ - public static List<ParseNBest> rearrangeNBestP(short[] pos, short[] heads, short[] labs, DataF x, Extractor extractor) throws InterruptedException { - - ArrayList<ParallelRearrangeNBest2> pe = new ArrayList<ParallelRearrangeNBest2>(); - - int round =0; + public static List<ParseNBest> rearrangeNBestP(short[] pos, short[] heads, short[] labs, DataF x, + Extractor extractor) throws InterruptedException { + + ArrayList<ParallelRearrangeNBest2> pe = new ArrayList<ParallelRearrangeNBest2>(); + + int round = 0; ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>(); - ParseNBest px =new ParseNBest(); - px.signature(heads,labs); - //Object extractor; - px.f1=extractor.encode3(pos, heads, labs, x); + ParseNBest px = new ParseNBest(); + px.signature(heads, labs); + // Object extractor; + px.f1 = extractor.encode3(pos, heads, labs, x); parses.add(px); - + float lastNBest = Float.NEGATIVE_INFINITY; - + HashSet<Parse> done = new HashSet<Parse>(); gnu.trove.THashSet<CharSequence> contained = new gnu.trove.THashSet<CharSequence>(); - while(true) { - + while (true) { + pe.clear(); // used the first three parses - int ic=0, considered=0; - while(true) { - - if (parses.size()<=ic || considered>11) break; - + int ic = 0, considered = 0; + while (true) { + + if (parses.size() <= ic || considered > 11) + break; + ParseNBest parse = parses.get(ic); - + ic++; // parse already extended - if (done.contains(parse)) continue; + if (done.contains(parse)) + continue; considered++; parse.signature2parse(parse.signature()); - + done.add(parse); - - - boolean[][] isChild = new boolean[heads.length][heads.length]; - - for(int i = 1, l1=1; i < heads.length; i++,l1=i) - while((l1= heads[l1]) != -1) isChild[l1][i] = true; - - - // check the list of new possible parents and children for a better combination - for(short ch = 1; ch < heads.length; ch++) { - for(short pa = 0; pa < heads.length; pa++) { - if(ch == pa || pa == heads[ch] || isChild[ch][pa]) continue; + + boolean[][] isChild = new boolean[heads.length][heads.length]; + + for (int i = 1, l1 = 1; i < heads.length; i++, l1 = i) + while ((l1 = heads[l1]) != -1) + isChild[l1][i] = true; + + // check the list of new possible parents and children for a + // better combination + for (short ch = 1; ch < heads.length; ch++) { + for (short pa = 0; pa < heads.length; pa++) { + if (ch == pa || pa == heads[ch] || isChild[ch][pa]) + continue; ParallelRearrangeNBest2.add(parse.clone(), ch, pa); } } - - } - for(int t =0;t<Parser.THREADS;t++) pe.add(new ParallelRearrangeNBest2( pos,x,lastNBest,extractor, NON_PROJECTIVITY_THRESHOLD) ); + } + + for (int t = 0; t < Parser.THREADS; t++) + pe.add(new ParallelRearrangeNBest2(pos, x, lastNBest, extractor, NON_PROJECTIVITY_THRESHOLD)); - executerService.invokeAll(pe); - + // avoid to add parses several times - for(ParallelRearrangeNBest2 rp : pe) { - for(int k=rp.parses.size()-1;k>=0;k--) { - if (lastNBest>rp.parses.get(k).f1) continue; + for (ParallelRearrangeNBest2 rp : pe) { + for (int k = rp.parses.size() - 1; k >= 0; k--) { + if (lastNBest > rp.parses.get(k).f1) + continue; CharSequence sig = rp.parses.get(k).signature(); if (!contained.contains(sig)) { parses.add(rp.parses.get(k)); @@ -221,77 +235,88 @@ final public class Decoder { } Collections.sort(parses); - - if (round >=2) break; - round ++; - + + if (round >= 2) + break; + round++; + // do not use to much memory - if (parses.size()>Parser.NBest) { - // if (parses.get(Parser.NBest).f1>lastNBest) lastNBest = (float)parses.get(Parser.NBest).f1; - parses.subList(Parser.NBest, parses.size()-1).clear(); + if (parses.size() > Parser.NBest) { + // if (parses.get(Parser.NBest).f1>lastNBest) lastNBest = + // (float)parses.get(Parser.NBest).f1; + parses.subList(Parser.NBest, parses.size() - 1).clear(); } } return parses; } - /** * This is the parallel non-projective edge re-arranger - * - * @param pos part-of-speech tags - * @param heads parent child relation - * @param labs edge labels - * @param x the data - * @param edges the existing edges defined by part-of-speech tags + * + * @param pos + * part-of-speech tags + * @param heads + * parent child relation + * @param labs + * edge labels + * @param x + * the data + * @param edges + * the existing edges defined by part-of-speech tags * @throws InterruptedException */ - public static List<ParseNBest> rearrangeNBest(short[] pos, short[] heads, short[] labs, DataF x, Extractor extractor) throws InterruptedException { - - ArrayList<ParallelRearrangeNBest> pe = new ArrayList<ParallelRearrangeNBest>(); - - int round =0; + public static List<ParseNBest> rearrangeNBest(short[] pos, short[] heads, short[] labs, DataF x, + Extractor extractor) throws InterruptedException { + + ArrayList<ParallelRearrangeNBest> pe = new ArrayList<ParallelRearrangeNBest>(); + + int round = 0; ArrayList<ParseNBest> parses = new ArrayList<ParseNBest>(); - ParseNBest px =new ParseNBest(); - px.signature(heads,labs); - //Object extractor; - px.f1=extractor.encode3(pos, heads, labs, x); + ParseNBest px = new ParseNBest(); + px.signature(heads, labs); + // Object extractor; + px.f1 = extractor.encode3(pos, heads, labs, x); parses.add(px); - + float lastNBest = Float.NEGATIVE_INFINITY; - + HashSet<Parse> done = new HashSet<Parse>(); gnu.trove.THashSet<CharSequence> contained = new gnu.trove.THashSet<CharSequence>(); - while(true) { - + while (true) { + pe.clear(); // used the first three parses - int i=0; - while(true) { - - if (parses.size()<=i||pe.size()>12) break; - + int i = 0; + while (true) { + + if (parses.size() <= i || pe.size() > 12) + break; + ParseNBest parse = parses.get(i); - + i++; - + // parse already extended - if (done.contains(parse)) continue; + if (done.contains(parse)) + continue; -// DB.println("err "+parse.heads); + // DB.println("err "+parse.heads); parse.signature2parse(parse.signature()); - + done.add(parse); - pe.add(new ParallelRearrangeNBest( pos,x,parse,lastNBest,extractor, (float)parse.f1,NON_PROJECTIVITY_THRESHOLD) ); - } - + pe.add(new ParallelRearrangeNBest(pos, x, parse, lastNBest, extractor, (float) parse.f1, + NON_PROJECTIVITY_THRESHOLD)); + } + executerService.invokeAll(pe); - + // avoid to add parses several times - for(ParallelRearrangeNBest rp : pe) { - for(int k=rp.parses.size()-1;k>=0;k--) { - if (lastNBest>rp.parses.get(k).f1) continue; + for (ParallelRearrangeNBest rp : pe) { + for (int k = rp.parses.size() - 1; k >= 0; k--) { + if (lastNBest > rp.parses.get(k).f1) + continue; CharSequence sig = rp.parses.get(k).signature(); if (!contained.contains(sig)) { parses.add(rp.parses.get(k)); @@ -301,25 +326,26 @@ final public class Decoder { } Collections.sort(parses); - - if (round >=2) break; - round ++; - + + if (round >= 2) + break; + round++; + // do not use to much memory - if (parses.size()>Parser.NBest) { - if (parses.get(Parser.NBest).f1>lastNBest) lastNBest = (float)parses.get(Parser.NBest).f1; - parses.subList(Parser.NBest, parses.size()-1).clear(); + if (parses.size() > Parser.NBest) { + if (parses.get(Parser.NBest).f1 > lastNBest) + lastNBest = (float) parses.get(Parser.NBest).f1; + parses.subList(Parser.NBest, parses.size() - 1).clear(); } } return parses; } - + public static String getInfo() { - return "Decoder non-projectivity threshold: "+NON_PROJECTIVITY_THRESHOLD; + return "Decoder non-projectivity threshold: " + NON_PROJECTIVITY_THRESHOLD; } - /** * @param parses * @param is @@ -327,51 +353,53 @@ final public class Decoder { * @return */ public static int getGoldRank(List<ParseNBest> parses, Instances is, int i, boolean las) { - - for(int p=0;p<parses.size();p++) { - - if (parses.get(p).heads==null)parses.get(p).signature2parse(parses.get(p).signature()); - - boolean eq =true; - for(int w =1;w<is.length(0);w++) { - if (is.heads[i][w]!=parses.get(p).heads[w] || (is.labels[i][w]!=parses.get(p).labels[w]&& las )) { - eq=false; + + for (int p = 0; p < parses.size(); p++) { + + if (parses.get(p).heads == null) + parses.get(p).signature2parse(parses.get(p).signature()); + + boolean eq = true; + for (int w = 1; w < is.length(0); w++) { + if (is.heads[i][w] != parses.get(p).heads[w] || (is.labels[i][w] != parses.get(p).labels[w] && las)) { + eq = false; break; } } - if (eq) return p; + if (eq) + return p; } return -1; } public static int getSmallestError(List<ParseNBest> parses, Instances is, int i, boolean las) { - - int smallest=-1; - for(int p=0;p<parses.size();p++) { - - int err=0; - for(int w =1;w<is.length(0);w++) { - if (is.heads[i][w]!=parses.get(p).heads[w] || (is.labels[i][w]!=parses.get(p).labels[w] && las )) { + + int smallest = -1; + for (int p = 0; p < parses.size(); p++) { + + int err = 0; + for (int w = 1; w < is.length(0); w++) { + if (is.heads[i][w] != parses.get(p).heads[w] || (is.labels[i][w] != parses.get(p).labels[w] && las)) { err++; } } - if (smallest==-1||smallest>err) smallest=err; - if (smallest==0) return 0; + if (smallest == -1 || smallest > err) + smallest = err; + if (smallest == 0) + return 0; } return smallest; } public static int getError(ParseNBest parse, Instances is, int i, boolean las) { - - - int err=0; - for(int w =1;w<is.length(i);w++) { - if (is.heads[i][w]!=parse.heads[w] || (is.labels[i][w]!=parse.labels[w] && las )) { - err++; - } + + int err = 0; + for (int w = 1; w < is.length(i); w++) { + if (is.heads[i][w] != parse.heads[w] || (is.labels[i][w] != parse.labels[w] && las)) { + err++; } + } return err; } - } diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Options.java b/dependencyParser/mate-tools/src/is2/parserR2/Options.java index b5ec0f9..eb396b4 100755 --- a/dependencyParser/mate-tools/src/is2/parserR2/Options.java +++ b/dependencyParser/mate-tools/src/is2/parserR2/Options.java @@ -2,56 +2,66 @@ package is2.parserR2; import is2.util.OptionsSuper; - public final class Options extends OptionsSuper { - - int start=0, end=0; - String prefix_model ="m"; - String prefix_test ="t"; - - public Options (String[] args) { - - for(int i = 0; i < args.length; i++) { + int start = 0, end = 0; + String prefix_model = "m"; + String prefix_test = "t"; + + public Options(String[] args) { + + for (int i = 0; i < args.length; i++) { + + if (args[i].equals("--help")) + explain(); - if (args[i].equals("--help")) explain(); - if (args[i].equals("-decode")) { - decodeProjective = args[i+1].equals("proj"); i++; - } else if (args[i].equals("-decodeTH")) { - decodeTH = Double.parseDouble(args[i+1]); i++; + decodeProjective = args[i + 1].equals("proj"); + i++; + } else if (args[i].equals("-decodeTH")) { + decodeTH = Double.parseDouble(args[i + 1]); + i++; } else if (args[i].equals("-nonormalize")) { - normalize=false; + normalize = false; } else if (args[i].equals("-features")) { - features= args[i+1]; i++; + features = args[i + 1]; + i++; } else if (args[i].equals("-hsize")) { - hsize= Integer.parseInt(args[i+1]); i++; + hsize = Integer.parseInt(args[i + 1]); + i++; } else if (args[i].equals("-len")) { - maxLen= Integer.parseInt(args[i+1]); i++; + maxLen = Integer.parseInt(args[i + 1]); + i++; } else if (args[i].equals("-cores")) { - cores= Integer.parseInt(args[i+1]); i++; + cores = Integer.parseInt(args[i + 1]); + i++; } else if (args[i].equals("-best")) { - best= Integer.parseInt(args[i+1]); i++; + best = Integer.parseInt(args[i + 1]); + i++; } else if (args[i].equals("-start")) { - start= Integer.parseInt(args[i+1]); i++; + start = Integer.parseInt(args[i + 1]); + i++; } else if (args[i].equals("-end")) { - end= Integer.parseInt(args[i+1]); i++; + end = Integer.parseInt(args[i + 1]); + i++; } else if (args[i].equals("-prefix-model")) { - prefix_model= args[i+1]; i++; + prefix_model = args[i + 1]; + i++; } else if (args[i].equals("-prefix-test")) { - prefix_test= args[i+1]; i++; + prefix_test = args[i + 1]; + i++; } else if (args[i].equals("-mapping")) { - this.useMapping= args[i+1]; i++; + this.useMapping = args[i + 1]; + i++; } else if (args[i].equals("-no2nd")) { - no2nd= true; + no2nd = true; } else if (args[i].equals("-few2nd")) { - few2nd= true; - } else super.addOption(args, i); - - } + few2nd = true; + } else + super.addOption(args, i); + } - } private void explain() { @@ -59,19 +69,24 @@ public final class Options extends OptionsSuper { System.out.println("java -class mate.jar is2.parser.Parser [Options]"); System.out.println(); System.out.println("Example: "); - System.out.println(" java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6"); + System.out.println( + " java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6"); System.out.println(""); System.out.println("Options:"); System.out.println(""); - System.out.println(" -train <file> the corpus a model is trained on; default "+this.trainfile); - System.out.println(" -test <file> the input corpus for testing; default "+this.testfile); - System.out.println(" -out <file> the output corpus (result) of a test run; default "+this.outfile); + System.out.println(" -train <file> the corpus a model is trained on; default " + this.trainfile); + System.out.println(" -test <file> the input corpus for testing; default " + this.testfile); + System.out.println(" -out <file> the output corpus (result) of a test run; default " + this.outfile); System.out.println(" -model <file> the parsing model for traing the model is stored in the files"); - System.out.println(" and for parsing the model is load from this file; default "+this.modelName); - System.out.println(" -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters); - System.out.println(" -count <number> the n first sentences of the corpus are take for the training default "+this.count); - System.out.println(" -format <number> conll format of the year 8 or 9; default "+this.formatTask); - + System.out.println( + " and for parsing the model is load from this file; default " + this.modelName); + System.out.println( + " -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default " + + this.numIters); + System.out.println(" -count <number> the n first sentences of the corpus are take for the training default " + + this.count); + System.out.println(" -format <number> conll format of the year 8 or 9; default " + this.formatTask); + System.exit(0); } } diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Parameters.java b/dependencyParser/mate-tools/src/is2/parserR2/Parameters.java index 0917ea8..de7b591 100755 --- a/dependencyParser/mate-tools/src/is2/parserR2/Parameters.java +++ b/dependencyParser/mate-tools/src/is2/parserR2/Parameters.java @@ -1,5 +1,5 @@ /** - * + * */ package is2.parserR2; @@ -14,25 +14,24 @@ import is2.data.Parse; /** * @author Bernd Bohnet, 31.08.2009 - * - * + * + * */ public abstract class Parameters { - public abstract void average(double avVal); - + public abstract void update(FV act, FV pred, Instances isd, int instc, Parse d, double upd, double e); - + public abstract void write(DataOutputStream dos) throws IOException; - - public abstract void read(DataInputStream dis ) throws IOException; - + + public abstract void read(DataInputStream dis) throws IOException; + public abstract int size(); /** * @return */ - public abstract IFV getFV() ; - + public abstract IFV getFV(); + } diff --git a/dependencyParser/mate-tools/src/is2/parserR2/ParametersFloat.java b/dependencyParser/mate-tools/src/is2/parserR2/ParametersFloat.java index 44e6d76..2ba0aaa 100755 --- a/dependencyParser/mate-tools/src/is2/parserR2/ParametersFloat.java +++ b/dependencyParser/mate-tools/src/is2/parserR2/ParametersFloat.java @@ -1,5 +1,9 @@ package is2.parserR2; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; + import is2.data.F2SF; import is2.data.FV; import is2.data.FVR; @@ -7,159 +11,153 @@ import is2.data.Instances; import is2.data.Parse; import is2.util.DB; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; - - - -final public class ParametersFloat extends Parameters { +final public class ParametersFloat extends Parameters { public float[] parameters; private float[] total; public ParametersFloat(int size) { - parameters = new float[size]; + parameters = new float[size]; total = new float[size]; - for(int i = 0; i < parameters.length; i++) { + for (int i = 0; i < parameters.length; i++) { parameters[i] = 0F; total[i] = 0F; } } - /** * @param parameters2 */ public ParametersFloat(float[] p) { - parameters =p; + parameters = p; } - @Override public void average(double avVal) { - for(int j = 0; j < total.length; j++) { - parameters[j] = total[j]/((float)avVal); + for (int j = 0; j < total.length; j++) { + parameters[j] = total[j] / ((float) avVal); } - total =null; + total = null; } public ParametersFloat average2(double avVal) { float[] px = new float[this.parameters.length]; - for(int j = 0; j < total.length; j++) { - px[j] = total[j]/((float)avVal); + for (int j = 0; j < total.length; j++) { + px[j] = total[j] / ((float) avVal); } ParametersFloat pf = new ParametersFloat(px); return pf; } - - - public void update(FV act, FV pred, Instances isd, int instc, Parse dx, double upd, double e, - float d, float f) { + + public void update(FV act, FV pred, Instances isd, int instc, Parse dx, double upd, double e, float d, float f) { e++; - + float lam_dist = d - f; - - float b = (float)e-lam_dist; - + + float b = (float) e - lam_dist; + FV dist = act.getDistVector(pred); - - dist.update(parameters, total, hildreth(dist,b), upd,false); + + dist.update(parameters, total, hildreth(dist, b), upd, false); } + @Override public void update(FV act, FV pred, Instances isd, int instc, Parse dx, double upd, double e) { e++; - + float lam_dist = getScore(act) - getScore(pred); - - float b = (float)e-lam_dist; - + + float b = (float) e - lam_dist; + FV dist = act.getDistVector(pred); - - dist.update(parameters, total, hildreth(dist,b), upd,false); + + dist.update(parameters, total, hildreth(dist, b), upd, false); } public void update(FVR act, FVR pred, Instances isd, int instc, Parse dx, double upd, double e, float lam_dist) { e++; - - - float b = (float)e-lam_dist; - + + float b = (float) e - lam_dist; + FVR dist = act.getDistVector(pred); - - dist.update(parameters, total, hildreth(dist,b), upd,false); + + dist.update(parameters, total, hildreth(dist, b), upd, false); } - protected double hildreth(FV a, double b) { double A = a.dotProduct(a); - if (A<=0.0000000000000000001) return 0.0; - return b/A; + if (A <= 0.0000000000000000001) + return 0.0; + return b / A; } - protected double hildreth(FVR a, double b) { double A = a.dotProduct(a); - if (A<=0.0000000000000000001) return 0.0; - return b/A; + if (A <= 0.0000000000000000001) + return 0.0; + return b / A; } - public float getScore(FV fv) { - if (fv ==null) return 0.0F; - return fv.getScore(parameters,false); + if (fv == null) + return 0.0F; + return fv.getScore(parameters, false); } - public float getScore(FVR fv) { //xx - if (fv ==null) return 0.0F; - return fv.getScore(parameters,false); + public float getScore(FVR fv) { // xx + if (fv == null) + return 0.0F; + return fv.getScore(parameters, false); } - @Override - final public void write(DataOutputStream dos) throws IOException{ + final public void write(DataOutputStream dos) throws IOException { dos.writeInt(parameters.length); - for(float d : parameters) dos.writeFloat(d); + for (float d : parameters) + dos.writeFloat(d); } @Override - public void read(DataInputStream dis ) throws IOException{ + public void read(DataInputStream dis) throws IOException { parameters = new float[dis.readInt()]; - int notZero=0; - for(int i=0;i<parameters.length;i++) { - parameters[i]=dis.readFloat(); - if (parameters[i]!=0.0F) notZero++; + int notZero = 0; + for (int i = 0; i < parameters.length; i++) { + parameters[i] = dis.readFloat(); + if (parameters[i] != 0.0F) + notZero++; } - - - DB.println("read parameters "+parameters.length+" not zero "+notZero); + + DB.println("read parameters " + parameters.length + " not zero " + notZero); } - + public int countNZ() { - int notZero=0; - for(int i=0;i<parameters.length;i++) { - if (parameters[i]!=0.0F) notZero++; + int notZero = 0; + for (float parameter : parameters) { + if (parameter != 0.0F) + notZero++; } return notZero; - - // DB.println("read parameters "+parameters.length+" not zero "+notZero); - } + // DB.println("read parameters "+parameters.length+" not zero + // "+notZero); + } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.sp09k99995.Parameters#getFV() */ @Override @@ -167,8 +165,9 @@ final public class ParametersFloat extends Parameters { return new F2SF(parameters); } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.sp09k99999.Parameters#size() */ @Override @@ -176,6 +175,4 @@ final public class ParametersFloat extends Parameters { return parameters.length; } - - } diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Parser.java b/dependencyParser/mate-tools/src/is2/parserR2/Parser.java index b0cfe9e..54d6f2e 100755 --- a/dependencyParser/mate-tools/src/is2/parserR2/Parser.java +++ b/dependencyParser/mate-tools/src/is2/parserR2/Parser.java @@ -1,27 +1,5 @@ package is2.parserR2; - -import is2.data.Cluster; -import is2.data.DataF; -import is2.data.Edges; -import is2.data.F2SF; -import is2.data.FV; -import is2.data.Instances; -import is2.data.Long2Int; -import is2.data.Long2IntInterface; -import is2.data.MFB; -import is2.data.Parse; -import is2.data.ParseNBest; -import is2.data.PipeGen; -import is2.data.SentenceData09; -import is2.io.CONLLReader09; -import is2.io.CONLLWriter09; - -import is2.tools.Tool; -import is2.util.DB; -import is2.util.OptionsSuper; -import is2.util.ParserEvaluator; - import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.DataInputStream; @@ -31,7 +9,6 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.text.SimpleDateFormat; -import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; @@ -43,146 +20,160 @@ import java.util.zip.ZipOutputStream; //import extractors.ExtractorClusterStackedR2; import extractors.Extractor; import extractors.ExtractorFactory; - - +import is2.data.Cluster; +import is2.data.DataF; +import is2.data.Edges; +import is2.data.FV; +import is2.data.Instances; +import is2.data.Long2Int; +import is2.data.Long2IntInterface; +import is2.data.MFB; +import is2.data.Parse; +import is2.data.ParseNBest; +import is2.data.PipeGen; +import is2.data.SentenceData09; +import is2.io.CONLLReader09; +import is2.io.CONLLWriter09; +import is2.tools.Tool; +import is2.util.DB; +import is2.util.OptionsSuper; +import is2.util.ParserEvaluator; public class Parser implements Tool { - // output evaluation info + // output evaluation info private static final boolean MAX_INFO = true; - public static int THREADS =4; - + public static int THREADS = 4; + Long2IntInterface l2i; ParametersFloat params; Pipe pipe; OptionsSuper options; - - HashMap<Integer,Integer> rank = new HashMap<Integer,Integer>(); - int amongxbest=0, amongxbest_ula=0, nbest=0,bestProj=0, smallestErrorSum=0, countAllNodes=0; - static int NBest =1000; - - ExtractorFactory extractorFactory = new ExtractorFactory(ExtractorFactory.StackedClusteredR2); - - + + HashMap<Integer, Integer> rank = new HashMap<Integer, Integer>(); + int amongxbest = 0, amongxbest_ula = 0, nbest = 0, bestProj = 0, smallestErrorSum = 0, countAllNodes = 0; + static int NBest = 1000; + + ExtractorFactory extractorFactory = new ExtractorFactory(ExtractorFactory.StackedClusteredR2); + /** * Initialize the parser + * * @param options */ - public Parser (OptionsSuper options) { - - this.options=options; + public Parser(OptionsSuper options) { + + this.options = options; pipe = new Pipe(options); - params = new ParametersFloat(0); - + params = new ParametersFloat(0); + // load the model try { readModel(options, pipe, params); } catch (Exception e) { e.printStackTrace(); } - - } + } /** - * @param modelFileName The file name of the parsing model + * @param modelFileName + * The file name of the parsing model */ public Parser(String modelFileName) { - this(new Options(new String[]{"-model",modelFileName})); + this(new Options(new String[] { "-model", modelFileName })); } - /** - * + * */ public Parser() { // TODO Auto-generated constructor stub } + public static void main(String[] args) throws Exception { - public static void main (String[] args) throws Exception - { - long start = System.currentTimeMillis(); OptionsSuper options = new Options(args); NBest = options.best; - DB.println("n-best"+NBest); - + DB.println("n-best" + NBest); + Runtime runtime = Runtime.getRuntime(); THREADS = runtime.availableProcessors(); - if (options.cores<THREADS&&options.cores>0) THREADS =options.cores; + if (options.cores < THREADS && options.cores > 0) + THREADS = options.cores; - DB.println("Found " + runtime.availableProcessors()+" cores use "+THREADS); + DB.println("Found " + runtime.availableProcessors() + " cores use " + THREADS); if (options.train) { - - Parser p =new Parser(); - p.options=options; - + + Parser p = new Parser(); + p.options = options; + p.l2i = new Long2Int(options.hsize); - p.pipe = new Pipe (options); + p.pipe = new Pipe(options); Instances is = new Instances(); p.pipe.extractor = new Extractor[THREADS]; - - for (int t=0;t<THREADS;t++) p.pipe.extractor[t]=p.extractorFactory.getExtractor( p.l2i); + + for (int t = 0; t < THREADS; t++) + p.pipe.extractor[t] = p.extractorFactory.getExtractor(p.l2i); p.params = new ParametersFloat(p.l2i.size()); - - if (options.useMapping!=null) { + + if (options.useMapping != null) { String model = options.modelName; - + options.modelName = options.useMapping; - DB.println("Using mapping of model "+options.modelName); - ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName))); + DB.println("Using mapping of model " + options.modelName); + ZipInputStream zis = new ZipInputStream( + new BufferedInputStream(new FileInputStream(options.modelName))); zis.getNextEntry(); DataInputStream dis = new DataInputStream(new BufferedInputStream(zis)); p.pipe.mf.read(dis); - - DB.println("read\n"+p.pipe.mf.toString()); - + + DB.println("read\n" + p.pipe.mf.toString()); + ParametersFloat params = new ParametersFloat(0); params.read(dis); Edges.read(dis); - + dis.close(); DB.println("end read model"); options.modelName = model; } - - - p.pipe.createInstances(options.trainfile,is); + p.pipe.createInstances(options.trainfile, is); - p.train(options, p.pipe,p.params,is,p.pipe.cl); + p.train(options, p.pipe, p.params, is, p.pipe.cl); - p.writeModell(options, p.params, null,p.pipe.cl); + p.writeModell(options, p.params, null, p.pipe.cl); } if (options.test) { Parser p = new Parser(); - p.options=options; + p.options = options; - p. pipe = new Pipe(options); - p. params = new ParametersFloat(0); // total should be zero and the parameters are later read + p.pipe = new Pipe(options); + p.params = new ParametersFloat(0); // total should be zero and the + // parameters are later read // load the model p.readModel(options, p.pipe, p.params); - DB.println("test on "+options.testfile); - - System.out.println(""+p.pipe.mf.toString()); - - + DB.println("test on " + options.testfile); + + System.out.println("" + p.pipe.mf.toString()); + p.outputParses(options, p.pipe, p.params, !MAX_INFO); } @@ -195,24 +186,23 @@ public class Parser implements Tool { } long end = System.currentTimeMillis(); - System.out.println("used time "+((float)((end-start)/100)/10)); + System.out.println("used time " + ((float) ((end - start) / 100) / 10)); - Decoder.executerService.shutdown(); + Decoder.executerService.shutdown(); Pipe.executerService.shutdown(); System.out.println("end."); - } /** * Read the models and mapping + * * @param options * @param pipe * @param params * @throws IOException */ - public void readModel(OptionsSuper options, Pipe pipe, Parameters params) throws IOException { - + public void readModel(OptionsSuper options, Pipe pipe, Parameters params) throws IOException { DB.println("Reading data started"); @@ -224,14 +214,15 @@ public class Parser implements Tool { pipe.mf.read(dis); pipe.cl = new Cluster(dis); - + params.read(dis); this.l2i = new Long2Int(params.size()); - DB.println("parsing -- li size "+l2i.size()); + DB.println("parsing -- li size " + l2i.size()); pipe.extractor = new Extractor[THREADS]; - for (int t=0;t<THREADS;t++) pipe.extractor[t]=this.extractorFactory.getExtractor(l2i); + for (int t = 0; t < THREADS; t++) + pipe.extractor[t] = this.extractorFactory.getExtractor(l2i); Edges.read(dis); @@ -239,452 +230,418 @@ public class Parser implements Tool { int maxForm = dis.readInt(); - for (int t=0;t<THREADS;t++) { + for (int t = 0; t < THREADS; t++) { pipe.extractor[t].setMaxForm(maxForm); pipe.extractor[t].initStat(); pipe.extractor[t].init(); } - - boolean foundInfo =false; + + boolean foundInfo = false; try { - String info =null; + String info = null; int icnt = dis.readInt(); - for(int i=0;i<icnt;i++) { + for (int i = 0; i < icnt; i++) { info = dis.readUTF(); System.out.println(info); } } catch (Exception e) { - if (!foundInfo) System.out.println("no info about training"); + if (!foundInfo) + System.out.println("no info about training"); } - - + dis.close(); DB.println("Reading data finnished"); - Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH; - for (int t=0;t<THREADS;t++) { + Decoder.NON_PROJECTIVITY_THRESHOLD = (float) options.decodeTH; + for (int t = 0; t < THREADS; t++) { pipe.extractor[t].initStat(); pipe.extractor[t].init(); } } - - /** * Do the training + * * @param instanceLengths * @param options * @param pipe * @param params - * @param is - * @param cluster + * @param is + * @param cluster * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ - public void train(OptionsSuper options, Pipe pipe, ParametersFloat params, Instances is, Cluster cluster) - throws IOException, InterruptedException, ClassNotFoundException { + public void train(OptionsSuper options, Pipe pipe, ParametersFloat params, Instances is, Cluster cluster) + throws IOException, InterruptedException, ClassNotFoundException { - DB.println("\nTraining Information "); DB.println("-------------------- "); + Decoder.NON_PROJECTIVITY_THRESHOLD = (float) options.decodeTH; - Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH; - - if (options.decodeProjective) System.out.println("Decoding: "+(options.decodeProjective?"projective":"non-projective")); - else System.out.println(""+Decoder.getInfo()); + if (options.decodeProjective) + System.out.println("Decoding: " + (options.decodeProjective ? "projective" : "non-projective")); + else + System.out.println("" + Decoder.getInfo()); int numInstances = is.size(); - int maxLenInstances =0; - for(int i=0;i<numInstances;i++) if (maxLenInstances<is.length(i)) maxLenInstances=is.length(i); + int maxLenInstances = 0; + for (int i = 0; i < numInstances; i++) + if (maxLenInstances < is.length(i)) + maxLenInstances = is.length(i); DataF data = new DataF(maxLenInstances, pipe.mf.getFeatureCounter().get(PipeGen.REL).shortValue()); int iter = 0; - int del=0; - float error =0; - float f1=0; + int del = 0; + float error = 0; + float f1 = 0; FV pred = new FV(); FV act = new FV(); - double upd = (double)(numInstances*options.numIters)+1; - - for(; iter < options.numIters; iter++) { + double upd = (double) (numInstances * options.numIters) + 1; + + for (; iter < options.numIters; iter++) { - System.out.print("Iteration "+iter+": "); + System.out.print("Iteration " + iter + ": "); long start = System.currentTimeMillis(); - long last= System.currentTimeMillis(); - error=0; - f1=0; - for(int n = 0; n < numInstances; n++) { + long last = System.currentTimeMillis(); + error = 0; + f1 = 0; + for (int n = 0; n < numInstances; n++) { upd--; - - if (is.labels[n].length>options.maxLen) continue; - - String info = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F) - +" te "+((Pipe.timeExtract)/1000000F); - if((n+1) %500 == 0) del= PipeGen.outValueErr(n+1,Math.round(error*1000)/1000,f1/n,del, last, upd,info); + if (is.labels[n].length > options.maxLen) + continue; + + String info = " td " + ((Decoder.timeDecotder) / 1000000F) + " tr " + + ((Decoder.timeRearrange) / 1000000F) + " te " + ((Pipe.timeExtract) / 1000000F); + + if ((n + 1) % 500 == 0) + del = PipeGen.outValueErr(n + 1, Math.round(error * 1000) / 1000, f1 / n, del, last, upd, info); short pos[] = is.pposs[n]; - data = pipe.fillVector((F2SF)params.getFV(), is, n, data, cluster, THREADS, l2i); + data = pipe.fillVector(params.getFV(), is, n, data, cluster, THREADS, l2i); - List<ParseNBest> parses = Decoder.decode(pos, data, options.decodeProjective,pipe.extractor[0]); + List<ParseNBest> parses = Decoder.decode(pos, data, options.decodeProjective, pipe.extractor[0]); Parse d = parses.get(0); - double e= pipe.errors(is, n ,d); + double e = pipe.errors(is, n, d); - if (d.f1>0)f1+=(d.labels.length-1 -e) /(d.labels.length-1); + if (d.f1 > 0) + f1 += (d.labels.length - 1 - e) / (d.labels.length - 1); - if (e<=0) continue; + if (e <= 0) + continue; // get predicted feature vector pred.clear(); - pipe.extractor[0].encodeCat(is,n,pos,is.forms[n],is.plemmas[n],d.heads, d.labels, is.feats[n],pipe.cl, pred); + pipe.extractor[0].encodeCat(is, n, pos, is.forms[n], is.plemmas[n], d.heads, d.labels, is.feats[n], + pipe.cl, pred); error += e; act.clear(); - pipe.extractor[0].encodeCat(is,n,pos,is.forms[n],is.plemmas[n],is.heads[n], is.labels[n], is.feats[n],pipe.cl, act); + pipe.extractor[0].encodeCat(is, n, pos, is.forms[n], is.plemmas[n], is.heads[n], is.labels[n], + is.feats[n], pipe.cl, act); - params.update(act, pred, is, n, d, upd,e); + params.update(act, pred, is, n, d, upd, e); } - String info = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F) - +" te "+((Pipe.timeExtract)/1000000F)+" nz "+params.countNZ(); - PipeGen.outValueErr(numInstances,Math.round(error*1000)/1000,f1/numInstances,del,last, upd,info); - del=0; + String info = " td " + ((Decoder.timeDecotder) / 1000000F) + " tr " + ((Decoder.timeRearrange) / 1000000F) + + " te " + ((Pipe.timeExtract) / 1000000F) + " nz " + params.countNZ(); + PipeGen.outValueErr(numInstances, Math.round(error * 1000) / 1000, f1 / numInstances, del, last, upd, info); + del = 0; long end = System.currentTimeMillis(); - System.out.println(" time:"+(end-start)); + System.out.println(" time:" + (end - start)); - - ParametersFloat pf = params.average2((iter+1)*is.size()); + ParametersFloat pf = params.average2((iter + 1) * is.size()); try { - if (options.testfile!=null) { - outputParses (options, pipe, pf, ! MAX_INFO); + if (options.testfile != null) { + outputParses(options, pipe, pf, !MAX_INFO); ParserEvaluator.evaluate(options.goldfile, options.outfile); - // writeModell(options, pf, ""+(iter+1),pipe.cl); + // writeModell(options, pf, ""+(iter+1),pipe.cl); } - - + } catch (Exception e) { e.printStackTrace(); - } - - - Decoder.timeDecotder=0;Decoder.timeRearrange=0; Pipe.timeExtract=0; + } + Decoder.timeDecotder = 0; + Decoder.timeRearrange = 0; + Pipe.timeExtract = 0; } - params.average(iter*is.size()); - } - + params.average(iter * is.size()); + } /** * Do the parsing + * * @param options * @param pipe * @param params * @throws IOException */ - private void outputParses (OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo) throws Exception { + private void outputParses(OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo) + throws Exception { long start = System.currentTimeMillis(); CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask); CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask); -// ExtractorClusterStacked.initFeatures(); + // ExtractorClusterStacked.initFeatures(); int cnt = 0; - int del=0; + int del = 0; long last = System.currentTimeMillis(); - if (maxInfo) System.out.println("\nParsing Information "); - if (maxInfo) System.out.println("------------------- "); + if (maxInfo) + System.out.println("\nParsing Information "); + if (maxInfo) + System.out.println("------------------- "); + + if (maxInfo && !options.decodeProjective) + System.out.println("" + Decoder.getInfo()); - if (maxInfo && !options.decodeProjective) System.out.println(""+Decoder.getInfo()); - - // if (!maxInfo) System.out.println(); + // if (!maxInfo) System.out.println(); String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)]; - for (Entry<String, Integer> e : pipe.mf.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey(); + for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet()) + types[e.getValue()] = e.getKey(); - System.out.print("Processing Sentence: "); - while(true) { + while (true) { Instances is = new Instances(); - is.init(1, new MFB(),options.formatTask); + is.init(1, new MFB(), options.formatTask); SentenceData09 instance = pipe.nextInstance(is, depReader); - if (instance==null) break; + if (instance == null) + break; cnt++; - SentenceData09 i09 = this.parse(instance,params); - - // } + SentenceData09 i09 = this.parse(instance, params); + + // } depWriter.write(i09); - del=PipeGen.outValue(cnt, del,last); - // DB.println("xbest "+amongxbest+" cnt "+cnt+" "+((float)((float)amongxbest/cnt))+" nbest "+((float)nbest/cnt)+ - // " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+" best-proj "+((float)bestProj/cnt)); + del = PipeGen.outValue(cnt, del, last); + // DB.println("xbest "+amongxbest+" cnt "+cnt+" + // "+((float)((float)amongxbest/cnt))+" nbest "+((float)nbest/cnt)+ + // " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+" + // best-proj "+((float)bestProj/cnt)); } - //pipe.close(); + // pipe.close(); depWriter.finishWriting(); long end = System.currentTimeMillis(); - DB.println("rank\n"+rank+"\n"); - DB.println("x-best-las "+amongxbest+" x-best-ula "+amongxbest_ula+" cnt "+cnt+" x-best-las " - +((float)((float)amongxbest/cnt))+ - " x-best-ula "+((float)((float)amongxbest_ula/cnt))+ - " nbest "+((float)nbest/cnt)+ - " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+ - " best-proj "+((float)bestProj/cnt)+ - " Sum LAS "+((float)this.smallestErrorSum/countAllNodes)); - - // DB.println("errors "+error); - - rank.clear(); - amongxbest=0;amongxbest_ula=0; - cnt=0; - nbest=0; - bestProj=0; - if (maxInfo) System.out.println("Used time " + (end-start)); - if (maxInfo) System.out.println("forms count "+Instances.m_count+" unkown "+Instances.m_unkown); - - } - - - /** - * Do the parsing - * @param options - * @param pipe - * @param params - * @throws IOException - */ - private void getNBest(OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo) throws Exception { - - - CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask); - - // ExtractorClusterStacked.initFeatures(); + DB.println("rank\n" + rank + "\n"); + DB.println("x-best-las " + amongxbest + " x-best-ula " + amongxbest_ula + " cnt " + cnt + " x-best-las " + + ((float) amongxbest / cnt) + " x-best-ula " + ((float) amongxbest_ula / cnt) + " nbest " + + ((float) nbest / cnt) + " 1best " + ((float) (rank.get(0) == null ? 0 : rank.get(0)) / cnt) + + " best-proj " + ((float) bestProj / cnt) + " Sum LAS " + + ((float) this.smallestErrorSum / countAllNodes)); - int cnt = 0; + // DB.println("errors "+error); - String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)]; - for (Entry<String, Integer> e : pipe.mf.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey(); - -// System.out.print("Processing Sentence: "); - - while(true) { - - Instances is = new Instances(); - is.init(1, new MFB(),options.formatTask); - - SentenceData09 instance = pipe.nextInstance(is, depReader); - if (instance==null) break; - cnt++; - - this.parseNBest(instance); - } - - //pipe.close(); -// depWriter.finishWriting(); -// long end = System.currentTimeMillis(); -// DB.println("rank\n"+rank+"\n"); -// DB.println("x-best-las "+amongxbest+" x-best-ula "+amongxbest_ula+" cnt "+cnt+" x-best-las " -// +((float)((float)amongxbest/cnt))+ -// " x-best-ula "+((float)((float)amongxbest_ula/cnt))+ -// " nbest "+((float)nbest/cnt)+ -// " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+ -// " best-proj "+((float)bestProj/cnt)); - // DB.println("errors "+error); - - - } + rank.clear(); + amongxbest = 0; + amongxbest_ula = 0; + cnt = 0; + nbest = 0; + bestProj = 0; + if (maxInfo) + System.out.println("Used time " + (end - start)); + if (maxInfo) + System.out.println("forms count " + Instances.m_count + " unkown " + Instances.m_unkown); + } - public SentenceData09 parse (SentenceData09 instance, ParametersFloat params) { + public SentenceData09 parse(SentenceData09 instance, ParametersFloat params) { String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)]; - for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey(); + for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet()) + types[e.getValue()] = e.getKey(); Instances is = new Instances(); - is.init(1, new MFB(),options.formatTask); - new CONLLReader09().insert(is, instance); - + is.init(1, new MFB(), options.formatTask); + new CONLLReader09().insert(is, instance); + String[] forms = instance.forms; // use for the training ppos DataF d2; try { - d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl,THREADS,l2i);//cnt-1 - } catch (Exception e ) { + d2 = pipe.fillVector(params.getFV(), is, 0, null, pipe.cl, THREADS, l2i);// cnt-1 + } catch (Exception e) { e.printStackTrace(); return null; } short[] pos = is.pposs[0]; - List<ParseNBest> parses=null; - Parse d= null; + List<ParseNBest> parses = null; + Parse d = null; try { - parses =Decoder.decode(pos,d2,options.decodeProjective,pipe.extractor[0]); //cnt-1 + parses = Decoder.decode(pos, d2, options.decodeProjective, pipe.extractor[0]); // cnt-1 d = parses.get(0); - }catch (Exception e) { + } catch (Exception e) { e.printStackTrace(); } - if (parses.size()>NBest) parses = parses.subList(0,NBest); - - int g_las = Decoder.getGoldRank(parses, is,0,Decoder.LAS); - int g_ula = Decoder.getGoldRank(parses, is,0,!Decoder.LAS); - - int smallest = Decoder.getSmallestError(parses, is,0,!Decoder.LAS); - smallestErrorSum+=is.length(0)-smallest; - countAllNodes+=is.length(0); - - if (g_las>=0) amongxbest++; - if (g_ula>=0) amongxbest_ula++; - - nbest+=parses.size(); - + if (parses.size() > NBest) + parses = parses.subList(0, NBest); + + int g_las = Decoder.getGoldRank(parses, is, 0, Decoder.LAS); + int g_ula = Decoder.getGoldRank(parses, is, 0, !Decoder.LAS); + + int smallest = Decoder.getSmallestError(parses, is, 0, !Decoder.LAS); + smallestErrorSum += is.length(0) - smallest; + countAllNodes += is.length(0); + + if (g_las >= 0) + amongxbest++; + if (g_ula >= 0) + amongxbest_ula++; + + nbest += parses.size(); + Integer r = rank.get(g_las); - if (r==null) rank.put(g_las, 1); - else rank.put(g_las, r+1); - - float err = (float)this.pipe.errors(is,0, d); - - float errBestProj = (float)this.pipe.errors(is,0, Decoder.bestProj); - - if (errBestProj==0) bestProj++; + if (r == null) + rank.put(g_las, 1); + else + rank.put(g_las, r + 1); + + this.pipe.errors(is, 0, d); + + float errBestProj = (float) this.pipe.errors(is, 0, Decoder.bestProj); + + if (errBestProj == 0) + bestProj++; SentenceData09 i09 = new SentenceData09(instance); - + i09.createSemantic(instance); - - for(int j = 0; j < forms.length-1; j++) { - i09.plabels[j] = types[d.labels[j+1]]; - i09.pheads[j] = d.heads[j+1]; + + for (int j = 0; j < forms.length - 1; j++) { + i09.plabels[j] = types[d.labels[j + 1]]; + i09.pheads[j] = d.heads[j + 1]; } return i09; } - public List<ParseNBest> parseNBest (SentenceData09 instance) { + public List<ParseNBest> parseNBest(SentenceData09 instance) { Instances is = new Instances(); - is.init(1, new MFB(),options.formatTask); - new CONLLReader09().insert(is, instance); - + is.init(1, new MFB(), options.formatTask); + new CONLLReader09().insert(is, instance); + // use for the training ppos DataF d2; try { - d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl,THREADS, l2i);//cnt-1 - } catch (Exception e ) { + d2 = pipe.fillVector(params.getFV(), is, 0, null, pipe.cl, THREADS, l2i);// cnt-1 + } catch (Exception e) { e.printStackTrace(); return null; } short[] pos = is.pposs[0]; - List<ParseNBest> parses=null; + List<ParseNBest> parses = null; try { - parses =Decoder.decode(pos,d2,options.decodeProjective,pipe.extractor[0]); //cnt-1 - }catch (Exception e) { + parses = Decoder.decode(pos, d2, options.decodeProjective, pipe.extractor[0]); // cnt-1 + } catch (Exception e) { e.printStackTrace(); } - - if (parses.size()>NBest) parses = parses.subList(0,NBest); + if (parses.size() > NBest) + parses = parses.subList(0, NBest); return parses; } - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.tools.Tool#apply(is2.data.SentenceData09) */ - + @Override public SentenceData09 apply(SentenceData09 snt09) { SentenceData09 it = new SentenceData09(); it.createWithRoot(snt09); - SentenceData09 out=null; + SentenceData09 out = null; try { + // for(int k=0;k<it.length();k++) { + // it.forms[k] = reader.normalize(it.forms[k]); + // it.plemmas[k] = reader.normalize(it.plemmas[k]); + // } - // for(int k=0;k<it.length();k++) { - // it.forms[k] = reader.normalize(it.forms[k]); - // it.plemmas[k] = reader.normalize(it.plemmas[k]); - // } - - out = parse(it,this.params); - + out = parse(it, this.params); - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); } - Decoder.executerService.shutdown(); + Decoder.executerService.shutdown(); Pipe.executerService.shutdown(); return out; } - + /** * Write the parsing model - * + * * @param options * @param params * @param extension * @throws FileNotFoundException * @throws IOException */ - private void writeModell(OptionsSuper options, ParametersFloat params, String extension, Cluster cs) throws FileNotFoundException, IOException { - - String name = extension==null?options.modelName:options.modelName+extension; -// System.out.println("Writting model: "+name); + private void writeModell(OptionsSuper options, ParametersFloat params, String extension, Cluster cs) + throws FileNotFoundException, IOException { + + String name = extension == null ? options.modelName : options.modelName + extension; + // System.out.println("Writting model: "+name); ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(name))); - zos.putNextEntry(new ZipEntry("data")); - DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos)); - + zos.putNextEntry(new ZipEntry("data")); + DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos)); + MFB.writeData(dos); cs.write(dos); - + params.write(dos); Edges.write(dos); - + dos.writeBoolean(options.decodeProjective); dos.writeInt(pipe.extractor[0].getMaxForm()); - - dos.writeInt(5); // Info count - dos.writeUTF("Used parser "+Parser.class.toString()); - dos.writeUTF("Creation date "+(new SimpleDateFormat("yyyy.MM.dd HH:mm:ss")).format(new Date())); - dos.writeUTF("Training data "+options.trainfile); - dos.writeUTF("Iterations "+options.numIters+" Used sentences "+options.count); - dos.writeUTF("Cluster "+options.clusterFile); - + + dos.writeInt(5); // Info count + dos.writeUTF("Used parser " + Parser.class.toString()); + dos.writeUTF("Creation date " + (new SimpleDateFormat("yyyy.MM.dd HH:mm:ss")).format(new Date())); + dos.writeUTF("Training data " + options.trainfile); + dos.writeUTF("Iterations " + options.numIters + " Used sentences " + options.count); + dos.writeUTF("Cluster " + options.clusterFile); + dos.flush(); dos.close(); } - - - } diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Pipe.java b/dependencyParser/mate-tools/src/is2/parserR2/Pipe.java index ad545ec..81ce59a 100755 --- a/dependencyParser/mate-tools/src/is2/parserR2/Pipe.java +++ b/dependencyParser/mate-tools/src/is2/parserR2/Pipe.java @@ -1,5 +1,11 @@ package is2.parserR2; +import java.io.IOException; +import java.util.ArrayList; +import java.util.concurrent.ExecutorService; + +import extractors.Extractor; +import extractors.ParallelExtract; import is2.data.Cluster; import is2.data.DataF; import is2.data.Edges; @@ -7,32 +13,19 @@ import is2.data.F2SF; import is2.data.Instances; import is2.data.Long2IntInterface; import is2.data.MFB; - import is2.data.Parse; import is2.data.PipeGen; import is2.data.SentenceData09; import is2.io.CONLLReader09; - -import is2.util.DB; import is2.util.OptionsSuper; -import java.io.IOException; -import java.util.ArrayList; -import java.util.concurrent.ExecutorService; - -import extractors.Extractor; -import extractors.ExtractorClusterStacked; -import extractors.ExtractorClusterStackedR2; -import extractors.ParallelExtract; - final public class Pipe extends PipeGen { public Extractor[] extractor; final public MFB mf = new MFB(); Cluster cl; - - + private OptionsSuper options; public static long timeExtract; @@ -41,76 +34,81 @@ final public class Pipe extends PipeGen { } public void createInstances(String file, Instances is) - // throws Exception + // throws Exception { - CONLLReader09 depReader = new CONLLReader09(file); - mf.register(REL,"<root-type>"); + mf.register(REL, "<root-type>"); - // register at least one predicate since the parsing data might not contain predicates as in + // register at least one predicate since the parsing data might not + // contain predicates as in // the Japaness corpus but the development sets contains some - long sl=0; - System.out.print("Registering feature parts of sentence: "); int ic = 0; int del = 0; while (true) { SentenceData09 instance = depReader.getNext(); - if (instance == null) break; + if (instance == null) + break; ic++; - sl+=instance.labels.length; - if (ic % 1000 == 0) { del = outValue(ic, del); } String[] labs1 = instance.labels; - for (int i1 = 0; i1 < labs1.length; i1++) mf.register(REL, labs1[i1]); + for (String element : labs1) + mf.register(REL, element); String[] w = instance.forms; - for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1])); + for (String element : w) + mf.register(WORD, depReader.normalize(element)); w = instance.plemmas; - for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1])); - + for (String element : w) + mf.register(WORD, depReader.normalize(element)); w = instance.ppos; - for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); w = instance.gpos; - for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); - if (instance.feats !=null) { + if (instance.feats != null) { String fs[][] = instance.feats; - for (int i1 = 0; i1 < fs.length; i1++){ - w =fs[i1]; - if (w==null) continue; - for (int i2 = 0; i2 < w.length; i2++) mf.register(FEAT, w[i2]); + for (String[] element : fs) { + w = element; + if (w == null) + continue; + for (String element2 : w) + mf.register(FEAT, element2); } } - if ((ic-1)>options.count) break; + if ((ic - 1) > options.count) + break; } del = outValue(ic, del); - - for(Extractor e : extractor) { + for (Extractor e : extractor) { e.setMaxForm(mf.getFeatureCounter().get(WORD)); - } - - if (options.clusterFile==null)cl = new Cluster(); - else cl= new Cluster(options.clusterFile, mf,6); - + } + + if (options.clusterFile == null) + cl = new Cluster(); + else + cl = new Cluster(options.clusterFile, mf, 6); + mf.calculateBits(); - - System.out.println(""+mf.toString()); - - for(Extractor e : extractor) { + + System.out.println("" + mf.toString()); + + for (Extractor e : extractor) { e.initStat(); e.init(); } @@ -118,32 +116,34 @@ final public class Pipe extends PipeGen { depReader.startReading(file); int num1 = 0; - Edges.init(mf.getFeatureCounter().get(POS)); - - + System.out.print("Creating edge filters and read corpus: "); del = 0; is.init(ic, new MFB()); while (true) { - if (num1 % 100 == 0) del = outValue(num1, del); + if (num1 % 100 == 0) + del = outValue(num1, del); SentenceData09 instance1 = depReader.getNext(is); - if (instance1 == null) break; + if (instance1 == null) + break; int last = is.size() - 1; - short[] pos =is.pposs[last]; + short[] pos = is.pposs[last]; for (int k = 0; k < is.length(last); k++) { - if (is.heads[last][k] < 0) continue; - Edges.put(pos[is.heads[last][k]],pos[k], k < is.heads[last][k],is.labels[last][k]); + if (is.heads[last][k] < 0) + continue; + Edges.put(pos[is.heads[last][k]], pos[k], k < is.heads[last][k], is.labels[last][k]); } - if (!options.allFeatures && num1 > options.count) break; + if (!options.allFeatures && num1 > options.count) + break; num1++; @@ -153,23 +153,25 @@ final public class Pipe extends PipeGen { Edges.findDefault(); } - - public void getInstances(String file, Instances is){ + public void getInstances(String file, Instances is) { CONLLReader09 depReader = new CONLLReader09(file); - - int ic =options.count+2; + + int ic = options.count + 2; is.init(ic, new MFB()); - int num1 =0,del=0; + int num1 = 0, del = 0; while (true) { - if (num1 % 100 == 0) del = outValue(num1, del); + if (num1 % 100 == 0) + del = outValue(num1, del); SentenceData09 instance1 = depReader.getNext(is); - if (instance1 == null) break; - - if (!options.allFeatures && num1 > options.count) break; + if (instance1 == null) + break; + + if (!options.allFeatures && num1 > options.count) + break; num1++; @@ -179,10 +181,9 @@ final public class Pipe extends PipeGen { } - /** * Creates an instance for outputParses - * + * * @param is * @return * @throws IOException @@ -190,67 +191,70 @@ final public class Pipe extends PipeGen { protected final SentenceData09 nextInstance(Instances is, CONLLReader09 depReader) throws Exception { SentenceData09 instance = depReader.getNext(is); - if (instance == null || instance.forms == null) return null; + if (instance == null || instance.forms == null) + return null; return instance; } - public static ExecutorService executerService =java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS); - + public static ExecutorService executerService = java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS); - public DataF fillVector(F2SF params, Instances is,int inst, DataF d, Cluster cluster, int threads, Long2IntInterface li) throws InterruptedException { + public DataF fillVector(F2SF params, Instances is, int inst, DataF d, Cluster cluster, int threads, + Long2IntInterface li) throws InterruptedException { long ts = System.nanoTime(); - if (executerService.isShutdown()) executerService =java.util.concurrent.Executors.newCachedThreadPool(); - - + if (executerService.isShutdown()) + executerService = java.util.concurrent.Executors.newCachedThreadPool(); + final int length = is.length(inst); - if (d ==null || d.len<length)d = new DataF(length,mf.getFeatureCounter().get(PipeGen.REL).shortValue()); + if (d == null || d.len < length) + d = new DataF(length, mf.getFeatureCounter().get(PipeGen.REL).shortValue()); ArrayList<ParallelExtract> pe = new ArrayList<ParallelExtract>(); - - - for(int i=0;i<threads;i++) { - -// DB.println(""+((ExtractorClusterStackedR2)extractor[i]).s_dist); - pe.add(new ParallelExtract( extractor[i],is, inst, d, (F2SF)params.clone(), cluster, li)); + + for (int i = 0; i < threads; i++) { + + // DB.println(""+((ExtractorClusterStackedR2)extractor[i]).s_dist); + pe.add(new ParallelExtract(extractor[i], is, inst, d, (F2SF) params.clone(), cluster, li)); } for (int w1 = 0; w1 < length; w1++) { for (int w2 = 0; w2 < length; w2++) { - if (w1==w2) continue; + if (w1 == w2) + continue; ParallelExtract.add(w1, w2); } } - executerService.invokeAll( pe); + executerService.invokeAll(pe); - timeExtract += (System.nanoTime()-ts); + timeExtract += (System.nanoTime() - ts); - return d; } /** * the loss function */ - public double errors( Instances is, int ic, Parse p) { - - if (p.heads==null) p.signature2parse(p.signature()); + public double errors(Instances is, int ic, Parse p) { + + if (p.heads == null) + p.signature2parse(p.signature()); short[] act = is.heads[ic]; double correct = 0; // do not count root - for(int i = 1; i < act.length; i++) { - if (p.heads[i]==act[i] ){ - correct+=0.5; - if (p.labels[i]==is.labels[ic][i] ) correct+=0.5; + for (int i = 1; i < act.length; i++) { + if (p.heads[i] == act[i]) { + correct += 0.5; + if (p.labels[i] == is.labels[ic][i]) + correct += 0.5; } } - double x = ((double)act.length- 1 - correct ); + double x = ((double) act.length - 1 - correct); - //p.f1 = (double)correct / (double)(act.length-1); + // p.f1 = (double)correct / (double)(act.length-1); return x; } diff --git a/dependencyParser/mate-tools/src/is2/parserR2/PipeReranker.java b/dependencyParser/mate-tools/src/is2/parserR2/PipeReranker.java index 87286ac..622fe1c 100644 --- a/dependencyParser/mate-tools/src/is2/parserR2/PipeReranker.java +++ b/dependencyParser/mate-tools/src/is2/parserR2/PipeReranker.java @@ -1,36 +1,24 @@ package is2.parserR2; +import java.util.concurrent.ExecutorService; + +import extractors.ExtractorReranker; import is2.data.Cluster; -import is2.data.DataF; import is2.data.Edges; -import is2.data.F2SF; import is2.data.Instances; import is2.data.MFB; -import is2.data.ParseNBest; - -import is2.data.Parse; import is2.data.PipeGen; import is2.data.SentenceData09; import is2.io.CONLLReader09; - import is2.util.OptionsSuper; -import java.io.IOException; -import java.util.ArrayList; -import java.util.concurrent.ExecutorService; - -import extractors.ExtractorClusterStacked; -import extractors.ExtractorReranker; -import extractors.ParallelExtract; - final public class PipeReranker extends PipeGen { public ExtractorReranker extractor; final public MFB mf = new MFB(); Cluster cl; - - + private OptionsSuper options; public static long timeExtract; @@ -39,60 +27,64 @@ final public class PipeReranker extends PipeGen { } public void createInstances(String file, Instances is) - // throws Exception + // throws Exception { - CONLLReader09 depReader = new CONLLReader09(file); - mf.register(REL,"<root-type>"); + mf.register(REL, "<root-type>"); - // register at least one predicate since the parsing data might not contain predicates as in + // register at least one predicate since the parsing data might not + // contain predicates as in // the Japaness corpus but the development sets contains some - long sl=0; - System.out.print("Registering feature parts of sentence: "); int ic = 0; int del = 0; while (true) { SentenceData09 instance = depReader.getNext(); - if (instance == null) break; + if (instance == null) + break; ic++; - sl+=instance.labels.length; - if (ic % 1000 == 0) { del = outValue(ic, del); } String[] labs1 = instance.labels; - for (int i1 = 0; i1 < labs1.length; i1++) mf.register(REL, labs1[i1]); + for (String element : labs1) + mf.register(REL, element); String[] w = instance.forms; - for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1])); + for (String element : w) + mf.register(WORD, depReader.normalize(element)); w = instance.plemmas; - for (int i1 = 0; i1 < w.length; i1++) mf.register(WORD, depReader.normalize(w[i1])); - + for (String element : w) + mf.register(WORD, depReader.normalize(element)); w = instance.ppos; - for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); w = instance.gpos; - for (int i1 = 0; i1 < w.length; i1++) mf.register(POS, w[i1]); + for (String element : w) + mf.register(POS, element); - if (instance.feats !=null) { + if (instance.feats != null) { String fs[][] = instance.feats; - for (int i1 = 0; i1 < fs.length; i1++){ - w =fs[i1]; - if (w==null) continue; - for (int i2 = 0; i2 < w.length; i2++) mf.register(FEAT, w[i2]); + for (String[] element : fs) { + w = element; + if (w == null) + continue; + for (String element2 : w) + mf.register(FEAT, element2); } } - if ((ic-1)>options.count) break; + if ((ic - 1) > options.count) + break; } del = outValue(ic, del); @@ -100,39 +92,32 @@ final public class PipeReranker extends PipeGen { ExtractorReranker.initFeatures(); ExtractorReranker.maxForm = mf.getFeatureCounter().get(WORD); - - if (options.clusterFile==null)cl = new Cluster(); - else cl= new Cluster(options.clusterFile, mf,6); - + + if (options.clusterFile == null) + cl = new Cluster(); + else + cl = new Cluster(options.clusterFile, mf, 6); + mf.calculateBits(); - extractor.initStat(); - - System.out.println(""+mf.toString()); - + ExtractorReranker.initStat(); + + System.out.println("" + mf.toString()); + extractor.init(); depReader.startReading(file); int num1 = 0; - + is.init(ic, new MFB()); Edges.init(mf.getFeatureCounter().get(POS)); - + del = 0; - del = outValue(num1, del); System.out.println(); } + public static ExecutorService executerService = java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS); - - public static ExecutorService executerService =java.util.concurrent.Executors.newFixedThreadPool(Parser.THREADS); - - - - - - - } diff --git a/dependencyParser/mate-tools/src/is2/parserR2/Reranker.java b/dependencyParser/mate-tools/src/is2/parserR2/Reranker.java index a531c3c..61650a0 100644 --- a/dependencyParser/mate-tools/src/is2/parserR2/Reranker.java +++ b/dependencyParser/mate-tools/src/is2/parserR2/Reranker.java @@ -1,26 +1,5 @@ package is2.parserR2; - -import is2.data.Cluster; -import is2.data.DataF; -import is2.data.Edges; -import is2.data.F2SF; -import is2.data.FVR; -import is2.data.Instances; -import is2.data.Long2Int; -import is2.data.Long2IntInterface; -import is2.data.MFB; -import is2.data.Parse; -import is2.data.ParseNBest; -import is2.data.PipeGen; -import is2.data.SentenceData09; -import is2.io.CONLLReader09; -import is2.io.CONLLWriter09; -import is2.tools.Tool; -import is2.util.DB; -import is2.util.OptionsSuper; -import is2.util.ParserEvaluator; - import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.DataInputStream; @@ -43,13 +22,29 @@ import java.util.zip.ZipOutputStream; import extractors.Extractor; import extractors.ExtractorClusterStacked; import extractors.ExtractorReranker; - - +import is2.data.Cluster; +import is2.data.DataF; +import is2.data.Edges; +import is2.data.F2SF; +import is2.data.FVR; +import is2.data.Instances; +import is2.data.Long2Int; +import is2.data.Long2IntInterface; +import is2.data.MFB; +import is2.data.Parse; +import is2.data.ParseNBest; +import is2.data.PipeGen; +import is2.data.SentenceData09; +import is2.io.CONLLReader09; +import is2.io.CONLLWriter09; +import is2.tools.Tool; +import is2.util.DB; +import is2.util.OptionsSuper; +import is2.util.ParserEvaluator; public class Reranker implements Tool { - - public static int THREADS =4; + public static int THREADS = 4; Long2IntInterface l2i; @@ -62,135 +57,125 @@ public class Reranker implements Tool { Pipe pipe; Options options; - HashMap<Integer,Integer> rank = new HashMap<Integer,Integer>(); - int amongxbest=0, amongxbest_ula=0, nbest=0,bestProj=0, smallestErrorSum=0, countAllNodes=0; - static int NBest =1000; - - - + HashMap<Integer, Integer> rank = new HashMap<Integer, Integer>(); + int amongxbest = 0, amongxbest_ula = 0, nbest = 0, bestProj = 0, smallestErrorSum = 0, countAllNodes = 0; + static int NBest = 1000; /** * Initialize the parser + * * @param options */ - public Reranker (Options options) { + public Reranker(Options options) { - this.options=options; + this.options = options; } - /** - * @param modelFileName The file name of the parsing model + * @param modelFileName + * The file name of the parsing model */ public Reranker(String modelFileName) { - this(new Options(new String[]{"-model",modelFileName})); + this(new Options(new String[] { "-model", modelFileName })); } - - public Reranker() { // TODO Auto-generated constructor stub } - - public static void main (String[] args) throws Exception - { + public static void main(String[] args) throws Exception { long start = System.currentTimeMillis(); Options options = new Options(args); NBest = options.best; - DB.println("n-best "+NBest); + DB.println("n-best " + NBest); Runtime runtime = Runtime.getRuntime(); THREADS = runtime.availableProcessors(); - if (options.cores<THREADS&&options.cores>0) THREADS =options.cores; - - DB.println("Found " + runtime.availableProcessors()+" cores use "+THREADS); + if (options.cores < THREADS && options.cores > 0) + THREADS = options.cores; + DB.println("Found " + runtime.availableProcessors() + " cores use " + THREADS); if (options.train) { - Reranker p =new Reranker(); - p.options=options; - + Reranker p = new Reranker(); + p.options = options; p.l2i = new Long2Int(options.hsize); - p.pipeReranker = new PipeReranker(options); + p.pipeReranker = new PipeReranker(options); p.pipeReranker.extractor = new ExtractorReranker(p.l2i); - // initialize the parser p.pipe = new Pipe(options); // read parsing models - p.paramsParsers = new ParametersFloat[options.end+1]; - for(int m=0;m<=options.end;m++) { - String name = options.prefix_model+m; + p.paramsParsers = new ParametersFloat[options.end + 1]; + for (int m = 0; m <= options.end; m++) { + String name = options.prefix_model + m; p.paramsParsers[m] = new ParametersFloat(0); - p.readModel(name, p.pipe, p.paramsParsers[m]); + p.readModel(name, p.pipe, p.paramsParsers[m]); } // set up the reranker p.parametersReranker = new ParametersFloat(p.l2i.size()); - Instances[] iss = new Instances[options.end+1]; + Instances[] iss = new Instances[options.end + 1]; - for(int m=0;m<=options.end;m++) { - String name = options.prefix_test+m; + for (int m = 0; m <= options.end; m++) { + String name = options.prefix_test + m; iss[m] = new Instances(); - DB.println("create instances of part "+name); - p.pipe.getInstances(name, iss[m]); - } - + DB.println("create instances of part " + name); + p.pipe.getInstances(name, iss[m]); + } ExtractorReranker.initFeatures(); p.pipeReranker.extractor.init(); - p.pipeReranker.extractor.initStat(); + ExtractorReranker.initStat(); - p.train(options,iss); + p.train(options, iss); - p.writeModell(options, p.parametersReranker, null,p.pipe.cl); + p.writeModell(options, p.parametersReranker, null, p.pipe.cl); } if (options.test) { Reranker p = new Reranker(); - p.options=options; + p.options = options; // set up the reranker p.l2i = new Long2Int(options.hsize); - p.pipeReranker = new PipeReranker(options); + p.pipeReranker = new PipeReranker(options); p.pipeReranker.extractor = new ExtractorReranker(p.l2i); p.parametersReranker = new ParametersFloat(p.l2i.size()); - // initialize the parser p.pipe = new Pipe(options); // read parsing models - p.paramsParsers = new ParametersFloat[options.end+1]; - - String nbestName ="n-best+"+options.testfile.substring(options.testfile.length()-12,options.testfile.length()-1); + p.paramsParsers = new ParametersFloat[options.end + 1]; + + String nbestName = "n-best+" + + options.testfile.substring(options.testfile.length() - 12, options.testfile.length() - 1); File fnbest = new File(nbestName); - int read = fnbest.exists()?2:1; + int read = fnbest.exists() ? 2 : 1; if (read != 2) - for(int m=0;m<=options.end;m++) { - String name = options.prefix_model+m; - p.paramsParsers[m] = new ParametersFloat(0); - p.readModel(name, p.pipe, p.paramsParsers[m]); - } + for (int m = 0; m <= options.end; m++) { + String name = options.prefix_model + m; + p.paramsParsers[m] = new ParametersFloat(0); + p.readModel(name, p.pipe, p.paramsParsers[m]); + } p.readModel(options.modelName, p.pipeReranker, p.parametersReranker); - ExtractorReranker.initFeatures(); - p.pipeReranker.extractor.initStat(); + ExtractorReranker.initStat(); p.pipeReranker.extractor.init(); p.rerankedParses(options, p.pipe, p.parametersReranker, false, nbestName); @@ -205,17 +190,17 @@ public class Reranker implements Tool { } long end = System.currentTimeMillis(); - System.out.println("used time "+((float)((end-start)/100)/10)); + System.out.println("used time " + ((float) ((end - start) / 100) / 10)); - Decoder.executerService.shutdown(); + Decoder.executerService.shutdown(); Pipe.executerService.shutdown(); System.out.println("end."); - } /** * Read the models and mapping + * * @param options * @param pipe * @param prm @@ -223,8 +208,7 @@ public class Reranker implements Tool { */ public void readModel(String modelName, Pipe pipe, Parameters prm) throws IOException { - - DB.println("Reading data started: "+modelName); + DB.println("Reading data started: " + modelName); // prepare zipped reader ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(modelName))); @@ -238,16 +222,16 @@ public class Reranker implements Tool { prm.read(dis); Long2Int l2i = new Long2Int(prm.size()); - DB.println("li size "+l2i.size()); + DB.println("li size " + l2i.size()); pipe.extractor = new ExtractorClusterStacked[THREADS]; - for (int t=0;t<THREADS;t++) pipe.extractor[t]=new ExtractorClusterStacked(l2i); + for (int t = 0; t < THREADS; t++) + pipe.extractor[t] = new ExtractorClusterStacked(l2i); ExtractorClusterStacked.initFeatures(); - - for (int t=0;t<THREADS;t++) { + for (int t = 0; t < THREADS; t++) { pipe.extractor[t].initStat(); pipe.extractor[t].init(); } @@ -258,31 +242,32 @@ public class Reranker implements Tool { ExtractorClusterStacked.maxForm = dis.readInt(); - boolean foundInfo =false; + boolean foundInfo = false; try { - String info =null; + String info = null; int icnt = dis.readInt(); - for(int i=0;i<icnt;i++) { + for (int i = 0; i < icnt; i++) { info = dis.readUTF(); System.out.println(info); } } catch (Exception e) { - if (!foundInfo) System.out.println("no info about training"); + if (!foundInfo) + System.out.println("no info about training"); } - dis.close(); DB.println("Reading data finnished"); - Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH; + Decoder.NON_PROJECTIVITY_THRESHOLD = (float) options.decodeTH; - // ExtractorClusterStacked.initStat(); + // ExtractorClusterStacked.initStat(); } /** * Read the models and mapping + * * @param options * @param pipe * @param params @@ -290,7 +275,7 @@ public class Reranker implements Tool { */ public void readModel(String modelName, PipeReranker pipe, Parameters params) throws IOException { - DB.println("Reading data started: "+modelName); + DB.println("Reading data started: " + modelName); // prepare zipped reader ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(modelName))); @@ -299,13 +284,13 @@ public class Reranker implements Tool { pipe.mf.read(dis); - // DB.println("reranker model "+pipe.mf.toString()); + // DB.println("reranker model "+pipe.mf.toString()); pipe.cl = new Cluster(dis); params.read(dis); this.l2i = new Long2Int(params.size()); - DB.println("li size "+l2i.size()); + DB.println("li size " + l2i.size()); pipe.extractor = new ExtractorReranker(l2i); @@ -320,226 +305,218 @@ public class Reranker implements Tool { ExtractorClusterStacked.maxForm = dis.readInt(); - boolean foundInfo =false; + boolean foundInfo = false; try { - String info =null; + String info = null; int icnt = dis.readInt(); - for(int i=0;i<icnt;i++) { + for (int i = 0; i < icnt; i++) { info = dis.readUTF(); System.out.println(info); } } catch (Exception e) { - if (!foundInfo) System.out.println("no info about training"); + if (!foundInfo) + System.out.println("no info about training"); } - dis.close(); DB.println("Reading data finnished"); - Decoder.NON_PROJECTIVITY_THRESHOLD =(float)options.decodeTH; + Decoder.NON_PROJECTIVITY_THRESHOLD = (float) options.decodeTH; - //ExtractorClusterStacked.initStat(); + // ExtractorClusterStacked.initStat(); } - /** * Do the training + * * @param instanceLengths * @param options * @param pipe * @param parametersReranker - * @param is - * @param cluster + * @param is + * @param cluster * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ - public void train(OptionsSuper options, Instances[] iss) - throws IOException, InterruptedException, ClassNotFoundException { - + public void train(OptionsSuper options, Instances[] iss) + throws IOException, InterruptedException, ClassNotFoundException { - int read = 0; // 0 compute -- 1 compute and write -- 2 read parses - DB.println("Training Information "); DB.println("-------------------- "); ExtractorReranker.initStat(); pipeReranker.extractor.init(); - for(Extractor e : this.pipe.extractor) { + for (Extractor e : this.pipe.extractor) { e.init(); } - int numInstances =0; - int maxLenInstances =0; - // int maxLenSentence=1; - for(Instances is : iss) { + int numInstances = 0; + int maxLenInstances = 0; + // int maxLenSentence=1; + for (Instances is : iss) { numInstances += is.size(); - for(int i=0;i<is.size();i++) if (maxLenInstances<is.length(i)) maxLenInstances=is.length(i); + for (int i = 0; i < is.size(); i++) + if (maxLenInstances < is.length(i)) + maxLenInstances = is.length(i); } - DataF data = new DataF(maxLenInstances, pipe.mf.getFeatureCounter().get(PipeGen.REL).shortValue()); int iter = 0; - int del=0; - float error =0; - float f1=0; + int del = 0; + float error = 0; + float f1 = 0; + double upd = (double) (options.count * options.numIters) + options.numIters * 10; - - - double upd = (double)(options.count*options.numIters)+options.numIters*10; - - //float[][] = new float[this.NBest][3]; + // float[][] = new float[this.NBest][3]; FVR act = new FVR(); FVR pred = new FVR(); FVR f = new FVR(); - long[] vs = new long[ExtractorReranker._FC*maxLenInstances]; - - - for(; iter < options.numIters; iter++) { + long[] vs = new long[ExtractorReranker._FC * maxLenInstances]; + for (; iter < options.numIters; iter++) { - - - System.out.print("Iteration "+iter+": "); - error=0; - f1=0; + System.out.print("Iteration " + iter + ": "); + error = 0; + f1 = 0; - float las =0, cnt=0,averageScore=0; + float las = 0, cnt = 0, averageScore = 0; - - float firstBestTotalError=0,totalError=0; + float firstBestTotalError = 0, totalError = 0; long start = System.currentTimeMillis(); - long last= System.currentTimeMillis(); + long last = System.currentTimeMillis(); long rerankTime = 0; - - String nbest ="n-best"; + String nbest = "n-best"; File fnbest = new File(nbest); - read = fnbest.exists()?2:1; + read = fnbest.exists() ? 2 : 1; - DataInputStream dis =null; + DataInputStream dis = null; DataOutputStream dos = null; - if (read==1) { + if (read == 1) { - DB.println("computing and writting nbest list to file: "+nbest); + DB.println("computing and writting nbest list to file: " + nbest); ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(nbest))); - zos.putNextEntry(new ZipEntry("data")); - dos = new DataOutputStream(new BufferedOutputStream(zos)); + zos.putNextEntry(new ZipEntry("data")); + dos = new DataOutputStream(new BufferedOutputStream(zos)); } - // start reading again - if (read ==2) { + if (read == 2) { - // DB.println("reading nbest list from file: "+nbest); + // DB.println("reading nbest list from file: "+nbest); - ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(nbest))); + ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(nbest))); zis.getNextEntry(); dis = new DataInputStream(new BufferedInputStream(zis)); } - HashMap<Integer,Integer> remapped = new HashMap<Integer,Integer>(); + HashMap<Integer, Integer> remapped = new HashMap<Integer, Integer>(); - int i=0,round=0,instance=0,length=0,count=0, changes=0; - for(Instances is : iss) { + int round = 0, instance = 0, length = 0, count = 0, changes = 0; + for (Instances is : iss) { - F2SF fparser =this.paramsParsers[instance].getFV(); - round++; + F2SF fparser = this.paramsParsers[instance].getFV(); + round++; - // go over the sentences in the instance - for(int n = 0; n < is.size(); n++) { - count+=1; - length +=is.length(n); + for (int n = 0; n < is.size(); n++) { + count += 1; + length += is.length(n); upd--; - if (is.labels[n].length>options.maxLen) continue; + if (is.labels[n].length > options.maxLen) + continue; - List<ParseNBest> parses=null; + List<ParseNBest> parses = null; short pos[] = is.pposs[n]; - - // read or write nbest list - if (read==0|| read==1 && iter==0) { - data = pipe.fillVector(fparser, is, n, data, pipe.cl,THREADS,l2i); - parses = Decoder.decode(pos, data, options.decodeProjective,pipe.extractor[0]); - if (parses.size()>NBest) parses = parses.subList(0, NBest); + // read or write nbest list + if (read == 0 || read == 1 && iter == 0) { + data = pipe.fillVector(fparser, is, n, data, pipe.cl, THREADS, l2i); + parses = Decoder.decode(pos, data, options.decodeProjective, pipe.extractor[0]); + if (parses.size() > NBest) + parses = parses.subList(0, NBest); - if (read==1) { + if (read == 1) { // write the forest dos.writeInt(parses.size()); - for(int k=0;k<parses.size();k++) { + for (int k = 0; k < parses.size(); k++) { dos.writeUTF(parses.get(k).signature()); - dos.writeFloat((float)parses.get(k).f1); + dos.writeFloat((float) parses.get(k).f1); } } - } else if (read==2) { + } else if (read == 2) { parses = new ArrayList<ParseNBest>(); int parseCount = dis.readInt(); - for(int k=0;k<parseCount;k++) { - ParseNBest p = new ParseNBest(dis.readUTF(),dis.readFloat()); - if (parses.size()<NBest) parses.add(p); + for (int k = 0; k < parseCount; k++) { + ParseNBest p = new ParseNBest(dis.readUTF(), dis.readFloat()); + if (parses.size() < NBest) + parses.add(p); } } - - int best =0; float bestScore=-100; - int goldBest =0; double goldError=Float.MAX_VALUE; + int best = 0; + float bestScore = -100; + int goldBest = 0; + double goldError = Float.MAX_VALUE; long startReranking = System.currentTimeMillis(); // score the n-best parses - for(int k=0;k<parses.size();k++) { + for (int k = 0; k < parses.size(); k++) { - ParseNBest p= parses.get(k); + ParseNBest p = parses.get(k); - pipeReranker.extractor.extractFeatures(is,n,p,parses.indexOf(p),vs,pipe.cl); + pipeReranker.extractor.extractFeatures(is, n, p, parses.indexOf(p), vs, pipe.cl); - int rank=1*ExtractorReranker.s_type; + int rank = 1 * ExtractorReranker.s_type; - f.clear(); + f.clear(); - for(int j=0;j<vs.length;j++) { - if (vs[j]==Integer.MIN_VALUE) break; - if (vs[j]>0) f.add(pipeReranker.extractor.li.l2i(vs[j]+rank)); + for (long element : vs) { + if (element == Integer.MIN_VALUE) + break; + if (element > 0) + f.add(pipeReranker.extractor.li.l2i(element + rank)); } - f.add(pipeReranker.extractor.li.l2i(1+rank),(float)p.f1); - float score = (float)(parametersReranker.getScore(f)); - if (score>bestScore) { //rankScore[k][2]> - bestScore =score; - best=k; + f.add(pipeReranker.extractor.li.l2i(1 + rank), (float) p.f1); + float score = (parametersReranker.getScore(f)); + if (score > bestScore) { // rankScore[k][2]> + bestScore = score; + best = k; } } // get the best parse in the n-best list - for(int k=0;k<parses.size();k++) { + for (int k = 0; k < parses.size(); k++) { - if (parses.get(k).heads.length!=is.length(n)) { - DB.println("error "+n+" "+parses.get(k).heads.length+" "+is.length(n)); + if (parses.get(k).heads.length != is.length(n)) { + DB.println("error " + n + " " + parses.get(k).heads.length + " " + is.length(n)); continue; } double errg = pipe.errors(is, n, parses.get(k)); if (goldError > errg) { goldError = errg; - goldBest=k; + goldBest = k; } } @@ -547,468 +524,425 @@ public class Reranker implements Tool { ParseNBest predParse = parses.get(best); ParseNBest goldBestParse = parses.get(goldBest); - double e= pipe.errors(is, n ,predParse); + double e = pipe.errors(is, n, predParse); Integer ctb = remapped.get(best); - if (ctb==null) remapped.put(best, 1); - else remapped.put(best, ctb+1); + if (ctb == null) + remapped.put(best, 1); + else + remapped.put(best, ctb + 1); - String info = " 1best-error "+((length-firstBestTotalError)/length)+ - " reranked "+((length-totalError)/length)+ - " chd "+changes+" "+" ps las "+(las/cnt)+" avs "+((float)averageScore/(float)count)+" "; - - + String info = " 1best-error " + ((length - firstBestTotalError) / length) + " reranked " + + ((length - totalError) / length) + " chd " + changes + " " + " ps las " + (las / cnt) + + " avs " + (averageScore / count) + " "; - if((n+1) %500 == 0) del= PipeGen.outValueErr(count,Math.round(error*1000)/1000,f1/count,del, last, upd,info); + if ((n + 1) % 500 == 0) + del = PipeGen.outValueErr(count, Math.round(error * 1000) / 1000, f1 / count, del, last, upd, + info); - firstBestTotalError+=Decoder.getError(firstBest, is, n, Decoder.LAS); + firstBestTotalError += Decoder.getError(firstBest, is, n, Decoder.LAS); - totalError+=Decoder.getError(predParse, is, n, Decoder.LAS); + totalError += Decoder.getError(predParse, is, n, Decoder.LAS); + rerankTime += System.currentTimeMillis() - startReranking; - rerankTime +=System.currentTimeMillis()-startReranking; - - if (best!=0){ + if (best != 0) { changes++; } - las +=is.length(n)-Decoder.getError(goldBestParse, is, n, Decoder.LAS); - cnt +=is.length(n); - - averageScore+=predParse.f1; + las += is.length(n) - Decoder.getError(goldBestParse, is, n, Decoder.LAS); + cnt += is.length(n); - - if (options.count<count) break; + averageScore += predParse.f1; - - if (Decoder.getError(goldBestParse, is, n, Decoder.LAS)>= - Decoder.getError(predParse, is, n, Decoder.LAS) ) continue; + if (options.count < count) + break; + if (Decoder.getError(goldBestParse, is, n, Decoder.LAS) >= Decoder.getError(predParse, is, n, + Decoder.LAS)) + continue; // get predicted feature vector - pipeReranker.extractor.extractFeatures(is,n,predParse,parses.indexOf(predParse),vs,pipe.cl); + pipeReranker.extractor.extractFeatures(is, n, predParse, parses.indexOf(predParse), vs, pipe.cl); pred.clear(); - int rank=1*ExtractorReranker.s_type; + int rank = 1 * ExtractorReranker.s_type; - for(int j=0;j<vs.length;j++) { - if (vs[j]==Integer.MIN_VALUE) break; - if (vs[j]>0) pred.add(pipeReranker.extractor.li.l2i(vs[j]+rank)); + for (long element : vs) { + if (element == Integer.MIN_VALUE) + break; + if (element > 0) + pred.add(pipeReranker.extractor.li.l2i(element + rank)); } - pred.add(pipeReranker.extractor.li.l2i(1+rank),(float)predParse.f1); + pred.add(pipeReranker.extractor.li.l2i(1 + rank), (float) predParse.f1); error += 1; - pipeReranker.extractor.extractFeatures(is,n,goldBestParse,parses.indexOf(goldBestParse),vs,pipe.cl); - + pipeReranker.extractor.extractFeatures(is, n, goldBestParse, parses.indexOf(goldBestParse), vs, + pipe.cl); act.clear(); - rank=1*ExtractorReranker.s_type; - for(int j=0;j<vs.length;j++) { - if (vs[j]==Integer.MIN_VALUE) break; - if (vs[j]>0) act.add(pipeReranker.extractor.li.l2i(vs[j]+rank)); + rank = 1 * ExtractorReranker.s_type; + for (long element : vs) { + if (element == Integer.MIN_VALUE) + break; + if (element > 0) + act.add(pipeReranker.extractor.li.l2i(element + rank)); } - act.add(pipeReranker.extractor.li.l2i(1+rank),(float)goldBestParse.f1); - float lam_dist =(float)( parametersReranker.getScore(act) - - (parametersReranker.getScore(pred))); - + act.add(pipeReranker.extractor.li.l2i(1 + rank), (float) goldBestParse.f1); + float lam_dist = parametersReranker.getScore(act) - (parametersReranker.getScore(pred)); - - parametersReranker.update(act, pred, is, n, null, upd, e,lam_dist); + parametersReranker.update(act, pred, is, n, null, upd, e, lam_dist); } instance++; } - String info = " td "+((Decoder.timeDecotder)/1000000F)+" tr "+((Decoder.timeRearrange)/1000000F) - +" te "+((Pipe.timeExtract)/1000000F)+" nz "+parametersReranker.countNZ()+ - " 1best-error "+((length-firstBestTotalError)/length)+ - " reranked-best "+((length-totalError)/length)+ - " rds "+round+" "+ - " rerank-t "+(rerankTime/count)+ - " chd "+changes+" "+" ps las "+(las/cnt)+" avs "+((float)averageScore/(float)count)+" "; - + String info = " td " + ((Decoder.timeDecotder) / 1000000F) + " tr " + ((Decoder.timeRearrange) / 1000000F) + + " te " + ((Pipe.timeExtract) / 1000000F) + " nz " + parametersReranker.countNZ() + " 1best-error " + + ((length - firstBestTotalError) / length) + " reranked-best " + ((length - totalError) / length) + + " rds " + round + " " + " rerank-t " + (rerankTime / count) + " chd " + changes + " " + + " ps las " + (las / cnt) + " avs " + (averageScore / count) + " "; - // DB.println("remapped "+remapped); + // DB.println("remapped "+remapped); - PipeGen.outValueErr(count,Math.round(error*1000)/1000,f1/count,del,last, upd,info); - del=0; + PipeGen.outValueErr(count, Math.round(error * 1000) / 1000, f1 / count, del, last, upd, info); + del = 0; long end = System.currentTimeMillis(); - System.out.println(" time:"+(end-start)); - i++; - // ParametersFloat pf = params.average2((iter+1)*is.size()); + System.out.println(" time:" + (end - start)); + Decoder.timeDecotder = 0; + Decoder.timeRearrange = 0; + Pipe.timeExtract = 0; - - - Decoder.timeDecotder=0;Decoder.timeRearrange=0; Pipe.timeExtract=0; - - if (dos!=null)dos.close(); - if (dis!=null)dis.close(); + if (dos != null) + dos.close(); + if (dis != null) + dis.close(); } - DB.println("sb "+parametersReranker.parameters[this.pipeReranker.extractor.li.l2i(4090378920L+1*ExtractorReranker.s_type)]);//4090378266 - parametersReranker.average(iter*numInstances); - - } + DB.println("sb " + parametersReranker.parameters[this.pipeReranker.extractor.li + .l2i(4090378920L + 1 * ExtractorReranker.s_type)]);// 4090378266 + parametersReranker.average(iter * numInstances); + } /** * Do the parsing + * * @param options * @param pipe * @param params * @throws IOException */ - private void rerankedParses (OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo, String nbestName) throws Exception { + private void rerankedParses(OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo, + String nbestName) throws Exception { long start = System.currentTimeMillis(); ExtractorClusterStacked.initFeatures(); - - DataInputStream dis =null; + + DataInputStream dis = null; DataOutputStream dos = null; - - float olas=0, olcnt =0; - + + float olas = 0, olcnt = 0; + File fnbest = new File(nbestName); - int read = fnbest.exists()?2:1; - if (read==1) { + int read = fnbest.exists() ? 2 : 1; + if (read == 1) { - DB.println("computing and writting nbest list to file: "+nbestName); + DB.println("computing and writting nbest list to file: " + nbestName); ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(nbestName))); - zos.putNextEntry(new ZipEntry("data")); - dos = new DataOutputStream(new BufferedOutputStream(zos)); + zos.putNextEntry(new ZipEntry("data")); + dos = new DataOutputStream(new BufferedOutputStream(zos)); } - - - if (read ==2) { + if (read == 2) { - // DB.println("reading nbest list from file: "+nbestName); + // DB.println("reading nbest list from file: "+nbestName); - ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(nbestName))); + ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(nbestName))); zis.getNextEntry(); dis = new DataInputStream(new BufferedInputStream(zis)); } - for(int m =0;m< this.paramsParsers.length;m++) { - + for (ParametersFloat paramsParser : this.paramsParsers) { CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask); CONLLWriter09 depWriter = new CONLLWriter09(options.outfile, options.formatTask); - - float las=0,lcnt =0, averageScore =0; + + float las = 0, lcnt = 0, averageScore = 0; int cnt = 0; - int del=0; - - - long last = System.currentTimeMillis(); + int del = 0; + long last = System.currentTimeMillis(); String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)]; - for (Entry<String, Integer> e : pipe.mf.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey(); + for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet()) + types[e.getValue()] = e.getKey(); System.out.print("Processing Sentence: "); - FVR f = new FVR(); - while(true) { + while (true) { Instances is = new Instances(); - is.init(1, new MFB(),options.formatTask); + is.init(1, new MFB(), options.formatTask); SentenceData09 instance = pipe.nextInstance(is, depReader); - if (instance==null) break; + if (instance == null) + break; cnt++; String[] forms = instance.forms; - List<ParseNBest> parses =null; + List<ParseNBest> parses = null; // read or write nbest list - if (read==0|| read==1) { - parses = this.parseNBest(instance, this.paramsParsers[m]); - // data = pipe.fillVector(fparser, is, n, data, pipe.cl,THREADS); - // parses = Decoder.decode(pos, data, options.decodeProjective); + if (read == 0 || read == 1) { + parses = this.parseNBest(instance, paramsParser); + // data = pipe.fillVector(fparser, is, n, data, + // pipe.cl,THREADS); + // parses = Decoder.decode(pos, data, + // options.decodeProjective); - if (parses.size()>NBest) parses = parses.subList(0, NBest); + if (parses.size() > NBest) + parses = parses.subList(0, NBest); - - if (read==1) { + if (read == 1) { // write the forest dos.writeInt(parses.size()); - for(int k=0;k<parses.size();k++) { + for (int k = 0; k < parses.size(); k++) { dos.writeUTF(parses.get(k).signature()); - dos.writeFloat((float)parses.get(k).f1); + dos.writeFloat((float) parses.get(k).f1); } } - } else if (read==2) { + } else if (read == 2) { parses = new ArrayList<ParseNBest>(); int parseCount = dis.readInt(); - for(int k=0;k<parseCount;k++) { - ParseNBest p = new ParseNBest(dis.readUTF(),dis.readFloat()); - if (parses.size()<NBest) parses.add(p); + for (int k = 0; k < parseCount; k++) { + ParseNBest p = new ParseNBest(dis.readUTF(), dis.readFloat()); + if (parses.size() < NBest) + parses.add(p); } } - nbest +=parses.size(); - - - //List<ParseNBest> parses = this.parseNBest(instance, this.paramsParsers[m]); - - long vs[] = new long[ExtractorReranker._FC*is.length(0)]; + nbest += parses.size(); + + // List<ParseNBest> parses = this.parseNBest(instance, + // this.paramsParsers[m]); + + long vs[] = new long[ExtractorReranker._FC * is.length(0)]; - float bestScore=0; - int best=0; - - - for(int k=0;k<parses.size();k++) { + float bestScore = 0; + int best = 0; - ParseNBest p= parses.get(k); + for (int k = 0; k < parses.size(); k++) { - pipeReranker.extractor.extractFeatures(is,0,p,k,vs,pipeReranker.cl); + ParseNBest p = parses.get(k); - int rank=1*ExtractorReranker.s_type; - f.clear(); - for(int j=0;j<vs.length;j++) { - if (vs[j]==Integer.MIN_VALUE) break; - if (vs[j]>0) f.add(pipeReranker.extractor.li.l2i(vs[j]+rank)); + pipeReranker.extractor.extractFeatures(is, 0, p, k, vs, pipeReranker.cl); + + int rank = 1 * ExtractorReranker.s_type; + f.clear(); + for (long element : vs) { + if (element == Integer.MIN_VALUE) + break; + if (element > 0) + f.add(pipeReranker.extractor.li.l2i(element + rank)); } - f.add(pipeReranker.extractor.li.l2i(1+rank),(float)p.f1); + f.add(pipeReranker.extractor.li.l2i(1 + rank), (float) p.f1); - float score = (float)(parametersReranker.getScore(f)); - if (score>bestScore) { //rankScore[k][2]> - bestScore =score; - best=k; + float score = (parametersReranker.getScore(f)); + if (score > bestScore) { // rankScore[k][2]> + bestScore = score; + best = k; } } // change to best ParseNBest d = parses.get(best); - las +=(is.length(0)-1)-Decoder.getError(d, is, 0, Decoder.LAS); - lcnt +=is.length(0)-1; + las += (is.length(0) - 1) - Decoder.getError(d, is, 0, Decoder.LAS); + lcnt += is.length(0) - 1; - averageScore+=d.f1; + averageScore += d.f1; SentenceData09 i09 = new SentenceData09(instance); i09.createSemantic(instance); - for(int j = 0; j < forms.length-1; j++) { - i09.plabels[j] = types[d.labels[j+1]]; - i09.pheads[j] = d.heads[j+1]; + for (int j = 0; j < forms.length - 1; j++) { + i09.plabels[j] = types[d.labels[j + 1]]; + i09.pheads[j] = d.heads[j + 1]; } - depWriter.write(i09); - String info =""+((float)(averageScore/(float)cnt))+" "; + String info = "" + (averageScore / cnt) + " "; - if (cnt%10 ==0) - del=PipeGen.outValueErr(cnt, lcnt-las, las/lcnt, del, last, 0, info);//outValue(cnt, del,last, info); + if (cnt % 10 == 0) + del = PipeGen.outValueErr(cnt, lcnt - las, las / lcnt, del, last, 0, info);// outValue(cnt, + // del,last, + // info); } - //pipe.close(); + // pipe.close(); depWriter.finishWriting(); long end = System.currentTimeMillis(); - DB.println("rank\n"+rank+"\n"); - DB.println("x-best-las "+amongxbest+" x-best-ula "+amongxbest_ula+" cnt "+cnt+" x-best-las " - +((float)((float)amongxbest/cnt))+ - " x-best-ula "+((float)((float)amongxbest_ula/cnt))+ - " nbest "+((float)nbest/cnt)+ - " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+ - " best-proj "+((float)bestProj/cnt)+ - " Sum LAS "+((float)this.smallestErrorSum/countAllNodes)+" "+ - ""+(las/lcnt)); - - // DB.println("errors "+error); - olas+=las; - olcnt+=lcnt; + DB.println("rank\n" + rank + "\n"); + DB.println("x-best-las " + amongxbest + " x-best-ula " + amongxbest_ula + " cnt " + cnt + " x-best-las " + + ((float) amongxbest / cnt) + " x-best-ula " + ((float) amongxbest_ula / cnt) + " nbest " + + ((float) nbest / cnt) + " 1best " + ((float) (rank.get(0) == null ? 0 : rank.get(0)) / cnt) + + " best-proj " + ((float) bestProj / cnt) + " Sum LAS " + + ((float) this.smallestErrorSum / countAllNodes) + " " + "" + (las / lcnt)); + + // DB.println("errors "+error); + olas += las; + olcnt += lcnt; rank.clear(); - amongxbest=0;amongxbest_ula=0; - cnt=0; - nbest=0; - bestProj=0; - if (maxInfo) System.out.println("Used time " + (end-start)); - if (maxInfo) System.out.println("forms count "+Instances.m_count+" unkown "+Instances.m_unkown); + amongxbest = 0; + amongxbest_ula = 0; + cnt = 0; + nbest = 0; + bestProj = 0; + if (maxInfo) + System.out.println("Used time " + (end - start)); + if (maxInfo) + System.out.println("forms count " + Instances.m_count + " unkown " + Instances.m_unkown); } - - if (dos !=null) { + + if (dos != null) { dos.flush(); dos.close(); } - if (dis!=null)dis.close(); - - DB.println("\n overall las "+(olas/olcnt)); - } - - - /** - * Do the parsing - * @param options - * @param pipe - * @param params - * @throws IOException - */ - private void getNBest(OptionsSuper options, Pipe pipe, ParametersFloat params, boolean maxInfo) throws Exception { - - - CONLLReader09 depReader = new CONLLReader09(options.testfile, options.formatTask); - - ExtractorClusterStacked.initFeatures(); - - int cnt = 0; - - String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)]; - for (Entry<String, Integer> e : pipe.mf.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey(); - - // System.out.print("Processing Sentence: "); - - while(true) { - - Instances is = new Instances(); - is.init(1, new MFB(),options.formatTask); - - SentenceData09 instance = pipe.nextInstance(is, depReader); - if (instance==null) break; - cnt++; - - this.parseNBest(instance, this.paramsParsers[0]); - } - - //pipe.close(); - // depWriter.finishWriting(); - // long end = System.currentTimeMillis(); - // DB.println("rank\n"+rank+"\n"); - // DB.println("x-best-las "+amongxbest+" x-best-ula "+amongxbest_ula+" cnt "+cnt+" x-best-las " - // +((float)((float)amongxbest/cnt))+ - // " x-best-ula "+((float)((float)amongxbest_ula/cnt))+ - // " nbest "+((float)nbest/cnt)+ - // " 1best "+((float)(rank.get(0)==null?0:rank.get(0))/cnt)+ - // " best-proj "+((float)bestProj/cnt)); - // DB.println("errors "+error); - + if (dis != null) + dis.close(); + DB.println("\n overall las " + (olas / olcnt)); } - - public SentenceData09 parse (SentenceData09 instance, ParametersFloat params) { + public SentenceData09 parse(SentenceData09 instance, ParametersFloat params) { String[] types = new String[pipe.mf.getFeatureCounter().get(PipeGen.REL)]; - for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey(); + for (Entry<String, Integer> e : MFB.getFeatureSet().get(PipeGen.REL).entrySet()) + types[e.getValue()] = e.getKey(); Instances is = new Instances(); - is.init(1, new MFB(),options.formatTask); - new CONLLReader09().insert(is, instance); + is.init(1, new MFB(), options.formatTask); + new CONLLReader09().insert(is, instance); String[] forms = instance.forms; // use for the training ppos DataF d2; try { - d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl, THREADS,l2i);//cnt-1 - } catch (Exception e ) { + d2 = pipe.fillVector(params.getFV(), is, 0, null, pipe.cl, THREADS, l2i);// cnt-1 + } catch (Exception e) { e.printStackTrace(); return null; } short[] pos = is.pposs[0]; - List<ParseNBest> parses=null; - Parse d= null; + List<ParseNBest> parses = null; + Parse d = null; try { - parses =Decoder.decode(pos,d2,options.decodeProjective,pipe.extractor[0]); //cnt-1 + parses = Decoder.decode(pos, d2, options.decodeProjective, pipe.extractor[0]); // cnt-1 d = parses.get(0); - }catch (Exception e) { + } catch (Exception e) { e.printStackTrace(); } - if (parses.size()>NBest) parses = parses.subList(0,NBest); + if (parses.size() > NBest) + parses = parses.subList(0, NBest); - int g_las = Decoder.getGoldRank(parses, is,0,Decoder.LAS); - int g_ula = Decoder.getGoldRank(parses, is,0,!Decoder.LAS); + int g_las = Decoder.getGoldRank(parses, is, 0, Decoder.LAS); + int g_ula = Decoder.getGoldRank(parses, is, 0, !Decoder.LAS); - int smallest = Decoder.getSmallestError(parses, is,0,!Decoder.LAS); - smallestErrorSum+=is.length(0)-smallest; - countAllNodes+=is.length(0); + int smallest = Decoder.getSmallestError(parses, is, 0, !Decoder.LAS); + smallestErrorSum += is.length(0) - smallest; + countAllNodes += is.length(0); - if (g_las>=0) amongxbest++; - if (g_ula>=0) amongxbest_ula++; + if (g_las >= 0) + amongxbest++; + if (g_ula >= 0) + amongxbest_ula++; - nbest+=parses.size(); + nbest += parses.size(); Integer r = rank.get(g_las); - if (r==null) rank.put(g_las, 1); - else rank.put(g_las, r+1); + if (r == null) + rank.put(g_las, 1); + else + rank.put(g_las, r + 1); - float err = (float)this.pipe.errors(is,0, d); + this.pipe.errors(is, 0, d); - float errBestProj = (float)this.pipe.errors(is,0, Decoder.bestProj); + float errBestProj = (float) this.pipe.errors(is, 0, Decoder.bestProj); - if (errBestProj==0) bestProj++; + if (errBestProj == 0) + bestProj++; SentenceData09 i09 = new SentenceData09(instance); i09.createSemantic(instance); - for(int j = 0; j < forms.length-1; j++) { - i09.plabels[j] = types[d.labels[j+1]]; - i09.pheads[j] = d.heads[j+1]; + for (int j = 0; j < forms.length - 1; j++) { + i09.plabels[j] = types[d.labels[j + 1]]; + i09.pheads[j] = d.heads[j + 1]; } return i09; } - public List<ParseNBest> parseNBest (SentenceData09 instance, ParametersFloat params) { + public List<ParseNBest> parseNBest(SentenceData09 instance, ParametersFloat params) { Instances is = new Instances(); - is.init(1, new MFB(),options.formatTask); - new CONLLReader09().insert(is, instance); - - + is.init(1, new MFB(), options.formatTask); + new CONLLReader09().insert(is, instance); // use for the training ppos DataF d2; try { - d2 = pipe.fillVector(params.getFV(), is,0,null,pipe.cl,THREADS,l2i);//cnt-1 - } catch (Exception e ) { + d2 = pipe.fillVector(params.getFV(), is, 0, null, pipe.cl, THREADS, l2i);// cnt-1 + } catch (Exception e) { e.printStackTrace(); return null; } short[] pos = is.pposs[0]; - List<ParseNBest> parses=null; + List<ParseNBest> parses = null; try { - parses =Decoder.decode(pos,d2,options.decodeProjective,pipe.extractor[0]); //cnt-1 - }catch (Exception e) { + parses = Decoder.decode(pos, d2, options.decodeProjective, pipe.extractor[0]); // cnt-1 + } catch (Exception e) { e.printStackTrace(); } - if (parses.size()>NBest) parses = parses.subList(0,NBest); + if (parses.size() > NBest) + parses = parses.subList(0, NBest); return parses; } - - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.tools.Tool#apply(is2.data.SentenceData09) */ @Override public SentenceData09 apply(SentenceData09 snt09) { try { - parse(snt09,this.parametersReranker); - } catch(Exception e) { + parse(snt09, this.parametersReranker); + } catch (Exception e) { e.printStackTrace(); } - Decoder.executerService.shutdown(); + Decoder.executerService.shutdown(); Pipe.executerService.shutdown(); return snt09; @@ -1016,20 +950,21 @@ public class Reranker implements Tool { /** * Write the parsing model - * + * * @param options * @param params * @param extension * @throws FileNotFoundException * @throws IOException */ - private void writeModell(OptionsSuper options, ParametersFloat params, String extension, Cluster cs) throws FileNotFoundException, IOException { + private void writeModell(OptionsSuper options, ParametersFloat params, String extension, Cluster cs) + throws FileNotFoundException, IOException { - String name = extension==null?options.modelName:options.modelName+extension; - DB.println("Writting model: "+name); + String name = extension == null ? options.modelName : options.modelName + extension; + DB.println("Writting model: " + name); ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(name))); - zos.putNextEntry(new ZipEntry("data")); - DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos)); + zos.putNextEntry(new ZipEntry("data")); + DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos)); MFB.writeData(dos); cs.write(dos); @@ -1042,18 +977,15 @@ public class Reranker implements Tool { dos.writeInt(ExtractorClusterStacked.maxForm); - dos.writeInt(5); // Info count - dos.writeUTF("Used parser "+Reranker.class.toString()); - dos.writeUTF("Creation date "+(new SimpleDateFormat("yyyy.MM.dd HH:mm:ss")).format(new Date())); - dos.writeUTF("Training data "+options.trainfile); - dos.writeUTF("Iterations "+options.numIters+" Used sentences "+options.count); - dos.writeUTF("Cluster "+options.clusterFile); + dos.writeInt(5); // Info count + dos.writeUTF("Used parser " + Reranker.class.toString()); + dos.writeUTF("Creation date " + (new SimpleDateFormat("yyyy.MM.dd HH:mm:ss")).format(new Date())); + dos.writeUTF("Training data " + options.trainfile); + dos.writeUTF("Iterations " + options.numIters + " Used sentences " + options.count); + dos.writeUTF("Cluster " + options.clusterFile); dos.flush(); dos.close(); } - - - } diff --git a/dependencyParser/mate-tools/src/is2/tag/ExtractorT2.java b/dependencyParser/mate-tools/src/is2/tag/ExtractorT2.java index a37dbbe..688dc21 100644 --- a/dependencyParser/mate-tools/src/is2/tag/ExtractorT2.java +++ b/dependencyParser/mate-tools/src/is2/tag/ExtractorT2.java @@ -1,5 +1,13 @@ package is2.tag; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map.Entry; import is2.data.Cluster; import is2.data.F2SF; @@ -13,165 +21,168 @@ import is2.io.CONLLReader09; import is2.tools.IPipe; import is2.util.OptionsSuper; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map.Entry; +final public class ExtractorT2 extends PipeGen implements IPipe { + final static int _MAX = 71; + private static final String STWRD = "STWRD", STPOS = "STPOS"; -final public class ExtractorT2 extends PipeGen implements IPipe { - - final static int _MAX=71; - - private static final String STWRD = "STWRD", STPOS = "STPOS"; - - private static short s_pos,s_word,s_char; + private static short s_pos, s_word, s_char; protected static short s_type; - private static int _strp,_ewrd; - static int _CEND; + private static int _strp; + static int _CEND; public String[] types; final public MFO mf; - final MFO.Data4 d1 = new MFO.Data4(),d2 = new MFO.Data4(),d3 = new MFO.Data4(), - dw = new MFO.Data4(), dwp = new MFO.Data4(); + final MFO.Data4 d1 = new MFO.Data4(), d2 = new MFO.Data4(), d3 = new MFO.Data4(), dw = new MFO.Data4(), + dwp = new MFO.Data4(); Cluster cl; private OptionsSuper options; - public ExtractorT2 (OptionsSuper options, MFO mf) throws IOException { - this.mf =mf; + public ExtractorT2(OptionsSuper options, MFO mf) throws IOException { + this.mf = mf; this.options = options; - } - public HashMap<Integer, int[]> _pps = new HashMap<Integer, int[]>(); + } + + public HashMap<Integer, int[]> _pps = new HashMap<Integer, int[]>(); private Lexicon lx; - - public int corpusWrds = 0; - - - - - /* (non-Javadoc) - * @see is2.tag5.IPipe#createInstances(java.lang.String, java.io.File, is2.data.InstancesTagger) + + public int corpusWrds = 0; + + /* + * (non-Javadoc) + * + * @see is2.tag5.IPipe#createInstances(java.lang.String, java.io.File, + * is2.data.InstancesTagger) */ + @Override public Instances createInstances(String file) { return createInstances(file, -1, -1); - } - - + } + public Instances createInstances(String file, int skipStart, int skipEnd) { - + InstancesTagger is = new InstancesTagger(); CONLLReader09 depReader = new CONLLReader09(CONLLReader09.NO_NORMALIZE); depReader.startReading(file); - mf.register(POS,"<root-POS>"); - mf.register(WORD,"<root>"); + mf.register(POS, "<root-POS>"); + mf.register(WORD, "<root>"); System.out.println("Registering feature parts "); - HashMap<Integer, HashSet<Integer>> pps = new HashMap<Integer, HashSet<Integer>>(); - - int ic=0; - while(true) { + HashMap<Integer, HashSet<Integer>> pps = new HashMap<Integer, HashSet<Integer>>(); + + int ic = 0; + while (true) { SentenceData09 instance1 = depReader.getNext(); - - if (instance1== null) break; + + if (instance1 == null) + break; ic++; String[] w = instance1.forms; - for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]); - for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]); - for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1].toLowerCase()); - + for (String element : w) + mf.register(WORD, element); + for (String element : w) + registerChars(CHAR, element); + for (String element : w) + registerChars(CHAR, element.toLowerCase()); w = instance1.plemmas; - for(int i1 = 0; i1 < w.length; i1++) mf.register(WORD, w[i1]); - for(int i1 = 0; i1 < w.length; i1++) registerChars(CHAR, w[i1]); + for (String element : w) + mf.register(WORD, element); + for (String element : w) + registerChars(CHAR, element); w = instance1.gpos; - for(int i1 = 0; i1 < w.length; i1++) { - mf.register(POS, w[i1]); + for (String element : w) { + mf.register(POS, element); } - for(int i1 = 0; i1 < w.length; i1++) { - HashSet<Integer> ps = pps.get(mf.getValue(POS,w[i1])); - if (ps==null) { - ps= new HashSet<Integer>(); - pps.put(mf.getValue(POS,w[i1]), ps); + for (int i1 = 0; i1 < w.length; i1++) { + HashSet<Integer> ps = pps.get(mf.getValue(POS, w[i1])); + if (ps == null) { + ps = new HashSet<Integer>(); + pps.put(mf.getValue(POS, w[i1]), ps); } - if (i1+1<w.length) ps.add(mf.getValue(POS,w[i1+1])); + if (i1 + 1 < w.length) + ps.add(mf.getValue(POS, w[i1 + 1])); } - + } - for(Entry<Integer,HashSet<Integer>> e : pps.entrySet()) { + for (Entry<Integer, HashSet<Integer>> e : pps.entrySet()) { int[] ps = new int[e.getValue().size()]; - int j=0; - for(int k : e.getValue().toArray(new Integer[0])) { - ps[j++] =k; + int j = 0; + for (int k : e.getValue().toArray(new Integer[0])) { + ps[j++] = k; } _pps.put(e.getKey(), ps); - // System.out.println("put "+e.getKey()+" "+ps.length+" pps size "+_pps.size()); + // System.out.println("put "+e.getKey()+" "+ps.length+" pps size + // "+_pps.size()); } - - System.out.println("words in corpus "+(corpusWrds=mf.getFeatureCounter().get(ExtractorT2.WORD))); - if (options.clusterFile==null)cl = new Cluster(); - else cl= new Cluster(options.clusterFile, mf,6); - if (options.lexicon==null)lx = new Lexicon(new byte[0][0]); - else lx= new Lexicon(options.lexicon,mf); + System.out.println("words in corpus " + (corpusWrds = mf.getFeatureCounter().get(PipeGen.WORD))); + if (options.clusterFile == null) + cl = new Cluster(); + else + cl = new Cluster(options.clusterFile, mf, 6); + + if (options.lexicon == null) + lx = new Lexicon(new byte[0][0]); + else + lx = new Lexicon(options.lexicon, mf); initFeatures(); mf.calculateBits(); initValues(); - System.out.println(""+mf.toString()); + System.out.println("" + mf.toString()); depReader.startReading(file); int num1 = 0; - int instanceCount=0; - + int instanceCount = 0; + System.out.print("Creating Instances: "); - is.init(ic, mf) ; - int del=0; + is.init(ic, mf); + int del = 0; + + while (true) { + if (num1 % 100 == 0) + del = outValue(num1, del); - while(true) { - if (num1 % 100 ==0) del = outValue(num1, del); - - if (num1>=skipStart && num1<skipEnd && skipStart>=0) { + if (num1 >= skipStart && num1 < skipEnd && skipStart >= 0) { SentenceData09 instance1 = depReader.getNext(); - if (instance1== null) break; + if (instance1 == null) + break; num1++; continue; } - SentenceData09 instance1 = depReader.getNext(is); - if (instance1== null) break; - - is.fillChars(instance1, instanceCount,_CEND); - for(int k=0;k<instance1.length();k++) { - if (instance1.ppos[k].contains("\\|")) + if (instance1 == null) + break; - is.pposs[num1][k] = (short)mf.getValue(FM, instance1.ppos[k].split("\\|")[1]); - } + is.fillChars(instance1, instanceCount, _CEND); + for (int k = 0; k < instance1.length(); k++) { + if (instance1.ppos[k].contains("\\|")) + is.pposs[num1][k] = (short) mf.getValue(FM, instance1.ppos[k].split("\\|")[1]); + } - if (num1>options.count) break; + if (num1 > options.count) + break; num1++; instanceCount++; @@ -179,241 +190,464 @@ final public class ExtractorT2 extends PipeGen implements IPipe { outValue(num1, del); System.out.println(); - types= mf.reverse(mf.getFeatureSet().get(POS)); + types = MFO.reverse(mf.getFeatureSet().get(POS)); return is; } private void registerChars(String type, String word) { - for(int i=0;i<word.length();i++) mf.register(type, Character.toString(word.charAt(i))); + for (int i = 0; i < word.length(); i++) + mf.register(type, Character.toString(word.charAt(i))); } - - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.tag5.IPipe#initValues() */ + @Override public void initValues() { s_pos = mf.getFeatureBits(POS); - s_word = mf.getFeatureBits(WORD); - s_type = mf.getFeatureBits(TYPE); - s_char = mf.getFeatureBits(CHAR); - - d1.a0 = s_type; d1.a1 = s_pos; d1.a2= s_word;d1.a3= s_word; - d2.a0 = s_type; d2.a1 = s_pos; d2.a2= s_pos; d2.a3= s_pos; d2.a4= s_pos; d2.a5= s_pos; d2.a6= s_pos; - d3.a0 = s_type; d3.a1 = s_pos; d3.a2= s_char; d3.a3= s_char; d3.a4= s_char; d3.a5= s_char; d3.a6= s_char; d3.a7= s_char; - dw.a0 = s_type; dw.a1 = s_pos;dw.a2= s_word; dw.a3= s_word; dw.a4= s_word; dw.a5= s_word; dw.a6= s_word; dw.a7= s_word; - dwp.a0 = s_type; dwp.a1 = s_pos;dwp.a2= s_word ; dwp.a3= s_pos; dwp.a4= s_word; + s_word = mf.getFeatureBits(WORD); + s_type = mf.getFeatureBits(TYPE); + s_char = mf.getFeatureBits(CHAR); + + d1.a0 = s_type; + d1.a1 = s_pos; + d1.a2 = s_word; + d1.a3 = s_word; + d2.a0 = s_type; + d2.a1 = s_pos; + d2.a2 = s_pos; + d2.a3 = s_pos; + d2.a4 = s_pos; + d2.a5 = s_pos; + d2.a6 = s_pos; + d3.a0 = s_type; + d3.a1 = s_pos; + d3.a2 = s_char; + d3.a3 = s_char; + d3.a4 = s_char; + d3.a5 = s_char; + d3.a6 = s_char; + d3.a7 = s_char; + dw.a0 = s_type; + dw.a1 = s_pos; + dw.a2 = s_word; + dw.a3 = s_word; + dw.a4 = s_word; + dw.a5 = s_word; + dw.a6 = s_word; + dw.a7 = s_word; + dwp.a0 = s_type; + dwp.a1 = s_pos; + dwp.a2 = s_word; + dwp.a3 = s_pos; + dwp.a4 = s_word; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.tag5.IPipe#initFeatures() */ + @Override public void initFeatures() { // 62 - for(int t=0;t<67;t++) mf.register(TYPE, "F"+t); + for (int t = 0; t < 67; t++) + mf.register(TYPE, "F" + t); mf.register(POS, MID); _strp = mf.register(POS, STR); mf.register(POS, END); mf.register(WORD, STR); - _ewrd =mf.register(WORD, END); + mf.register(WORD, END); _CEND = mf.register(CHAR, END); - mf.register(WORD,STWRD); - mf.register(POS,STPOS); - + mf.register(WORD, STWRD); + mf.register(POS, STPOS); } - final public void addFeatures(InstancesTagger is, int ic, String fs,int i, short pposs[], int[] forms, int[] lemmas, long[] vs) { + final public void addFeatures(InstancesTagger is, int ic, String fs, int i, short pposs[], int[] forms, + int[] lemmas, long[] vs) { - int c0= is.chars[ic][i][0], c1=is.chars[ic][i][1], c2=is.chars[ic][i][2], c3=is.chars[ic][i][3], c4=is.chars[ic][i][4],c5=is.chars[ic][i][5]; - int e0 =is.chars[ic][i][6], e1 =is.chars[ic][i][7],e2 =is.chars[ic][i][8],e3 =is.chars[ic][i][9],e4 =is.chars[ic][i][10]; + int c0 = is.chars[ic][i][0], c1 = is.chars[ic][i][1], c2 = is.chars[ic][i][2], c3 = is.chars[ic][i][3], + c4 = is.chars[ic][i][4], c5 = is.chars[ic][i][5]; + int e0 = is.chars[ic][i][6], e1 = is.chars[ic][i][7], e2 = is.chars[ic][i][8], e3 = is.chars[ic][i][9], + e4 = is.chars[ic][i][10]; - int f=1,n=0; - short upper =0, number = 1; - for(int k1=0;k1<fs.length();k1++){ + int f = 1, n = 0; + short upper = 0, number = 1; + for (int k1 = 0; k1 < fs.length(); k1++) { char c = fs.charAt(k1); if (Character.isUpperCase(c)) { - if (k1==0) upper=1; + if (k1 == 0) + upper = 1; else { // first char + another - if (upper==1) upper=3; + if (upper == 1) + upper = 3; // another uppercase in the word - else if (upper==0) upper=2; + else if (upper == 0) + upper = 2; } } - // first - if (Character.isDigit(c) && k1==0) number =2 ; - else if (Character.isDigit(c) && number==1) number = 3; - // if(number==2 &&Character.isDigit(c)) number=4; - // if(number==4 && !Character.isDigit(c)) number=5; + // first + if (Character.isDigit(c) && k1 == 0) + number = 2; + else if (Character.isDigit(c) && number == 1) + number = 3; + // if(number==2 &&Character.isDigit(c)) number=4; + // if(number==4 && !Character.isDigit(c)) number=5; } - // if (i==0 && upper>0) upper+=4; - int form = forms[i], form2 = forms[i]<corpusWrds?forms[i]:-1; + // if (i==0 && upper>0) upper+=4; + int form = forms[i], form2 = forms[i] < corpusWrds ? forms[i] : -1; - int len = forms.length; + int len = forms.length; long l; - d1.v0 = f++; d1.v2=form2; l=mf.calc3(d1); vs[n++]=mf.calc3(d1); - - d1.v0 = f++; d1.v2=is.formlc[ic][i]; vs[n++]=mf.calc3(d1); - - - d3.v2=c0; d3.v3=c1; d3.v4=c2; d3.v5=c3; d3.v6=c4; - d3.v0=f++; vs[n++]=mf.calc3(d3); - d3.v0=f++; vs[n++]=mf.calc4(d3); - d3.v0=f++; vs[n++]=mf.calc5(d3); - d3.v0=f++; vs[n++]=mf.calc6(d3); - d3.v0=f++; vs[n++]=mf.calc7(d3); - - if (form!=-1) { - d3.v2=c2; d3.v3=c3; d3.v4=c4; d3.v5=c5; d3.v6=cl.getLP(form); - d3.v0=f; vs[n++]=mf.calc6(d3); d3.v0=f+1; vs[n++]=mf.calc7(d3); - } - f+=2; - - if (form>0) { - d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3); - d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3); - d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3); + d1.v0 = f++; + d1.v2 = form2; + l = mf.calc3(d1); + vs[n++] = mf.calc3(d1); + + d1.v0 = f++; + d1.v2 = is.formlc[ic][i]; + vs[n++] = mf.calc3(d1); + + d3.v2 = c0; + d3.v3 = c1; + d3.v4 = c2; + d3.v5 = c3; + d3.v6 = c4; + d3.v0 = f++; + vs[n++] = mf.calc3(d3); + d3.v0 = f++; + vs[n++] = mf.calc4(d3); + d3.v0 = f++; + vs[n++] = mf.calc5(d3); + d3.v0 = f++; + vs[n++] = mf.calc6(d3); + d3.v0 = f++; + vs[n++] = mf.calc7(d3); + + if (form != -1) { + d3.v2 = c2; + d3.v3 = c3; + d3.v4 = c4; + d3.v5 = c5; + d3.v6 = cl.getLP(form); + d3.v0 = f; + vs[n++] = mf.calc6(d3); + d3.v0 = f + 1; + vs[n++] = mf.calc7(d3); + } + f += 2; + + if (form > 0) { + d3.v0 = f; + d3.v5 = cl.getLP(form); + vs[n++] = mf.calc6(d3); + d3.v0 = f + 1; + d3.v4 = cl.getLP(form); + vs[n++] = mf.calc5(d3); + d3.v0 = f + 2; + d3.v3 = cl.getLP(form); + vs[n++] = mf.calc4(d3); } - f+=5; - - d3.v2=e0; d3.v3=e1; d3.v4=e2; d3.v5=e3; d3.v6=e4; - d3.v0 =f++; vs[n++]=mf.calc3(d3); - d3.v0 =f++; vs[n++]=l=mf.calc4(d3); vs[n++]=d3.calcs(3, upper, l); - d3.v0 =f++; vs[n++]=l=mf.calc5(d3); vs[n++]=d3.calcs(3, upper, l); - d3.v0 =f++; vs[n++]=l=mf.calc6(d3); vs[n++]=d3.calcs(3, upper, l); - d3.v0 =f++; vs[n++]=l=mf.calc7(d3); vs[n++]=d3.calcs(3, upper, l); - - if (form>0) { - d3.v0=f; d3.v5=cl.getLP(form); vs[n++]=mf.calc6(d3); - d3.v0=f+1; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3); - d3.v0=f+2; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3); - - d3.v2=e0; d3.v3=e1; d3.v4=e2; - - d3.v0=f+3; d3.v2=lx.getTag(form); vs[n++]=mf.calc3(d3); - d3.v0=f+4; d3.v4=cl.getLP(form); vs[n++]=mf.calc5(d3); - d3.v0=f+5; d3.v3=cl.getLP(form); vs[n++]=mf.calc4(d3); + f += 5; + + d3.v2 = e0; + d3.v3 = e1; + d3.v4 = e2; + d3.v5 = e3; + d3.v6 = e4; + d3.v0 = f++; + vs[n++] = mf.calc3(d3); + d3.v0 = f++; + vs[n++] = l = mf.calc4(d3); + vs[n++] = d3.calcs(3, upper, l); + d3.v0 = f++; + vs[n++] = l = mf.calc5(d3); + vs[n++] = d3.calcs(3, upper, l); + d3.v0 = f++; + vs[n++] = l = mf.calc6(d3); + vs[n++] = d3.calcs(3, upper, l); + d3.v0 = f++; + vs[n++] = l = mf.calc7(d3); + vs[n++] = d3.calcs(3, upper, l); + + if (form > 0) { + d3.v0 = f; + d3.v5 = cl.getLP(form); + vs[n++] = mf.calc6(d3); + d3.v0 = f + 1; + d3.v4 = cl.getLP(form); + vs[n++] = mf.calc5(d3); + d3.v0 = f + 2; + d3.v3 = cl.getLP(form); + vs[n++] = mf.calc4(d3); + + d3.v2 = e0; + d3.v3 = e1; + d3.v4 = e2; + + d3.v0 = f + 3; + d3.v2 = lx.getTag(form); + vs[n++] = mf.calc3(d3); + d3.v0 = f + 4; + d3.v4 = cl.getLP(form); + vs[n++] = mf.calc5(d3); + d3.v0 = f + 5; + d3.v3 = cl.getLP(form); + vs[n++] = mf.calc4(d3); } - f+=6; + f += 6; // sign three-grams - d3.v0=f++;d3.v2=c1; d3.v3=c2; d3.v4=c3; vs[n++]=mf.calc5(d3); - d3.v0=f++;d3.v2=c2; d3.v3=c3; d3.v4=c4; vs[n++]=mf.calc5(d3); - d3.v0=f++;d3.v2=c3; d3.v3=c4; d3.v4=c5; vs[n++]=mf.calc5(d3); + d3.v0 = f++; + d3.v2 = c1; + d3.v3 = c2; + d3.v4 = c3; + vs[n++] = mf.calc5(d3); + d3.v0 = f++; + d3.v2 = c2; + d3.v3 = c3; + d3.v4 = c4; + vs[n++] = mf.calc5(d3); + d3.v0 = f++; + d3.v2 = c3; + d3.v3 = c4; + d3.v4 = c5; + vs[n++] = mf.calc5(d3); // sign quad-grams - d3.v0=f++;d3.v2=c1; d3.v3=c2; d3.v4=c3; d3.v5=c4; vs[n++]=mf.calc6(d3); - d3.v0=f++;d3.v2=c2; d3.v3=c3; d3.v4=c4; d3.v5=c5; vs[n++]=mf.calc6(d3); // changed to 6 - - if (i+1<len && forms[i+1]<this.corpusWrds) {dw.v0=f; dw.v2=forms[i+1];dw.v3= form2;vs[n++]=mf.calc4(dw);} + d3.v0 = f++; + d3.v2 = c1; + d3.v3 = c2; + d3.v4 = c3; + d3.v5 = c4; + vs[n++] = mf.calc6(d3); + d3.v0 = f++; + d3.v2 = c2; + d3.v3 = c3; + d3.v4 = c4; + d3.v5 = c5; + vs[n++] = mf.calc6(d3); // changed to 6 + + if (i + 1 < len && forms[i + 1] < this.corpusWrds) { + dw.v0 = f; + dw.v2 = forms[i + 1]; + dw.v3 = form2; + vs[n++] = mf.calc4(dw); + } f++; - - if (len>i+1) { - - if (forms[i+1]<corpusWrds){dw.v0=f; dw.v2= forms[i+1]; vs[n++]=mf.calc3(dw);} - - d3.v0=f+1; d3.v2 =is.chars[ic][i+1][0];vs[n++]=mf.calc3(d3); - d3.v0=f+2; d3.v2 =is.chars[ic][i+1][6];vs[n++]=mf.calc3(d3); - - d3.v2=e0; d3.v3=e1; - - d3.v0 =f+3; d3.v4 =is.chars[ic][i+1][0];vs[n++]=mf.calc5(d3); - d3.v0 =f+4; d3.v4 =is.chars[ic][i+1][6];vs[n++]=mf.calc5(d3); - - if (is.chars[ic][i+1][11]>1 ) { // instance.forms[i+1].length() - - d3.v0=f+5; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; vs[n++]=mf.calc4(d3); - d3.v0=f+6; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; vs[n++]=mf.calc4(d3); - - d3.v2=e0; d3.v3=e1; - - d3.v0=f+7; d3.v4 = is.chars[ic][i+1][0]; d3.v5 =is.chars[ic][i+1][1]; vs[n++]=mf.calc6(d3); - d3.v0=f+8; d3.v4 = is.chars[ic][i+1][6]; d3.v5=is.chars[ic][i+1][7]; vs[n++]=mf.calc6(d3); - - if (forms[i+1]>0) { - d3.v0=f+9; d3.v2=is.chars[ic][i+1][0]; d3.v3=is.chars[ic][i+1][1]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3); - d3.v0=f+10; d3.v2=is.chars[ic][i+1][6]; d3.v3=is.chars[ic][i+1][7]; d3.v4 =cl.getLP(forms[i+1]); vs[n++]=mf.calc5(d3); - } - } - - if (forms[i+1]>0) { - - - dw.v0=f+11; dw.v2= cl.getLP(forms[i+1]); dw.v3= form2;vs[n++]=mf.calc4(dw); - - // if (forms[i]>0){ - // dw.v0=f+12; dw.v2= cl.getLP(forms[i+1]); dw.v3=lx.getTag(form);vs[n++]=mf.calc4(dw); - // dw.v0=f+13; dw.v2= cl.getLP(forms[i]); dw.v3=lx.getTag(forms[i+1]);vs[n++]=mf.calc4(dw); - // } - } - - - if (len>i+2) { - if (forms[i+2]<corpusWrds && forms[i+1]<corpusWrds) { - dw.v0=f+12; dw.v2= forms[i+2]; dw.v3 = forms[i+1];vs[n++]=mf.calc4(dw);vs[n++]=mf.calc3(dw); - } - d2.v0=f+13; d2.v2=pposs[i+1]; d2.v3= pposs[i+2]; vs[n++]=mf.calc4(d2); - } - - if (len>i+3) { - if (forms[i+3]<this.corpusWrds && forms[i+2]<this.corpusWrds) { - dw.v0=f+14; dw.v2= forms[i+3]; dw.v3 = forms[i+2]; vs[n++]=mf.calc4(dw); vs[n++]=mf.calc3(dw); + + if (len > i + 1) { + + if (forms[i + 1] < corpusWrds) { + dw.v0 = f; + dw.v2 = forms[i + 1]; + vs[n++] = mf.calc3(dw); + } + + d3.v0 = f + 1; + d3.v2 = is.chars[ic][i + 1][0]; + vs[n++] = mf.calc3(d3); + d3.v0 = f + 2; + d3.v2 = is.chars[ic][i + 1][6]; + vs[n++] = mf.calc3(d3); + + d3.v2 = e0; + d3.v3 = e1; + + d3.v0 = f + 3; + d3.v4 = is.chars[ic][i + 1][0]; + vs[n++] = mf.calc5(d3); + d3.v0 = f + 4; + d3.v4 = is.chars[ic][i + 1][6]; + vs[n++] = mf.calc5(d3); + + if (is.chars[ic][i + 1][11] > 1) { // instance.forms[i+1].length() + + d3.v0 = f + 5; + d3.v2 = is.chars[ic][i + 1][0]; + d3.v3 = is.chars[ic][i + 1][1]; + vs[n++] = mf.calc4(d3); + d3.v0 = f + 6; + d3.v2 = is.chars[ic][i + 1][6]; + d3.v3 = is.chars[ic][i + 1][7]; + vs[n++] = mf.calc4(d3); + + d3.v2 = e0; + d3.v3 = e1; + + d3.v0 = f + 7; + d3.v4 = is.chars[ic][i + 1][0]; + d3.v5 = is.chars[ic][i + 1][1]; + vs[n++] = mf.calc6(d3); + d3.v0 = f + 8; + d3.v4 = is.chars[ic][i + 1][6]; + d3.v5 = is.chars[ic][i + 1][7]; + vs[n++] = mf.calc6(d3); + + if (forms[i + 1] > 0) { + d3.v0 = f + 9; + d3.v2 = is.chars[ic][i + 1][0]; + d3.v3 = is.chars[ic][i + 1][1]; + d3.v4 = cl.getLP(forms[i + 1]); + vs[n++] = mf.calc5(d3); + d3.v0 = f + 10; + d3.v2 = is.chars[ic][i + 1][6]; + d3.v3 = is.chars[ic][i + 1][7]; + d3.v4 = cl.getLP(forms[i + 1]); + vs[n++] = mf.calc5(d3); } - } - } - f+=15; + } - // length - d2.v0=f++; d2.v2=is.chars[ic][i][11];vs[n++]=mf.calc3(d2); + if (forms[i + 1] > 0) { + dw.v0 = f + 11; + dw.v2 = cl.getLP(forms[i + 1]); + dw.v3 = form2; + vs[n++] = mf.calc4(dw); + + // if (forms[i]>0){ + // dw.v0=f+12; dw.v2= cl.getLP(forms[i+1]); + // dw.v3=lx.getTag(form);vs[n++]=mf.calc4(dw); + // dw.v0=f+13; dw.v2= cl.getLP(forms[i]); + // dw.v3=lx.getTag(forms[i+1]);vs[n++]=mf.calc4(dw); + // } + } + + if (len > i + 2) { + if (forms[i + 2] < corpusWrds && forms[i + 1] < corpusWrds) { + dw.v0 = f + 12; + dw.v2 = forms[i + 2]; + dw.v3 = forms[i + 1]; + vs[n++] = mf.calc4(dw); + vs[n++] = mf.calc3(dw); + } + d2.v0 = f + 13; + d2.v2 = pposs[i + 1]; + d2.v3 = pposs[i + 2]; + vs[n++] = mf.calc4(d2); + } + + if (len > i + 3) { + if (forms[i + 3] < this.corpusWrds && forms[i + 2] < this.corpusWrds) { + dw.v0 = f + 14; + dw.v2 = forms[i + 3]; + dw.v3 = forms[i + 2]; + vs[n++] = mf.calc4(dw); + vs[n++] = mf.calc3(dw); + } + } + } + f += 15; + + // length + d2.v0 = f++; + d2.v2 = is.chars[ic][i][11]; + vs[n++] = mf.calc3(d2); // contains a number - d2.v0=f++; d2.v2=number; vs[n++]=mf.calc3(d2); - if (lemmas[i]< corpusWrds) {d1.v0=f; d1.v2=lemmas[i]; vs[n++]=mf.calc3(d1); } + d2.v0 = f++; + d2.v2 = number; + vs[n++] = mf.calc3(d2); + if (lemmas[i] < corpusWrds) { + d1.v0 = f; + d1.v2 = lemmas[i]; + vs[n++] = mf.calc3(d1); + } f++; - if (i!=0 &&len>i+1) { - - if (lemmas[i-1]< corpusWrds&& lemmas[i+1]<corpusWrds) {dw.v0=f; dw.v2=lemmas[i-1];dw.v3=lemmas[i+1];vs[n++]=mf.calc4(dw);} - - d2.v0=f+1; d2.v2=pposs[i-1]; d2.v3=pposs[i+1];vs[n++]=mf.calc4(d2); - } - f+=2; - - d2.v0= f++; d2.v2=i>=1? pposs[i-1]:_strp; vs[n++]=mf.calc3(d2); - - if (i>0) { - - dw.v0 = f; dw.v2 =i>=1? forms[i-1]<corpusWrds?forms[i-1]:-1:_strp; vs[n++]=mf.calc3(dw); + if (i != 0 && len > i + 1) { + + if (lemmas[i - 1] < corpusWrds && lemmas[i + 1] < corpusWrds) { + dw.v0 = f; + dw.v2 = lemmas[i - 1]; + dw.v3 = lemmas[i + 1]; + vs[n++] = mf.calc4(dw); + } + + d2.v0 = f + 1; + d2.v2 = pposs[i - 1]; + d2.v3 = pposs[i + 1]; + vs[n++] = mf.calc4(d2); + } + f += 2; + + d2.v0 = f++; + d2.v2 = i >= 1 ? pposs[i - 1] : _strp; + vs[n++] = mf.calc3(d2); + + if (i > 0) { + + dw.v0 = f; + dw.v2 = i >= 1 ? forms[i - 1] < corpusWrds ? forms[i - 1] : -1 : _strp; + vs[n++] = mf.calc3(dw); f++; - - if (lemmas[i-1]<corpusWrds) {dw.v0 = f; dw.v2 = i>=1? lemmas[i-1]:_strp; vs[n++]=mf.calc3(dw);} + + if (lemmas[i - 1] < corpusWrds) { + dw.v0 = f; + dw.v2 = i >= 1 ? lemmas[i - 1] : _strp; + vs[n++] = mf.calc3(dw); + } f++; - - //if (len>i+1) {d2.v0=f; d2.v2= pposs[i-1];d2.v3= pposs[i+1]; vs[n++]=mf.calc4(d2);} - //f++; - - if (i>1) { - - d2.v0=f++; d2.v2=i<2?_strp: pposs[i-2]; vs[n++]=mf.calc3(d2); - d2.v0=f++; d2.v2= pposs[i-1]; d2.v3= pposs[i-2]; vs[n++]=mf.calc4(d2); - - if (forms[i-2]<corpusWrds) {dw.v0=f;dw.v2= forms[i-2]; vs[n++]=mf.calc3(dw);} f++; - if (forms[i-1]<corpusWrds) {dwp.v0=f;dwp.v2 = forms[i-1]; dwp.v3 = pposs[i-2];vs[n++]=mf.calc4(dwp); } f++; - if (forms[i-2]<corpusWrds) {dwp.v0=f;dwp.v2 = forms[i-2]; dwp.v3 = pposs[i-1];vs[n++]=mf.calc4(dwp);} f++; - - if (i>2) { - d2.v0=f++; d2.v2=pposs[i-3]; vs[n++]=mf.calc3(d2); - d2.v0=f++; d2.v2=pposs[i-2]; d2.v3= pposs[i-3]; vs[n++]=mf.calc4(d2); - if(forms[i-3]<this.corpusWrds && forms[i-2]<this.corpusWrds) { - dw.v0=f; dw.v2 = forms[i-3]; dw.v3 = forms[i-2]; vs[n++]=mf.calc4(dw); + + // if (len>i+1) {d2.v0=f; d2.v2= pposs[i-1];d2.v3= pposs[i+1]; + // vs[n++]=mf.calc4(d2);} + // f++; + + if (i > 1) { + + d2.v0 = f++; + d2.v2 = i < 2 ? _strp : pposs[i - 2]; + vs[n++] = mf.calc3(d2); + d2.v0 = f++; + d2.v2 = pposs[i - 1]; + d2.v3 = pposs[i - 2]; + vs[n++] = mf.calc4(d2); + + if (forms[i - 2] < corpusWrds) { + dw.v0 = f; + dw.v2 = forms[i - 2]; + vs[n++] = mf.calc3(dw); + } + f++; + if (forms[i - 1] < corpusWrds) { + dwp.v0 = f; + dwp.v2 = forms[i - 1]; + dwp.v3 = pposs[i - 2]; + vs[n++] = mf.calc4(dwp); + } + f++; + if (forms[i - 2] < corpusWrds) { + dwp.v0 = f; + dwp.v2 = forms[i - 2]; + dwp.v3 = pposs[i - 1]; + vs[n++] = mf.calc4(dwp); + } + f++; + + if (i > 2) { + d2.v0 = f++; + d2.v2 = pposs[i - 3]; + vs[n++] = mf.calc3(d2); + d2.v0 = f++; + d2.v2 = pposs[i - 2]; + d2.v3 = pposs[i - 3]; + vs[n++] = mf.calc4(d2); + if (forms[i - 3] < this.corpusWrds && forms[i - 2] < this.corpusWrds) { + dw.v0 = f; + dw.v2 = forms[i - 3]; + dw.v3 = forms[i - 2]; + vs[n++] = mf.calc4(dw); } f++; } @@ -422,100 +656,109 @@ final public class ExtractorT2 extends PipeGen implements IPipe { vs[n] = Integer.MIN_VALUE; } - - public int fillFeatureVectorsOne(String fs, ParametersFloat params, int w1, InstancesTagger is, int n, short[] pos,Long2IntInterface li, float[] score) { + public int fillFeatureVectorsOne(String fs, ParametersFloat params, int w1, InstancesTagger is, int n, short[] pos, + Long2IntInterface li, float[] score) { - float best = -1000; + float best = -1000; int bestType = -1; F2SF f = new F2SF(params.parameters); long vs[] = new long[_MAX]; int lemmas[]; - if (options.noLemmas) lemmas = new int[is.length(n)]; - else lemmas = is.plemmas[n]; - addFeatures(is,n,fs,w1,pos,is.forms[n], lemmas, vs); + if (options.noLemmas) + lemmas = new int[is.length(n)]; + else + lemmas = is.plemmas[n]; + addFeatures(is, n, fs, w1, pos, is.forms[n], lemmas, vs); - //for(int t = 0; t < types.length; t++) { - - for(int t=0;t<types.length;t++) { + // for(int t = 0; t < types.length; t++) { - int p = t<<s_type; + for (int t = 0; t < types.length; t++) { + + int p = t << s_type; f.clear(); - for(int k=0;vs[k]!=Integer.MIN_VALUE;k++) if(vs[k]>0) f.add(li.l2i(vs[k]+p)); + for (int k = 0; vs[k] != Integer.MIN_VALUE; k++) + if (vs[k] > 0) + f.add(li.l2i(vs[k] + p)); if (f.score > best) { - bestType=t; - score[w1]= best =f.score; + bestType = t; + score[w1] = best = f.score; } - } + } return bestType; } - - public ArrayList<POS> classify(String fs, ParametersFloat params, int w1, InstancesTagger is, int n, short[] pos, Long2IntInterface li) { + public ArrayList<POS> classify(String fs, ParametersFloat params, int w1, InstancesTagger is, int n, short[] pos, + Long2IntInterface li) { F2SF f = new F2SF(params.parameters); long vs[] = new long[_MAX]; int lemmas[]; - if (options.noLemmas) lemmas = new int[is.length(n)]; - else lemmas = is.plemmas[n]; - addFeatures(is,n,fs,w1,pos,is.forms[n], lemmas, vs); + if (options.noLemmas) + lemmas = new int[is.length(n)]; + else + lemmas = is.plemmas[n]; + addFeatures(is, n, fs, w1, pos, is.forms[n], lemmas, vs); ArrayList<POS> best = new ArrayList<POS>(types.length); - - for(int t=0;t<types.length;t++) { - int p = t<<s_type; + for (int t = 0; t < types.length; t++) { + + int p = t << s_type; f.clear(); - f.add(vs,li, p); + f.add(vs, li, p); POS px = new POS(t, f.score); best.add(px); - } + } Collections.sort(best); return best; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.tag5.IPipe#write(java.io.DataOutputStream) */ @Override - public void write(DataOutputStream dos){ + public void write(DataOutputStream dos) { try { this.cl.write(dos); this.lx.write(dos); dos.writeInt(this.corpusWrds); dos.writeInt(_pps.size()); - - for(Entry<Integer,int[]> e : _pps.entrySet()) { + + for (Entry<Integer, int[]> e : _pps.entrySet()) { dos.writeInt(e.getValue().length); - for(int k : e.getValue()) dos.writeInt(k); + for (int k : e.getValue()) + dos.writeInt(k); dos.writeInt(e.getKey()); } } catch (IOException e) { e.printStackTrace(); } } - - - public void read(DataInputStream dis){ + + public void read(DataInputStream dis) { try { - this.cl =new Cluster(dis); - this.lx =new Lexicon(dis); + this.cl = new Cluster(dis); + this.lx = new Lexicon(dis); this.corpusWrds = dis.readInt(); int pc = dis.readInt(); - for(int j=0;j<pc;j++) { - int ps[] = new int [dis.readInt()]; - for(int k=0;k<ps.length;k++) ps[k]=dis.readInt(); + for (int j = 0; j < pc; j++) { + int ps[] = new int[dis.readInt()]; + for (int k = 0; k < ps.length; k++) + ps[k] = dis.readInt(); _pps.put(dis.readInt(), ps); } - // System.out.println("_pps "+ps.length); - + // System.out.println("_pps "+ps.length); + } catch (IOException e) { e.printStackTrace(); } diff --git a/dependencyParser/mate-tools/src/is2/tag/Lexicon.java b/dependencyParser/mate-tools/src/is2/tag/Lexicon.java index 8a85813..f719f26 100644 --- a/dependencyParser/mate-tools/src/is2/tag/Lexicon.java +++ b/dependencyParser/mate-tools/src/is2/tag/Lexicon.java @@ -1,12 +1,8 @@ /** - * + * */ package is2.tag; -import is2.data.IEncoderPlus; -import is2.data.PipeGen; -import is2.util.DB; - import java.io.BufferedReader; import java.io.DataInputStream; import java.io.DataOutputStream; @@ -14,75 +10,86 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; +import is2.data.IEncoderPlus; +import is2.data.PipeGen; +import is2.util.DB; + /** * @author Dr. Bernd Bohnet, 07.01.2011 - * - * + * + * */ public class Lexicon { - - public static final String FR = "FR",TAG = "TAG"; - + + public static final String FR = "FR", TAG = "TAG"; + final byte[][] word2tag; + public Lexicon(byte[][] w2t) { - + word2tag = w2t; } - + public Lexicon(String clusterFile, IEncoderPlus mf) { final String REGEX = "\t"; // register words try { - BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768); + BufferedReader inputReader = new BufferedReader( + new InputStreamReader(new FileInputStream(clusterFile), "UTF-8"), 32768); - int cnt=0; + int cnt = 0; String line; - while ((line =inputReader.readLine())!=null) { + while ((line = inputReader.readLine()) != null) { try { - String[] split = line.split(REGEX); - // int f = Integer.parseInt(split[2]); -// if (f>2) { - cnt++; - mf.register(PipeGen.WORD, split[0]); - mf.register(TAG, split[1]); //tag - - if (split.length>1) mf.register(FR, split[1]); // frequency -// } - } catch(Exception e) { - System.out.println("Error in lexicon line "+cnt+" error: "+e.getMessage()); + String[] split = line.split(REGEX); + // int f = Integer.parseInt(split[2]); + // if (f>2) { + cnt++; + mf.register(PipeGen.WORD, split[0]); + mf.register(TAG, split[1]); // tag + + if (split.length > 1) + mf.register(FR, split[1]); // frequency + // } + } catch (Exception e) { + System.out.println("Error in lexicon line " + cnt + " error: " + e.getMessage()); } } - System.out.println("read number of words from lexicon "+cnt); + System.out.println("read number of words from lexicon " + cnt); inputReader.close(); - + } catch (Exception e) { e.printStackTrace(); } - + word2tag = new byte[mf.getFeatureCounter().get(PipeGen.WORD)][1]; // insert words try { String line; - BufferedReader inputReader = new BufferedReader(new InputStreamReader(new FileInputStream(clusterFile),"UTF-8"),32768); + BufferedReader inputReader = new BufferedReader( + new InputStreamReader(new FileInputStream(clusterFile), "UTF-8"), 32768); - while ((line =inputReader.readLine())!=null) { + while ((line = inputReader.readLine()) != null) { String[] split = line.split(REGEX); - int w =mf.getValue(PipeGen.WORD, split[0]); - if (w<0) continue; - word2tag[w][0] = (byte)mf.getValue(TAG, split[1]); - // if (split.length>1) word2tag[w][1]= (byte)mf.getValue(FR, split[2]); // frequency + int w = mf.getValue(PipeGen.WORD, split[0]); + if (w < 0) + continue; + word2tag[w][0] = (byte) mf.getValue(TAG, split[1]); + // if (split.length>1) word2tag[w][1]= (byte)mf.getValue(FR, + // split[2]); // frequency } inputReader.close(); - int fill=0; - for(int l = 0; l<word2tag.length; l++ ){ - if (word2tag[l][0]!=0) fill++; + int fill = 0; + for (byte[] element : word2tag) { + if (element[0] != 0) + fill++; } - System.out.println("filled "+fill+" of "+word2tag.length); - + System.out.println("filled " + fill + " of " + word2tag.length); + } catch (Exception e) { e.printStackTrace(); } @@ -90,32 +97,34 @@ public class Lexicon { /** * Read the cluster + * * @param dos - * @throws IOException + * @throws IOException */ public Lexicon(DataInputStream dis) throws IOException { word2tag = new byte[dis.readInt()][1]; - for(int i =0;i<word2tag.length;i++) { - word2tag[i][0]=dis.readByte(); -// word2tag[i][1]=dis.readByte(); + for (int i = 0; i < word2tag.length; i++) { + word2tag[i][0] = dis.readByte(); + // word2tag[i][1]=dis.readByte(); } - DB.println("Read lexicon with "+word2tag.length+" words "); + DB.println("Read lexicon with " + word2tag.length + " words "); } - + /** * Write the cluster + * * @param dos - * @throws IOException + * @throws IOException */ public void write(DataOutputStream dos) throws IOException { dos.writeInt(word2tag.length); - for(byte[] i : word2tag) { + for (byte[] i : word2tag) { dos.writeByte(i[0]); -// dos.writeByte(i[1]); + // dos.writeByte(i[1]); } - + } /** @@ -123,7 +132,8 @@ public class Lexicon { * @return */ public int getTag(int form) { - if (word2tag.length<form || form<0) return -1; + if (word2tag.length < form || form < 0) + return -1; return word2tag[form][0]; } @@ -132,9 +142,9 @@ public class Lexicon { * @return */ public int getConf(int form) { - if (word2tag.length<form || form<0) return -1; + if (word2tag.length < form || form < 0) + return -1; return word2tag[form][1]; } - - + } diff --git a/dependencyParser/mate-tools/src/is2/tag/MFO.java b/dependencyParser/mate-tools/src/is2/tag/MFO.java index df790e3..c391727 100644 --- a/dependencyParser/mate-tools/src/is2/tag/MFO.java +++ b/dependencyParser/mate-tools/src/is2/tag/MFO.java @@ -1,218 +1,213 @@ package is2.tag; - -import is2.data.IEncoderPlus; -import is2.util.DB; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.Map.Entry; +import is2.data.IEncoderPlus; +import is2.util.DB; + /** * Map Features, do not map long to integer - * + * * @author Bernd Bohnet, 20.09.2009 */ -final public class MFO implements IEncoderPlus { +final public class MFO implements IEncoderPlus { /** The features and its values */ - private final HashMap<String,HashMap<String,Integer>> m_featureSets = new HashMap<String,HashMap<String,Integer>>(); + private final HashMap<String, HashMap<String, Integer>> m_featureSets = new HashMap<String, HashMap<String, Integer>>(); /** The feature class and the number of values */ - private final HashMap<String,Integer> m_featureCounters = new HashMap<String,Integer>(); + private final HashMap<String, Integer> m_featureCounters = new HashMap<String, Integer>(); /** The number of bits needed to encode a feature */ - final HashMap<String,Integer> m_featureBits = new HashMap<String,Integer>(); + final HashMap<String, Integer> m_featureBits = new HashMap<String, Integer>(); /** Integer counter for long2int */ - //private int count=0; + // private int count=0; /** Stop growing */ - public boolean stop=false; - - final public static String NONE="<None>"; - - - + public boolean stop = false; + final public static String NONE = "<None>"; final public static class Data4 { public int shift; - public short a0,a1,a2,a3,a4,a5,a6,a7,a8,a9; - public int v0,v1,v2,v3,v4,v5,v6,v7,v8,v9; + public short a0, a1, a2, a3, a4, a5, a6, a7, a8, a9; + public int v0, v1, v2, v3, v4, v5, v6, v7, v8, v9; final public long calcs(int b, long v, long l) { - if (l<0) return l; - l |= v<<shift; - shift +=b; + if (l < 0) + return l; + l |= v << shift; + shift += b; return l; } - final public long calc2() { - if (v0<0||v1<0) return -1; + if (v0 < 0 || v1 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; + shift = a0; + l |= (long) v1 << shift; + shift += a1; return l; } - - final public long calc3() { - if (v0<0||v1<0||v2<0) return -1; - // if (v1<0||v2<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0) + return -1; + // if (v1<0||v2<0) return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift=(short) (shift + a2); + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift = (short) (shift + a2); - //shift=; + // shift=; return l; } - final public long calc4() { - if (v0<0||v1<0||v2<0||v3<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift +=a2; - l |= (long)v3<<shift; - shift= shift +a3; + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift += a2; + l |= (long) v3 << shift; + shift = shift + a3; return l; } - - final public long calc5() { - if (v0<0||v1<0||v2<0||v3<0||v4<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift +=a2; - l |= (long)v3<<shift; - shift +=a3; - l |= (long)v4<<shift; - shift =shift+a4; + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift += a2; + l |= (long) v3 << shift; + shift += a3; + l |= (long) v4 << shift; + shift = shift + a4; return l; } - final public long calc6() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift +=a2; - l |= (long)v3<<shift; - shift +=a3; - l |= (long)v4<<shift; - shift +=a4; - l |= (long)v5<<shift; - shift =shift+a5; + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift += a2; + l |= (long) v3 << shift; + shift += a3; + l |= (long) v4 << shift; + shift += a4; + l |= (long) v5 << shift; + shift = shift + a5; return l; } final public long calc7() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift +=a2; - l |= (long)v3<<shift; - shift +=a3; - l |= (long)v4<<shift; - shift +=a4; - l |= (long)v5<<shift; - shift +=a5; - l |= (long)v6<<shift; - shift =shift+a6; + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift += a2; + l |= (long) v3 << shift; + shift += a3; + l |= (long) v4 << shift; + shift += a4; + l |= (long) v5 << shift; + shift += a5; + l |= (long) v6 << shift; + shift = shift + a6; return l; } - final public long calc8() { - if (v0<0||v1<0||v2<0||v3<0||v4<0||v5<0||v6<0||v7<0) return -1; + if (v0 < 0 || v1 < 0 || v2 < 0 || v3 < 0 || v4 < 0 || v5 < 0 || v6 < 0 || v7 < 0) + return -1; long l = v0; - shift =a0; - l |= (long)v1<<shift; - shift +=a1; - l |= (long)v2<<shift; - shift +=a2; - l |= (long)v3<<shift; - shift +=a3; - l |= (long)v4<<shift; - shift +=a4; - l |= (long)v5<<shift; - shift +=a5; - l |= (long)v6<<shift; - shift +=a6; - l |= (long)v7<<shift; - shift =shift+a7; + shift = a0; + l |= (long) v1 << shift; + shift += a1; + l |= (long) v2 << shift; + shift += a2; + l |= (long) v3 << shift; + shift += a3; + l |= (long) v4 << shift; + shift += a4; + l |= (long) v5 << shift; + shift += a5; + l |= (long) v6 << shift; + shift += a6; + l |= (long) v7 << shift; + shift = shift + a7; return l; } } - public MFO () {} - - - // public int size() {return count;} + public MFO() { + } + // public int size() {return count;} final public void stop() { - stop=true; + stop = true; } final public void start() { - stop=false; + stop = false; } - /** * Register an attribute class, if it not exists and add a possible value + * * @param type * @param type2 */ - final public int register(String a, String v) { + @Override + final public int register(String a, String v) { - HashMap<String,Integer> fs = getFeatureSet().get(a); - if (fs==null) { - fs = new HashMap<String,Integer>(); + HashMap<String, Integer> fs = getFeatureSet().get(a); + if (fs == null) { + fs = new HashMap<String, Integer>(); getFeatureSet().put(a, fs); fs.put(NONE, 0); getFeatureCounter().put(a, 1); @@ -220,286 +215,282 @@ final public class MFO implements IEncoderPlus { Integer c = getFeatureCounter().get(a); Integer i = fs.get(v); - if (i==null) { + if (i == null) { fs.put(v, c); c++; - getFeatureCounter().put(a,c); - return c-1; - } else return i; + getFeatureCounter().put(a, c); + return c - 1; + } else + return i; } /** * Calculates the number of bits needed to encode a feature */ - public void calculateBits() { + public void calculateBits() { - int total=0; - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - int bits =(int)Math.ceil((Math.log(e.getValue()+1)/Math.log(2))); + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + int bits = (int) Math.ceil((Math.log(e.getValue() + 1) / Math.log(2))); m_featureBits.put(e.getKey(), bits); - total+=bits; - // System.out.println(" "+e.getKey()+" bits "+bits+" number "+(e.getValue()+1)); } - // System.out.println("total number of needed bits "+total); + // System.out.println("total number of needed bits "+total); } - - @Override - public String toString() { + public String toString() { StringBuffer content = new StringBuffer(); - for(Entry<String,Integer> e : getFeatureCounter().entrySet() ){ - content.append(e.getKey()+" "+e.getValue()); + for (Entry<String, Integer> e : getFeatureCounter().entrySet()) { + content.append(e.getKey() + " " + e.getValue()); content.append(':'); - // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); + // HashMap<String,Integer> vs = getFeatureSet().get(e.getKey()); content.append(getFeatureBits(e.getKey())); - /*if (vs.size()<120) - for(Entry<String,Integer> e2 : vs.entrySet()) { - content.append(e2.getKey()+" ("+e2.getValue()+") "); - }*/ + /* + * if (vs.size()<120) for(Entry<String,Integer> e2 : vs.entrySet()) + * { content.append(e2.getKey()+" ("+e2.getValue()+") "); } + */ content.append('\n'); } return content.toString(); } - - static final public long calcs(Data4 d,int b, long v, long l) { - if (l<0) return l; - l |= v<<d.shift; - d.shift +=b; + static final public long calcs(Data4 d, int b, long v, long l) { + if (l < 0) + return l; + l |= v << d.shift; + d.shift += b; return l; } - final public short getFeatureBits(String a) { - return (short)m_featureBits.get(a).intValue(); + return (short) m_featureBits.get(a).intValue(); } - - /** * Get the integer place holder of the string value v of the type a - * - * @param t the type - * @param v the value + * + * @param t + * the type + * @param v + * the value * @return the integer place holder of v */ + @Override final public int getValue(String t, String v) { - if (m_featureSets.get(t)==null) return -1; + if (m_featureSets.get(t) == null) + return -1; Integer vi = m_featureSets.get(t).get(v); - if (vi==null) return -1; //stop && + if (vi == null) + return -1; // stop && return vi.intValue(); } /** * Static version of getValue + * * @see getValue */ final public int getValueS(String a, String v) { - if (m_featureSets.get(a)==null) return -1; + if (m_featureSets.get(a) == null) + return -1; Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; //stop && + if (vi == null) + return -1; // stop && return vi.intValue(); } public int hasValue(String a, String v) { Integer vi = m_featureSets.get(a).get(v); - if (vi==null) return -1; + if (vi == null) + return -1; return vi.intValue(); } - - - final public long calc2(Data4 d) { - if (d.v0<0||d.v1<0) return -1; - // if (d.v1<0||d.v2<0) return -1; + if (d.v0 < 0 || d.v1 < 0) + return -1; + // if (d.v1<0||d.v2<0) return -1; long l = d.v0; - short shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - // l |= (long)d.v2<<shift; - d.shift=shift; + short shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + // l |= (long)d.v2<<shift; + d.shift = shift; - //d.shift=; + // d.shift=; return l; } - - final public long calc3(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0) + return -1; long l = d.v0; - short shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - d.shift=shift + d.a2; + short shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + d.shift = shift + d.a2; return l; } - final public long calc4(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - d.shift= shift +d.a3; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + d.shift = shift + d.a3; return l; } - final public long calc5(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - d.shift =shift+d.a4; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + d.shift = shift + d.a4; return l; } - final public long calc6(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - shift +=d.a4; - l |= (long)d.v5<<shift; - d.shift =shift+d.a5; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + shift += d.a4; + l |= (long) d.v5 << shift; + d.shift = shift + d.a5; return l; } final public long calc7(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0 || d.v6 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - shift +=d.a4; - l |= (long)d.v5<<shift; - shift +=d.a5; - l |= (long)d.v6<<shift; - d.shift =shift+d.a6; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + shift += d.a4; + l |= (long) d.v5 << shift; + shift += d.a5; + l |= (long) d.v6 << shift; + d.shift = shift + d.a6; return l; } - final public long calc8(Data4 d) { - if (d.v0<0||d.v1<0||d.v2<0||d.v3<0||d.v4<0||d.v5<0||d.v6<0||d.v7<0) return -1; + if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0 || d.v6 < 0 || d.v7 < 0) + return -1; long l = d.v0; - int shift =d.a0; - l |= (long)d.v1<<shift; - shift +=d.a1; - l |= (long)d.v2<<shift; - shift +=d.a2; - l |= (long)d.v3<<shift; - shift +=d.a3; - l |= (long)d.v4<<shift; - shift +=d.a4; - l |= (long)d.v5<<shift; - shift +=d.a5; - l |= (long)d.v6<<shift; - shift +=d.a6; - l |= (long)d.v7<<shift; - d.shift =shift+d.a7; + int shift = d.a0; + l |= (long) d.v1 << shift; + shift += d.a1; + l |= (long) d.v2 << shift; + shift += d.a2; + l |= (long) d.v3 << shift; + shift += d.a3; + l |= (long) d.v4 << shift; + shift += d.a4; + l |= (long) d.v5 << shift; + shift += d.a5; + l |= (long) d.v6 << shift; + shift += d.a6; + l |= (long) d.v7 << shift; + d.shift = shift + d.a7; return l; } - - - - - - - /** - * Maps a long to a integer value. This is very useful to save memory for sparse data long values + /** + * Maps a long to a integer value. This is very useful to save memory for + * sparse data long values + * * @param node * @return the integer */ - static public int misses = 0; - static public int good = 0; - - - + static public int misses = 0; + static public int good = 0; /** * Write the data + * * @param dos * @throws IOException */ public void writeData(DataOutputStream dos) throws IOException { dos.writeInt(getFeatureSet().size()); - for(Entry<String, HashMap<String,Integer>> e : getFeatureSet().entrySet()) { + for (Entry<String, HashMap<String, Integer>> e : getFeatureSet().entrySet()) { dos.writeUTF(e.getKey()); dos.writeInt(e.getValue().size()); - for(Entry<String,Integer> e2 : e.getValue().entrySet()) { + for (Entry<String, Integer> e2 : e.getValue().entrySet()) { - if(e2.getKey()==null) DB.println("key "+e2.getKey()+" value "+e2.getValue()+" e -key "+e.getKey()); - dos.writeUTF(e2.getKey()); + if (e2.getKey() == null) + DB.println("key " + e2.getKey() + " value " + e2.getValue() + " e -key " + e.getKey()); + dos.writeUTF(e2.getKey()); dos.writeInt(e2.getValue()); - } + } } } + public void read(DataInputStream din) throws IOException { int size = din.readInt(); - for(int i=0; i<size;i++) { + for (int i = 0; i < size; i++) { String k = din.readUTF(); int size2 = din.readInt(); - HashMap<String,Integer> h = new HashMap<String,Integer>(); - getFeatureSet().put(k,h); - for(int j = 0;j<size2;j++) { + HashMap<String, Integer> h = new HashMap<String, Integer>(); + getFeatureSet().put(k, h); + for (int j = 0; j < size2; j++) { h.put(din.readUTF(), din.readInt()); } getFeatureCounter().put(k, size2); @@ -508,8 +499,7 @@ final public class MFO implements IEncoderPlus { calculateBits(); } - - /** + /** * Clear the data */ public void clearData() { @@ -518,18 +508,19 @@ final public class MFO implements IEncoderPlus { getFeatureSet().clear(); } - public HashMap<String,Integer> getFeatureCounter() { + @Override + public HashMap<String, Integer> getFeatureCounter() { return m_featureCounters; } - public HashMap<String,HashMap<String,Integer>> getFeatureSet() { + public HashMap<String, HashMap<String, Integer>> getFeatureSet() { return m_featureSets; } - static public String[] reverse(HashMap<String,Integer> v){ + static public String[] reverse(HashMap<String, Integer> v) { String[] set = new String[v.size()]; - for(Entry<String,Integer> e : v.entrySet()) { - set[e.getValue()]=e.getKey(); + for (Entry<String, Integer> e : v.entrySet()) { + set[e.getValue()] = e.getKey(); } return set; } diff --git a/dependencyParser/mate-tools/src/is2/tag/Options.java b/dependencyParser/mate-tools/src/is2/tag/Options.java index 540f8ed..0998c70 100644 --- a/dependencyParser/mate-tools/src/is2/tag/Options.java +++ b/dependencyParser/mate-tools/src/is2/tag/Options.java @@ -1,100 +1,102 @@ package is2.tag; -import is2.util.OptionsSuper; - import java.io.File; +import is2.util.OptionsSuper; public final class Options extends OptionsSuper { - - public Options (String[] args) { - - for(int i = 0; i < args.length; i++) { + public Options(String[] args) { + + for (int i = 0; i < args.length; i++) { String[] pair = args[i].split(":"); - if (pair[0].equals("--help")) explain(); + if (pair[0].equals("--help")) + explain(); else if (pair[0].equals("-train")) { train = true; - trainfile = args[i+1]; + trainfile = args[i + 1]; } else if (pair[0].equals("-eval")) { eval = true; - goldfile =args[i+1]; i++; + goldfile = args[i + 1]; + i++; } else if (pair[0].equals("-test")) { test = true; - testfile = args[i+1]; i++; + testfile = args[i + 1]; + i++; } else if (pair[0].equals("-i")) { - numIters = Integer.parseInt(args[i+1]); i++; - } - else if (pair[0].equals("-out")) { - outfile = args[i+1]; i++; - } - else if (pair[0].equals("-decode")) { - decodeProjective = args[i+1].equals("proj"); i++; - } - else if (pair[0].equals("-confidence")) { - + numIters = Integer.parseInt(args[i + 1]); + i++; + } else if (pair[0].equals("-out")) { + outfile = args[i + 1]; + i++; + } else if (pair[0].equals("-decode")) { + decodeProjective = args[i + 1].equals("proj"); + i++; + } else if (pair[0].equals("-confidence")) { + conf = true; } else if (pair[0].equals("-count")) { - count = Integer.parseInt(args[i+1]); i++; + count = Integer.parseInt(args[i + 1]); + i++; } else if (pair[0].equals("-model")) { - modelName = args[i+1]; i++; - } else if (pair[0].equals("-tmp")) { - tmp = args[i+1]; i++; + modelName = args[i + 1]; + i++; + } else if (pair[0].equals("-tmp")) { + tmp = args[i + 1]; + i++; } else if (pair[0].equals("-format")) { - //format = args[i+1]; - formatTask = Integer.parseInt(args[i+1]); i++; + // format = args[i+1]; + formatTask = Integer.parseInt(args[i + 1]); + i++; } else if (pair[0].equals("-allfeatures")) { - allFeatures=true; + allFeatures = true; } else if (pair[0].equals("-nonormalize")) { - normalize=false; - }else if (pair[0].equals("-nframes")) { - //format = args[i+1]; - nbframes= args[i+1]; i++; - - + normalize = false; + } else if (pair[0].equals("-nframes")) { + // format = args[i+1]; + nbframes = args[i + 1]; + i++; + } else if (pair[0].equals("-pframes")) { - //format = args[i+1]; - pbframes= args[i+1]; i++; + // format = args[i+1]; + pbframes = args[i + 1]; + i++; } else if (pair[0].equals("-nopred")) { - nopred =true; + nopred = true; } else if (pair[0].equals("-divide")) { - keep =true; + keep = true; } else if (pair[0].equals("-lexicon")) { - lexicon= args[i+1]; i++; + lexicon = args[i + 1]; + i++; + + } else + super.addOption(args, i); - } else super.addOption(args, i); - } - - + try { + if (trainfile != null) { - try { - - if (trainfile!=null) { - - if (keep && tmp!=null) { + if (keep && tmp != null) { trainforest = new File(tmp); - if (!trainforest.exists()) keep=false; - - } else - if (tmp!=null) { + if (!trainforest.exists()) + keep = false; + + } else if (tmp != null) { trainforest = File.createTempFile("train", ".tmp", new File(tmp)); trainforest.deleteOnExit(); - } - else { - trainforest = File.createTempFile("train", ".tmp"); //,new File("F:\\") + } else { + trainforest = File.createTempFile("train", ".tmp"); // ,new + // File("F:\\") trainforest.deleteOnExit(); } - - + } - } catch (java.io.IOException e) { System.out.println("Unable to create tmp files for feature forests!"); System.out.println(e); @@ -107,19 +109,24 @@ public final class Options extends OptionsSuper { System.out.println("java -class mate.jar is2.parser.Parser [Options]"); System.out.println(); System.out.println("Example: "); - System.out.println(" java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6"); + System.out.println( + " java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6"); System.out.println(""); System.out.println("Options:"); System.out.println(""); - System.out.println(" -train <file> the corpus a model is trained on; default "+this.trainfile); - System.out.println(" -test <file> the input corpus for testing; default "+this.testfile); - System.out.println(" -out <file> the output corpus (result) of a test run; default "+this.outfile); + System.out.println(" -train <file> the corpus a model is trained on; default " + this.trainfile); + System.out.println(" -test <file> the input corpus for testing; default " + this.testfile); + System.out.println(" -out <file> the output corpus (result) of a test run; default " + this.outfile); System.out.println(" -model <file> the parsing model for traing the model is stored in the files"); - System.out.println(" and for parsing the model is load from this file; default "+this.modelName); - System.out.println(" -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters); - System.out.println(" -count <number> the n first sentences of the corpus are take for the training default "+this.count); - System.out.println(" -format <number> conll format of the year 8 or 9; default "+this.formatTask); - + System.out.println( + " and for parsing the model is load from this file; default " + this.modelName); + System.out.println( + " -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default " + + this.numIters); + System.out.println(" -count <number> the n first sentences of the corpus are take for the training default " + + this.count); + System.out.println(" -format <number> conll format of the year 8 or 9; default " + this.formatTask); + System.exit(0); } } diff --git a/dependencyParser/mate-tools/src/is2/tag/POS.java b/dependencyParser/mate-tools/src/is2/tag/POS.java index c8e039f..30f1e41 100644 --- a/dependencyParser/mate-tools/src/is2/tag/POS.java +++ b/dependencyParser/mate-tools/src/is2/tag/POS.java @@ -2,28 +2,29 @@ package is2.tag; public class POS implements Comparable<POS> { - // pos tag + // pos tag public int p; - + // score of the tag public float s; // the position of the word in the sentence public int w; - + public POS(int p, float s) { - this.p=p; - this.s=s; + this.p = p; + this.s = s; } @Override public int compareTo(POS o) { - - return s>o.s?-1:s==o.s?0:1; + + return s > o.s ? -1 : s == o.s ? 0 : 1; } - + + @Override public String toString() { - return ""+p+":"+s; + return "" + p + ":" + s; } - + } diff --git a/dependencyParser/mate-tools/src/is2/tag/Tagger.java b/dependencyParser/mate-tools/src/is2/tag/Tagger.java index b0c2dec..2ea146e 100644 --- a/dependencyParser/mate-tools/src/is2/tag/Tagger.java +++ b/dependencyParser/mate-tools/src/is2/tag/Tagger.java @@ -1,6 +1,18 @@ package is2.tag; - +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Map.Entry; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; +import java.util.zip.ZipOutputStream; import is2.data.F2SF; import is2.data.FV; @@ -13,6 +25,7 @@ import is2.data.PipeGen; import is2.data.SentenceData09; import is2.io.CONLLReader09; import is2.io.CONLLWriter09; +import is2.io.IOGenerals; import is2.tools.IPipe; import is2.tools.Tool; import is2.tools.Train; @@ -20,75 +33,60 @@ import is2.util.DB; import is2.util.Evaluator; import is2.util.OptionsSuper; -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Map.Entry; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; -import java.util.zip.ZipOutputStream; - - public class Tagger implements Tool, Train { public ExtractorT2 pipe; public ParametersFloat params; public Long2IntInterface li; public MFO mf; - private OptionsSuper _options; /** - * Initialize + * Initialize + * * @param options */ - public Tagger (Options options) { + public Tagger(Options options) { - // load the model try { readModel(options); } catch (Exception e) { e.printStackTrace(); - } + } + } + + public Tagger() { } - public Tagger() { } /** - * @param modelFileName the file name of the model + * @param modelFileName + * the file name of the model */ public Tagger(String modelFileName) { - this(new Options(new String[]{"-model",modelFileName})); + this(new Options(new String[] { "-model", modelFileName })); } - public static void main (String[] args) throws FileNotFoundException, Exception - { + public static void main(String[] args) throws FileNotFoundException, Exception { long start = System.currentTimeMillis(); Options options = new Options(args); - Tagger tagger = new Tagger(); if (options.train) { - // depReader.normalizeOn=false; + // depReader.normalizeOn=false; tagger.li = new Long2Int(options.hsize); - tagger.pipe = new ExtractorT2 (options, tagger.mf= new MFO()); - - //tagger.pipe.li =tagger.li; - - InstancesTagger is = (InstancesTagger)tagger.pipe.createInstances(options.trainfile); + tagger.pipe = new ExtractorT2(options, tagger.mf = new MFO()); + + // tagger.pipe.li =tagger.li; + + InstancesTagger is = (InstancesTagger) tagger.pipe.createInstances(options.trainfile); tagger.params = new ParametersFloat(tagger.li.size()); - tagger.train(options, tagger.pipe,tagger.params,is); + tagger.train(options, tagger.pipe, tagger.params, is); tagger.writeModel(options, tagger.pipe, tagger.params); } @@ -96,53 +94,54 @@ public class Tagger implements Tool, Train { if (options.test) { tagger.readModel(options); - - tagger.out(options,tagger.pipe, tagger.params); + + tagger.out(options, tagger.pipe, tagger.params); } System.out.println(); if (options.eval) { System.out.println("\nEVALUATION PERFORMANCE:"); - Evaluator.evaluateTagger(options.goldfile, options.outfile,options.format); + Evaluator.evaluateTagger(options.goldfile, options.outfile, options.format); } long end = System.currentTimeMillis(); - System.out.println("used time "+((float)((end-start)/100)/10)); + System.out.println("used time " + ((float) ((end - start) / 100) / 10)); } - public void readModel(OptionsSuper options) { + @Override + public void readModel(OptionsSuper options) { - try{ - pipe = new ExtractorT2(options, mf =new MFO()); - _options=options; - // load the model - ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName))); - zis.getNextEntry(); - DataInputStream dis = new DataInputStream(new BufferedInputStream(zis)); + try { + pipe = new ExtractorT2(options, mf = new MFO()); + // load the model + ZipInputStream zis = new ZipInputStream(new BufferedInputStream(new FileInputStream(options.modelName))); + zis.getNextEntry(); + DataInputStream dis = new DataInputStream(new BufferedInputStream(zis)); - pipe.mf.read(dis); - pipe.initValues(); - pipe.initFeatures(); + pipe.mf.read(dis); + pipe.initValues(); + pipe.initFeatures(); - params = new ParametersFloat(0); - params.read(dis); - li = new Long2Int(params.parameters.length); - pipe.read(dis); + params = new ParametersFloat(0); + params.read(dis); + li = new Long2Int(params.parameters.length); + pipe.read(dis); - dis.close(); + dis.close(); - pipe.types = new String[pipe.mf.getFeatureCounter().get(ExtractorT2.POS)]; - for(Entry<String,Integer> e : pipe.mf.getFeatureSet().get(ExtractorT2.POS).entrySet()) - pipe.types[e.getValue()] = e.getKey(); + pipe.types = new String[pipe.mf.getFeatureCounter().get(PipeGen.POS)]; + for (Entry<String, Integer> e : pipe.mf.getFeatureSet().get(PipeGen.POS).entrySet()) + pipe.types[e.getValue()] = e.getKey(); - DB.println("Loading data finished. "); - } catch(Exception e) { + DB.println("Loading data finished. "); + } catch (Exception e) { e.printStackTrace(); } } - + /** * Do the training + * * @param instanceLengths * @param options * @param pipe @@ -151,184 +150,190 @@ public class Tagger implements Tool, Train { * @throws InterruptedException * @throws ClassNotFoundException */ + @Override public void train(OptionsSuper options, IPipe pipe, ParametersFloat params, Instances is2) { - InstancesTagger is = (InstancesTagger)is2; - String wds[] = mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorT2.WORD)); - + InstancesTagger is = (InstancesTagger) is2; + String wds[] = MFO.reverse(this.pipe.mf.getFeatureSet().get(PipeGen.WORD)); + int pd[] = new int[this.pipe.types.length]; - for(int k=0;k<pd.length;k++) pd[k]=k; - - int del=0; - F2SF f = new F2SF(params.parameters); + for (int k = 0; k < pd.length; k++) + pd[k] = k; + + int del = 0; + F2SF f = new F2SF(params.parameters); long vs[] = new long[ExtractorT2._MAX]; - int types =this.pipe.types.length; + int types = this.pipe.types.length; - double upd = options.numIters*is.size() +1; + double upd = options.numIters * is.size() + 1; - for(int i = 0; i <options.numIters ; i++) { + for (int i = 0; i < options.numIters; i++) { long start = System.currentTimeMillis(); int numInstances = is.size(); - long last= System.currentTimeMillis(); - FV pred = new FV(),gold = new FV(); + long last = System.currentTimeMillis(); + FV pred = new FV(), gold = new FV(); + + int correct = 0, count = 0; + System.out.print("Iteration " + i + ": "); - int correct =0,count=0; - System.out.print("Iteration "+i+": "); - - for(int n = 0; n < numInstances; n++) { + for (int n = 0; n < numInstances; n++) { - if((n+1) % 500 == 0) del= PipeGen.outValueErr(n+1, (count-correct),(float)correct/(float)count,del,last,upd); + if ((n + 1) % 500 == 0) + del = PipeGen.outValueErr(n + 1, (count - correct), (float) correct / (float) count, del, last, + upd); int length = is.length(n); - + upd--; - - for(int w = 1; w < length; w++) { - double best = -1000; + for (int w = 1; w < length; w++) { + + double best = -1000; short bestType = -1; - int[] lemmas; //= is.lemmas[n]; - if (options.noLemmas)lemmas = new int[is.length(n)]; - else lemmas = is.plemmas[n]; - - this.pipe.addFeatures(is,n,wds[is.forms[n][w]],w,is.gpos[n],is.forms[n], lemmas, vs); - - for(short t=0;t<types;t++) { + int[] lemmas; // = is.lemmas[n]; + if (options.noLemmas) + lemmas = new int[is.length(n)]; + else + lemmas = is.plemmas[n]; + + this.pipe.addFeatures(is, n, wds[is.forms[n][w]], w, is.gpos[n], is.forms[n], lemmas, vs); + + for (short t = 0; t < types; t++) { // the hypotheses of a part of speech tag - long p = t<<ExtractorT2.s_type; - f.clear(); - + long p = t << ExtractorT2.s_type; + f.clear(); + // add the features to the vector - for(int k1=0;vs[k1]!=Integer.MIN_VALUE;k1++) { - if (vs[k1]>0) f.add(this.li.l2i(vs[k1]|p)); + for (int k1 = 0; vs[k1] != Integer.MIN_VALUE; k1++) { + if (vs[k1] > 0) + f.add(this.li.l2i(vs[k1] | p)); } if (f.score > best) { - bestType=t; - best =f.score; + bestType = t; + best = f.score; } } - + count++; - if (bestType == is.gpos[n][w] ) { + if (bestType == is.gpos[n][w]) { correct++; - continue; + continue; } - + pred.clear(); - for (int k1=0;vs[k1]!=Integer.MIN_VALUE;k1++) if (vs[k1]>0) pred.add(this.li.l2i(vs[k1]| bestType<<ExtractorT2.s_type)); + for (int k1 = 0; vs[k1] != Integer.MIN_VALUE; k1++) + if (vs[k1] > 0) + pred.add(this.li.l2i(vs[k1] | bestType << ExtractorT2.s_type)); gold.clear(); - for (int k1=0;vs[k1]!=Integer.MIN_VALUE;k1++) if (vs[k1]>0) gold.add(this.li.l2i(vs[k1] | is.gpos[n][w]<<ExtractorT2.s_type)); + for (int k1 = 0; vs[k1] != Integer.MIN_VALUE; k1++) + if (vs[k1] > 0) + gold.add(this.li.l2i(vs[k1] | is.gpos[n][w] << ExtractorT2.s_type)); - params.update(pred,gold, (float)upd, 1.0F); + params.update(pred, gold, (float) upd, 1.0F); } - } + } long end = System.currentTimeMillis(); - String info = "time "+(end-start); - PipeGen.outValueErr(numInstances, (count-correct),(float)correct/(float)count,del,last,upd,info); + String info = "time " + (end - start); + PipeGen.outValueErr(numInstances, (count - correct), (float) correct / (float) count, del, last, upd, info); System.out.println(); - del=0; + del = 0; } - params.average(options.numIters*is.size()); + params.average(options.numIters * is.size()); } - /** * Tag a sentence + * * @param options * @param pipe * @param params * @throws IOException */ - public void out (OptionsSuper options, IPipe pipe, ParametersFloat params) { - + @Override + public void out(OptionsSuper options, IPipe pipe, ParametersFloat params) { + try { - - long start = System.currentTimeMillis(); -// change this backe!!! -// CONLLReader09 depReader = new CONLLReader09(options.testfile, CONLLReader09.NO_NORMALIZE); - CONLLReader09 depReader = new CONLLReader09(options.testfile); + long start = System.currentTimeMillis(); + // change this backe!!! + // CONLLReader09 depReader = new CONLLReader09(options.testfile, + // CONLLReader09.NO_NORMALIZE); + CONLLReader09 depReader = new CONLLReader09(options.testfile); - CONLLWriter09 depWriter = new CONLLWriter09(options.outfile); + CONLLWriter09 depWriter = new CONLLWriter09(options.outfile); - System.out.print("Processing Sentence: "); - pipe.initValues(); + System.out.print("Processing Sentence: "); + pipe.initValues(); - int cnt = 0; - int del=0; - while(true) { + int cnt = 0; + int del = 0; + while (true) { - InstancesTagger is = new InstancesTagger(); - is.init(1, mf); - SentenceData09 instance = depReader.getNext(is); - if (instance == null || instance.forms == null) break; - - - is.fillChars(instance, 0, ExtractorT2._CEND); + InstancesTagger is = new InstancesTagger(); + is.init(1, mf); + SentenceData09 instance = depReader.getNext(is); + if (instance == null || instance.forms == null) + break; - cnt++; + is.fillChars(instance, 0, ExtractorT2._CEND); + cnt++; - tag(is, instance); + tag(is, instance); - SentenceData09 i09 = new SentenceData09(instance); - i09.createSemantic(instance); - depWriter.write(i09); + SentenceData09 i09 = new SentenceData09(instance); + i09.createSemantic(instance); + depWriter.write(i09); - if(cnt % 100 == 0) del=PipeGen.outValue(cnt, del); + if (cnt % 100 == 0) + del = PipeGen.outValue(cnt, del); - } - del=PipeGen.outValue(cnt, del); - depWriter.finishWriting(); - - float min=1000, max=-1000; - - // int r[] = new int[14]; - /* - for(Entry<Float, Integer> e : map.entrySet()) { - if(e.getKey()<min)min=e.getKey(); - if(e.getKey()>max)max=e.getKey(); - - if(e.getKey()<0.2) r[0]++; - else if(e.getKey()<0.5) r[1]+=e.getValue(); - else if(e.getKey()<0.7) r[2]+=e.getValue(); - else if(e.getKey()<0.8) r[3]+=e.getValue(); - else if(e.getKey()<0.9) r[4]+=e.getValue(); - else if(e.getKey()<1.0) r[5]+=e.getValue(); - else if(e.getKey()<1.2) r[6]+=e.getValue(); - else if(e.getKey()<1.3) r[7]+=e.getValue(); - else if(e.getKey()<1.4) r[8]+=e.getValue(); - else if(e.getKey()<1.5) r[9]+=e.getValue(); - else if(e.getKey()<1.9) r[10]+=e.getValue(); - else if(e.getKey()<2.2) r[11]+=e.getValue(); - else if(e.getKey()<2.5) r[12]+=e.getValue(); - else if(e.getKey()>=2.5) r[13]+=e.getValue(); - } - */ - // for(int k=0;k<r.length;k++) System.out.println(k+" "+r[k][0]+" "+((float)r[k][1]/(float)r[k][0])+" good "+r[k][1]); - // System.out.println("min "+min+" "+max); - - long end = System.currentTimeMillis(); - System.out.println(PipeGen.getSecondsPerInstnace(cnt,(end-start))); - System.out.println(PipeGen.getUsedTime(end-start)); - } catch(Exception e) { + } + del = PipeGen.outValue(cnt, del); + depWriter.finishWriting(); + + // int r[] = new int[14]; + /* + * for(Entry<Float, Integer> e : map.entrySet()) { + * if(e.getKey()<min)min=e.getKey(); + * if(e.getKey()>max)max=e.getKey(); + * + * if(e.getKey()<0.2) r[0]++; else if(e.getKey()<0.5) + * r[1]+=e.getValue(); else if(e.getKey()<0.7) r[2]+=e.getValue(); + * else if(e.getKey()<0.8) r[3]+=e.getValue(); else + * if(e.getKey()<0.9) r[4]+=e.getValue(); else if(e.getKey()<1.0) + * r[5]+=e.getValue(); else if(e.getKey()<1.2) r[6]+=e.getValue(); + * else if(e.getKey()<1.3) r[7]+=e.getValue(); else + * if(e.getKey()<1.4) r[8]+=e.getValue(); else if(e.getKey()<1.5) + * r[9]+=e.getValue(); else if(e.getKey()<1.9) r[10]+=e.getValue(); + * else if(e.getKey()<2.2) r[11]+=e.getValue(); else + * if(e.getKey()<2.5) r[12]+=e.getValue(); else if(e.getKey()>=2.5) + * r[13]+=e.getValue(); } + */ + // for(int k=0;k<r.length;k++) System.out.println(k+" "+r[k][0]+" + // "+((float)r[k][1]/(float)r[k][0])+" good "+r[k][1]); + // System.out.println("min "+min+" "+max); + + long end = System.currentTimeMillis(); + System.out.println(PipeGen.getSecondsPerInstnace(cnt, (end - start))); + System.out.println(PipeGen.getUsedTime(end - start)); + } catch (Exception e) { e.printStackTrace(); } } - - public SentenceData09 tag(SentenceData09 instance){ + public SentenceData09 tag(SentenceData09 instance) { InstancesTagger is = new InstancesTagger(); is.init(1, pipe.mf); new CONLLReader09().insert(is, instance); @@ -338,83 +343,82 @@ public class Tagger implements Tool, Train { return instance; } - private void tag(InstancesTagger is, SentenceData09 instance) { int length = instance.ppos.length; short[] pos = new short[instance.gpos.length]; - float sc[] =new float[instance.ppos.length]; - - instance.ppos[0]= is2.io.CONLLReader09.ROOT_POS; - pos[0]=(short)pipe.mf.getValue(ExtractorT2.POS, is2.io.CONLLReader09.ROOT_POS); + float sc[] = new float[instance.ppos.length]; + + instance.ppos[0] = IOGenerals.ROOT_POS; + pos[0] = (short) pipe.mf.getValue(PipeGen.POS, IOGenerals.ROOT_POS); - for(int j = 1; j < length; j++) { + for (int j = 1; j < length; j++) { - short bestType = (short)pipe.fillFeatureVectorsOne( instance.forms[j],params, j, is,0,pos,this.li,sc); + short bestType = (short) pipe.fillFeatureVectorsOne(instance.forms[j], params, j, is, 0, pos, this.li, sc); pos[j] = bestType; - instance.ppos[j]= pipe.types[bestType]; + instance.ppos[j] = pipe.types[bestType]; } - for(int j = 1; j < length; j++) { + for (int j = 1; j < length; j++) { - short bestType = (short)pipe.fillFeatureVectorsOne(instance.forms[j],params, j, is,0,pos,this.li,sc); - instance.ppos[j]= pipe.types[bestType]; - pos[j]=bestType; + short bestType = (short) pipe.fillFeatureVectorsOne(instance.forms[j], params, j, is, 0, pos, this.li, sc); + instance.ppos[j] = pipe.types[bestType]; + pos[j] = bestType; } } /** * Tag a single word and return a n-best list of Part-of-Speech tags. - * - * @param is set of sentences - * @param instanceIndex index to the sentence in question - * @param word word to be tagged + * + * @param is + * set of sentences + * @param instanceIndex + * index to the sentence in question + * @param word + * word to be tagged * @return n-best list of Part-of-Speech tags */ - public ArrayList<POS> tag(InstancesTagger is,int instanceIndex, int word, String wordForm) { + public ArrayList<POS> tag(InstancesTagger is, int instanceIndex, int word, String wordForm) { + + return pipe.classify(wordForm, params, word, is, instanceIndex, is.pposs[instanceIndex], li); - return pipe.classify( wordForm , params, word, is, instanceIndex, is.pposs[instanceIndex], li); - } - - public ArrayList<String> tagStrings(InstancesTagger is,int instanceIndex, int word, String wordForm) { - - ArrayList<POS> plist = pipe.classify( wordForm , params, word, is, instanceIndex, is.pposs[instanceIndex], li); - String pos[] = mf.reverse(this.pipe.mf.getFeatureSet().get(ExtractorT2.POS)); - - ArrayList<String> postags =null; - for(POS p : plist) { + + public ArrayList<String> tagStrings(InstancesTagger is, int instanceIndex, int word, String wordForm) { + + ArrayList<POS> plist = pipe.classify(wordForm, params, word, is, instanceIndex, is.pposs[instanceIndex], li); + String pos[] = MFO.reverse(this.pipe.mf.getFeatureSet().get(PipeGen.POS)); + + ArrayList<String> postags = null; + for (POS p : plist) { try { - postags.add(pos[p.p]); - }catch(Exception e) { + postags.add(pos[p.p]); + } catch (Exception e) { e.printStackTrace(); } } return postags; - - + } - - /** * Tag a sentence + * * @param options * @param pipe * @param parametersReranker * @throws IOException */ - public String[] tag (String[] words, String[] lemmas) { + public String[] tag(String[] words, String[] lemmas) { String[] pposs = new String[words.length]; try { pipe.initValues(); - int length = words.length+1; - + int length = words.length + 1; InstancesTagger is = new InstancesTagger(); is.init(1, pipe.mf); @@ -422,17 +426,17 @@ public class Tagger implements Tool, Train { SentenceData09 instance = new SentenceData09(); instance.forms = new String[length]; - instance.forms[0]=is2.io.CONLLReader09.ROOT; + instance.forms[0] = IOGenerals.ROOT; instance.plemmas = new String[length]; - instance.plemmas[0]=is2.io.CONLLReader09.ROOT_LEMMA; + instance.plemmas[0] = IOGenerals.ROOT_LEMMA; - for(int j = 0; j < words.length; j++) { - instance.forms[j+1]=words[j]; - instance.plemmas[j+1]=lemmas[j]; + for (int j = 0; j < words.length; j++) { + instance.forms[j + 1] = words[j]; + instance.plemmas[j + 1] = lemmas[j]; } - for(int j = 0; j < length; j++) { + for (int j = 0; j < length; j++) { is.setForm(0, j, instance.forms[j]); is.setLemma(0, j, instance.plemmas[j]); } @@ -443,24 +447,24 @@ public class Tagger implements Tool, Train { this.tag(is, instance); - for(int j = 0; j < words.length; j++) { - pposs[j] = instance.ppos[j+1]; + for (int j = 0; j < words.length; j++) { + pposs[j] = instance.ppos[j + 1]; } - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); } return pposs; - } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.tools.Tool#apply(is2.data.SentenceData09) */ @Override public SentenceData09 apply(SentenceData09 snt) { - SentenceData09 it = new SentenceData09(); it.createWithRoot(snt); @@ -469,22 +473,24 @@ public class Tagger implements Tool, Train { i09.createSemantic(it); return i09; } - - - - /* (non-Javadoc) - * @see is2.tools.Train#writeModel(is2.util.OptionsSuper, is2.mtag2.Pipe, is2.data.ParametersFloat) + + /* + * (non-Javadoc) + * + * @see is2.tools.Train#writeModel(is2.util.OptionsSuper, is2.mtag2.Pipe, + * is2.data.ParametersFloat) */ @Override public void writeModel(OptionsSuper options, IPipe pipe, is2.data.ParametersFloat params) { - try{ - ZipOutputStream zos = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(options.modelName))); - zos.putNextEntry(new ZipEntry("data")); + try { + ZipOutputStream zos = new ZipOutputStream( + new BufferedOutputStream(new FileOutputStream(options.modelName))); + zos.putNextEntry(new ZipEntry("data")); DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(zos)); this.pipe.mf.writeData(dos); - DB.println("number of parameters "+params.parameters.length); + DB.println("number of parameters " + params.parameters.length); dos.flush(); params.write(dos); @@ -492,7 +498,7 @@ public class Tagger implements Tool, Train { dos.flush(); dos.close(); - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); } } diff --git a/dependencyParser/mate-tools/src/is2/tools/IPipe.java b/dependencyParser/mate-tools/src/is2/tools/IPipe.java index d976074..b6e0e02 100644 --- a/dependencyParser/mate-tools/src/is2/tools/IPipe.java +++ b/dependencyParser/mate-tools/src/is2/tools/IPipe.java @@ -1,18 +1,16 @@ /** - * + * */ package is2.tools; -import is2.data.Instances; -import is2.data.InstancesTagger; - import java.io.DataOutputStream; -import java.io.File; + +import is2.data.Instances; /** * @author Dr. Bernd Bohnet, 25.12.2010 - * - * + * + * */ public interface IPipe { diff --git a/dependencyParser/mate-tools/src/is2/tools/Retrainable.java b/dependencyParser/mate-tools/src/is2/tools/Retrainable.java index 67a2e56..86fbfcc 100644 --- a/dependencyParser/mate-tools/src/is2/tools/Retrainable.java +++ b/dependencyParser/mate-tools/src/is2/tools/Retrainable.java @@ -2,23 +2,28 @@ package is2.tools; import is2.data.SentenceData09; -/** +/** * Provides Methods for the retraining + * * @author bohnetbd * */ public interface Retrainable { - + /** - * Retrains with a update factor (upd). - * The retraining stops when the model was successful adapted or it gave up after the maximal iterations. + * Retrains with a update factor (upd). The retraining stops when the model + * was successful adapted or it gave up after the maximal iterations. * - * @param sentence the data container of the new example. - * @param upd the update factor, e.g. 0.01 - * @param iterations maximal number of iterations that are tried to adapt the system. - * @return success = true -- else false + * @param sentence + * the data container of the new example. + * @param upd + * the update factor, e.g. 0.01 + * @param iterations + * maximal number of iterations that are tried to adapt the + * system. + * @return success = true -- else false */ - public boolean retrain(SentenceData09 sentence, float upd, int iterations) ; + public boolean retrain(SentenceData09 sentence, float upd, int iterations); boolean retrain(SentenceData09 sentence, float upd, int iterations, boolean print); diff --git a/dependencyParser/mate-tools/src/is2/tools/Tool.java b/dependencyParser/mate-tools/src/is2/tools/Tool.java index 06246a2..41ead53 100644 --- a/dependencyParser/mate-tools/src/is2/tools/Tool.java +++ b/dependencyParser/mate-tools/src/is2/tools/Tool.java @@ -1,5 +1,5 @@ /** - * + * */ package is2.tools; @@ -7,19 +7,19 @@ import is2.data.SentenceData09; /** * @author Bernd Bohnet, 27.10.2010 - * - * Interface to all tools + * + * Interface to all tools */ public interface Tool { - /** - * Uses the tool and applies it on the input sentence. - * The input is altered and has to include a root (token). - * - * @param i the input sentence - * @return The result of the performance without the root. + * Uses the tool and applies it on the input sentence. The input is altered + * and has to include a root (token). + * + * @param i + * the input sentence + * @return The result of the performance without the root. */ - SentenceData09 apply(SentenceData09 snt09); - + SentenceData09 apply(SentenceData09 snt09); + } diff --git a/dependencyParser/mate-tools/src/is2/tools/ToolIO.java b/dependencyParser/mate-tools/src/is2/tools/ToolIO.java index 279a4ff..d7b67fe 100644 --- a/dependencyParser/mate-tools/src/is2/tools/ToolIO.java +++ b/dependencyParser/mate-tools/src/is2/tools/ToolIO.java @@ -1,17 +1,15 @@ /** - * + * */ package is2.tools; -import is2.data.SentenceData09; - /** * @author Bernd Bohnet, 27.10.2010 - * - * Interface to all tools + * + * Interface to all tools */ public interface ToolIO { - void readModel(); - + void readModel(); + } diff --git a/dependencyParser/mate-tools/src/is2/tools/Train.java b/dependencyParser/mate-tools/src/is2/tools/Train.java index 234f937..31a7ad8 100644 --- a/dependencyParser/mate-tools/src/is2/tools/Train.java +++ b/dependencyParser/mate-tools/src/is2/tools/Train.java @@ -1,5 +1,5 @@ /** - * + * */ package is2.tools; @@ -9,8 +9,8 @@ import is2.util.OptionsSuper; /** * @author Dr. Bernd Bohnet, 24.12.2010 - * - * + * + * */ public interface Train { diff --git a/dependencyParser/mate-tools/src/is2/util/Convert.java b/dependencyParser/mate-tools/src/is2/util/Convert.java index 1ed2389..b9c820f 100644 --- a/dependencyParser/mate-tools/src/is2/util/Convert.java +++ b/dependencyParser/mate-tools/src/is2/util/Convert.java @@ -1,5 +1,5 @@ /** - * + * */ package is2.util; @@ -9,11 +9,9 @@ import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; -import java.io.Reader; import java.nio.charset.Charset; import java.util.ArrayList; - import is2.data.SentenceData09; import is2.io.CONLLReader06; import is2.io.CONLLReader08; @@ -23,91 +21,94 @@ import is2.io.CONLLWriter09; /** * @author Dr. Bernd Bohnet, 01.03.2010 - * - * + * + * */ public class Convert { - - public static void main(String args[]) throws Exception { - - - if (args.length<2) { - + + if (args.length < 2) { + System.out.println("Usage"); System.out.println(" java is2.util.Convert <in> <out> [-w06|-w0809|-yue] [-wordsonly]"); - - + } - - int todo =9; - boolean wordsOnly=false; - for(String a : args) { - if (a!=null && a.equals("-w06")) todo=6; - else if (a!=null && a.equals("-w0809")) todo=89; - else if (a!=null && a.equals("-yue")) todo=99; - else if (a!=null && a.equals("-utf8")) todo=8; - - if (a!=null && a.equals("-wordsonly")) wordsOnly=true; - - + + int todo = 9; + boolean wordsOnly = false; + for (String a : args) { + if (a != null && a.equals("-w06")) + todo = 6; + else if (a != null && a.equals("-w0809")) + todo = 89; + else if (a != null && a.equals("-yue")) + todo = 99; + else if (a != null && a.equals("-utf8")) + todo = 8; + + if (a != null && a.equals("-wordsonly")) + wordsOnly = true; + } - - if (todo==9)convert(args[0],args[1]); - else if (todo==6) convert0906(args[0],args[1]); - else if (todo==8) convert8(args[0],args[1], args[2]); - else if (todo==89) convert0809(args[0],args[1]); - else if (todo==99) { - convertChnYue(args[0],args[1],wordsOnly); + + if (todo == 9) + convert(args[0], args[1]); + else if (todo == 6) + convert0906(args[0], args[1]); + else if (todo == 8) + convert8(args[0], args[1], args[2]); + else if (todo == 89) + convert0809(args[0], args[1]); + else if (todo == 99) { + convertChnYue(args[0], args[1], wordsOnly); } - - + } - + private static void convert8(String infile, String outfile, String format) { - try { - - System.out.println("availableCharsets: "+Charset.availableCharsets()); - - BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(infile), format)); - BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF8")); - ; - int ch; - - int count =0, wcount=0;; - while ((ch = in.read()) > -1) { - count++; - - if (Character.isDefined(ch)) { - - out.write(ch); - wcount++; - } - } - in.close(); - out.close(); - System.out.println("read "+count+" chars and wrote "+wcount+" utf8 chars"); - } - catch (Exception e) { - e.printStackTrace(); - } - + try { + + System.out.println("availableCharsets: " + Charset.availableCharsets()); + + BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(infile), format)); + BufferedWriter out = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outfile), "UTF8")); + ; + int ch; + + int count = 0, wcount = 0; + ; + while ((ch = in.read()) > -1) { + count++; + + if (Character.isDefined(ch)) { + + out.write(ch); + wcount++; + } + } + in.close(); + out.close(); + System.out.println("read " + count + " chars and wrote " + wcount + " utf8 chars"); + } catch (Exception e) { + e.printStackTrace(); + } + } public static void convert(String source, String target) throws Exception { - + CONLLReader06 reader = new CONLLReader06(source); CONLLWriter09 writer = new CONLLWriter09(target); - int str =0; + int str = 0; while (true) { SentenceData09 i = reader.getNext(); str++; - if (i == null) break; - - - String[] formsNoRoot = new String[i.length()-1]; + if (i == null) + break; + + String[] formsNoRoot = new String[i.length() - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] lemmas = new String[formsNoRoot.length]; @@ -122,97 +123,95 @@ public class Convert { int[] heads = new int[formsNoRoot.length]; - - - for(int j = 0; j < formsNoRoot.length; j++) { - formsNoRoot[j] = i.forms[j+1]; - if (formsNoRoot[j].length()==0 ||formsNoRoot[j].equals("")) { - System.out.println("error forms "+str); - // System.exit(0); - formsNoRoot[j]=" "; + for (int j = 0; j < formsNoRoot.length; j++) { + formsNoRoot[j] = i.forms[j + 1]; + if (formsNoRoot[j].length() == 0 || formsNoRoot[j].equals("")) { + System.out.println("error forms " + str); + // System.exit(0); + formsNoRoot[j] = " "; } - posNoRoot[j] = i.gpos[j+1]; - if (posNoRoot[j].length()==0 ||posNoRoot[j].equals(" ")) { - System.out.println("error pos "+str); - // System.exit(0); + posNoRoot[j] = i.gpos[j + 1]; + if (posNoRoot[j].length() == 0 || posNoRoot[j].equals(" ")) { + System.out.println("error pos " + str); + // System.exit(0); } - pposs[j] = i.ppos[j+1]; - if (pposs[j].length()==0 ||pposs[j].equals(" ")) { - System.out.println("error pos "+str); - //System.exit(0); + pposs[j] = i.ppos[j + 1]; + if (pposs[j].length() == 0 || pposs[j].equals(" ")) { + System.out.println("error pos " + str); + // System.exit(0); } - labels[j] = i.labels[j+1]; - if (labels[j].length()==0 ||labels[j].equals(" ")) { - System.out.println("error lab "+str); - // System.exit(0); + labels[j] = i.labels[j + 1]; + if (labels[j].length() == 0 || labels[j].equals(" ")) { + System.out.println("error lab " + str); + // System.exit(0); } - heads[j] = i.heads[j+1]; - if(heads[j]> posNoRoot.length) { - System.out.println("head out of range "+heads[j]+" "+heads.length+" "+str); - heads[j]=posNoRoot.length; + heads[j] = i.heads[j + 1]; + if (heads[j] > posNoRoot.length) { + System.out.println("head out of range " + heads[j] + " " + heads.length + " " + str); + heads[j] = posNoRoot.length; } - - lemmas[j] = i.plemmas[j+1]; - if (lemmas[j].length()==0 ||lemmas[j].equals(" ")) { - System.out.println("error lab "+str); - // System.exit(0); + + lemmas[j] = i.plemmas[j + 1]; + if (lemmas[j].length() == 0 || lemmas[j].equals(" ")) { + System.out.println("error lab " + str); + // System.exit(0); } - org_lemmas[j] = i.lemmas[j+1]; - if (org_lemmas[j].length()==0 ||org_lemmas[j].equals(" ")) { - System.out.println("error lab "+str); - // System.exit(0); + org_lemmas[j] = i.lemmas[j + 1]; + if (org_lemmas[j].length() == 0 || org_lemmas[j].equals(" ")) { + System.out.println("error lab " + str); + // System.exit(0); } - of[j] = i.ofeats[j+1]; - pf[j] = i.pfeats[j+1]; - if (str==6099) { - // System.out.println(formsNoRoot[j]+"\t"+posNoRoot[j]+"\t"+pposs[j]+"\t"+labels[j]+"\t"+heads[j]); + of[j] = i.ofeats[j + 1]; + pf[j] = i.pfeats[j + 1]; + if (str == 6099) { + // System.out.println(formsNoRoot[j]+"\t"+posNoRoot[j]+"\t"+pposs[j]+"\t"+labels[j]+"\t"+heads[j]); } // (instance.fillp!=null) fillp[j] = instance.fillp[j+1]; } - SentenceData09 i09 = new SentenceData09(formsNoRoot, formsNoRoot, formsNoRoot,pposs, pposs, labels, heads,fillp,of, pf); + SentenceData09 i09 = new SentenceData09(formsNoRoot, formsNoRoot, formsNoRoot, pposs, pposs, labels, heads, + fillp, of, pf); + + // public SentenceData09(String[] forms, String[] lemmas, String[] + // olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, + // String[] fillpred) { + // SentenceData09 + // SentenceData09 i2 = new SentenceData09(i.forms, + // i.lemmas,i.org_lemmas,); - //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) { - //SentenceData09 - // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,); - writer.write(i09); - } writer.finishWriting(); - - + } - - - - public static void convertChnYue(String source, String target, boolean wordsOnly) throws Exception { - - - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(source),"UTF-8"),32768); - + + public static void convertChnYue(String source, String target, boolean wordsOnly) throws Exception { + + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(source), "UTF-8"), 32768); + CONLLWriter09 writer = new CONLLWriter09(target); - int str =0; + int str = 0; while (true) { - + ArrayList<String[]> lines = new ArrayList<String[]>(); - + String line; - while((line = reader.readLine())!=null) { - - if (line.length()<2) break; + while ((line = reader.readLine()) != null) { + + if (line.length() < 2) + break; String split[] = line.split("\t"); - lines.add(split); + lines.add(split); } - if (line ==null)break; - + if (line == null) + break; + str++; - - + String[] formsNoRoot = new String[lines.size()]; String[] posNoRoot = new String[formsNoRoot.length]; String[] lemmas = new String[formsNoRoot.length]; @@ -228,90 +227,91 @@ public class Convert { int[] heads = new int[formsNoRoot.length]; - - - for(int j = 0; j < formsNoRoot.length; j++) { + for (int j = 0; j < formsNoRoot.length; j++) { formsNoRoot[j] = lines.get(j)[0]; - if (formsNoRoot[j].length()==0 ||formsNoRoot[j].equals("")) { - System.out.println("error forms "+str); - // System.exit(0); - formsNoRoot[j]="_"; + if (formsNoRoot[j].length() == 0 || formsNoRoot[j].equals("")) { + System.out.println("error forms " + str); + // System.exit(0); + formsNoRoot[j] = "_"; } - + posNoRoot[j] = lines.get(j)[1]; - if (posNoRoot[j].length()==0 ||posNoRoot[j].equals(" ")) { - System.out.println("error pos "+str); - // System.exit(0); + if (posNoRoot[j].length() == 0 || posNoRoot[j].equals(" ")) { + System.out.println("error pos " + str); + // System.exit(0); } pposs[j] = "_"; labels[j] = lines.get(j)[3]; - if (labels[j].length()==0 ||labels[j].equals(" ")) { - System.out.println("error lab "+str); + if (labels[j].length() == 0 || labels[j].equals(" ")) { + System.out.println("error lab " + str); labels[j] = "_"; - // System.exit(0); + // System.exit(0); } - heads[j] = Integer.parseInt(lines.get(j)[2])+1; - if(heads[j]> posNoRoot.length) { - System.out.println("head out of range "+heads[j]+" "+heads.length+" "+str); - heads[j]=posNoRoot.length; + heads[j] = Integer.parseInt(lines.get(j)[2]) + 1; + if (heads[j] > posNoRoot.length) { + System.out.println("head out of range " + heads[j] + " " + heads.length + " " + str); + heads[j] = posNoRoot.length; } - - // 0 is root and not -1 - if (heads[j]==-1)heads[j]=0; - + + // 0 is root and not -1 + if (heads[j] == -1) + heads[j] = 0; + lemmas[j] = "_"; - + org_lemmas[j] = "_"; - + of[j] = "_"; pf[j] = "_"; if (wordsOnly) { - posNoRoot[j]="_"; - heads[j]=0; + posNoRoot[j] = "_"; + heads[j] = 0; labels[j] = "_"; } - + // (instance.fillp!=null) fillp[j] = instance.fillp[j+1]; } - SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas,posNoRoot, posNoRoot, labels, heads,fillp,of, pf); + SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas, posNoRoot, posNoRoot, labels, + heads, fillp, of, pf); + + // public SentenceData09(String[] forms, String[] lemmas, String[] + // olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, + // String[] fillpred) { + // SentenceData09 + // SentenceData09 i2 = new SentenceData09(i.forms, + // i.lemmas,i.org_lemmas,); - //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) { - //SentenceData09 - // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,); - writer.write(i09); - } + reader.close(); writer.finishWriting(); - - + } - - - + /** * Convert the 0 + * * @param source * @param target * @throws Exception */ - public static void convert0809(String source, String target) throws Exception { - + public static void convert0809(String source, String target) throws Exception { + CONLLReader08 reader = new CONLLReader08(source); CONLLWriter09 writer = new CONLLWriter09(target); - int str =0; + int str = 0; while (true) { SentenceData09 i = reader.getNext(); str++; - if (i == null) break; - - - String[] formsNoRoot = new String[i.length()-1]; + if (i == null) + break; + + String[] formsNoRoot = new String[i.length() - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] lemmas = new String[formsNoRoot.length]; @@ -326,85 +326,83 @@ public class Convert { int[] heads = new int[formsNoRoot.length]; - - - for(int j = 0; j < formsNoRoot.length; j++) { - formsNoRoot[j] = i.forms[j+1]; - if (formsNoRoot[j].length()==0 ||formsNoRoot[j].equals("")) { - System.out.println("error forms "+str); - // System.exit(0); - formsNoRoot[j]=" "; + for (int j = 0; j < formsNoRoot.length; j++) { + formsNoRoot[j] = i.forms[j + 1]; + if (formsNoRoot[j].length() == 0 || formsNoRoot[j].equals("")) { + System.out.println("error forms " + str); + // System.exit(0); + formsNoRoot[j] = " "; } - posNoRoot[j] = i.gpos[j+1]; - if (posNoRoot[j].length()==0 ||posNoRoot[j].equals(" ")) { - System.out.println("error pos "+str); - // System.exit(0); + posNoRoot[j] = i.gpos[j + 1]; + if (posNoRoot[j].length() == 0 || posNoRoot[j].equals(" ")) { + System.out.println("error pos " + str); + // System.exit(0); } - pposs[j] = i.ppos[j+1]; - if (pposs[j].length()==0 ||pposs[j].equals(" ")) { - System.out.println("error pos "+str); - //System.exit(0); + pposs[j] = i.ppos[j + 1]; + if (pposs[j].length() == 0 || pposs[j].equals(" ")) { + System.out.println("error pos " + str); + // System.exit(0); } - labels[j] = i.labels[j+1]; - if (labels[j].length()==0 ||labels[j].equals(" ")) { - System.out.println("error lab "+str); - // System.exit(0); + labels[j] = i.labels[j + 1]; + if (labels[j].length() == 0 || labels[j].equals(" ")) { + System.out.println("error lab " + str); + // System.exit(0); } - heads[j] = i.heads[j+1]; - if(heads[j]> posNoRoot.length) { - System.out.println("head out of range "+heads[j]+" "+heads.length+" "+str); - heads[j]=posNoRoot.length; + heads[j] = i.heads[j + 1]; + if (heads[j] > posNoRoot.length) { + System.out.println("head out of range " + heads[j] + " " + heads.length + " " + str); + heads[j] = posNoRoot.length; } - - lemmas[j] = i.plemmas[j+1]; - if (lemmas[j].length()==0 ||lemmas[j].equals(" ")) { - System.out.println("error lab "+str); - // System.exit(0); + + lemmas[j] = i.plemmas[j + 1]; + if (lemmas[j].length() == 0 || lemmas[j].equals(" ")) { + System.out.println("error lab " + str); + // System.exit(0); } - org_lemmas[j] = i.lemmas[j+1]; - // if (org_lemmas[j].length()==0 ||org_lemmas[j].equals(" ")) { - // System.out.println("error lab "+str); - // // System.exit(0); - // } -// of[j] = i.ofeats[j+1]; -// pf[j] = i.pfeats[j+1]; - if (str==6099) { - // System.out.println(formsNoRoot[j]+"\t"+posNoRoot[j]+"\t"+pposs[j]+"\t"+labels[j]+"\t"+heads[j]); + org_lemmas[j] = i.lemmas[j + 1]; + // if (org_lemmas[j].length()==0 ||org_lemmas[j].equals(" ")) { + // System.out.println("error lab "+str); + // // System.exit(0); + // } + // of[j] = i.ofeats[j+1]; + // pf[j] = i.pfeats[j+1]; + if (str == 6099) { + // System.out.println(formsNoRoot[j]+"\t"+posNoRoot[j]+"\t"+pposs[j]+"\t"+labels[j]+"\t"+heads[j]); } // (instance.fillp!=null) fillp[j] = instance.fillp[j+1]; } - SentenceData09 i09 = new SentenceData09(formsNoRoot, org_lemmas, lemmas,pposs, pposs, labels, heads,fillp,of, pf); + SentenceData09 i09 = new SentenceData09(formsNoRoot, org_lemmas, lemmas, pposs, pposs, labels, heads, fillp, + of, pf); + + // public SentenceData09(String[] forms, String[] lemmas, String[] + // olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, + // String[] fillpred) { + // SentenceData09 + // SentenceData09 i2 = new SentenceData09(i.forms, + // i.lemmas,i.org_lemmas,); - //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) { - //SentenceData09 - // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,); - writer.write(i09); - } writer.finishWriting(); - - + } - public static void convert0906(String source, String target) throws Exception { - + CONLLReader09 reader = new CONLLReader09(source); CONLLWriter06 writer = new CONLLWriter06(target); - while (true) { SentenceData09 i = reader.getNext(); - - if (i == null) break; - - - String[] formsNoRoot = new String[i.length()-1]; + + if (i == null) + break; + + String[] formsNoRoot = new String[i.length() - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] lemmas = new String[formsNoRoot.length]; @@ -419,37 +417,37 @@ public class Convert { int[] heads = new int[formsNoRoot.length]; - for(int j = 0; j < formsNoRoot.length; j++) { - formsNoRoot[j] = i.forms[j+1]; - posNoRoot[j] = i.gpos[j+1]; - pposs[j] = i.gpos[j+1]; + for (int j = 0; j < formsNoRoot.length; j++) { + formsNoRoot[j] = i.forms[j + 1]; + posNoRoot[j] = i.gpos[j + 1]; + pposs[j] = i.gpos[j + 1]; - labels[j] = i.labels[j+1]; - heads[j] = i.heads[j+1]; - lemmas[j] = i.plemmas[j+1]; + labels[j] = i.labels[j + 1]; + heads[j] = i.heads[j + 1]; + lemmas[j] = i.plemmas[j + 1]; - org_lemmas[j] = i.lemmas[j+1]; - of[j] = i.ofeats[j+1]; - pf[j] = i.pfeats[j+1]; + org_lemmas[j] = i.lemmas[j + 1]; + of[j] = i.ofeats[j + 1]; + pf[j] = i.pfeats[j + 1]; // (instance.fillp!=null) fillp[j] = instance.fillp[j+1]; } - SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas,posNoRoot, pposs, labels, heads,fillp,of, pf); + SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas, posNoRoot, pposs, labels, heads, + fillp, of, pf); + + // public SentenceData09(String[] forms, String[] lemmas, String[] + // olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, + // String[] fillpred) { + // SentenceData09 + // SentenceData09 i2 = new SentenceData09(i.forms, + // i.lemmas,i.org_lemmas,); - //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) { - //SentenceData09 - // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,); - writer.write(i09); - } writer.finishWriting(); - - + } - - } diff --git a/dependencyParser/mate-tools/src/is2/util/Convert0409.java b/dependencyParser/mate-tools/src/is2/util/Convert0409.java index 7fc1142..b735ad8 100644 --- a/dependencyParser/mate-tools/src/is2/util/Convert0409.java +++ b/dependencyParser/mate-tools/src/is2/util/Convert0409.java @@ -1,44 +1,40 @@ /** - * + * */ package is2.util; import is2.data.SentenceData09; import is2.io.CONLLReader04; -import is2.io.CONLLReader06; import is2.io.CONLLReader09; import is2.io.CONLLWriter06; import is2.io.CONLLWriter09; /** * @author Dr. Bernd Bohnet, 01.03.2010 - * - * + * + * */ public class Convert0409 { - - public static void main(String args[]) throws Exception { - - convert(args[0],args[1]); - - + + convert(args[0], args[1]); + } - + public static void convert(String source, String target) throws Exception { - + CONLLReader04 reader = new CONLLReader04(source); CONLLWriter09 writer = new CONLLWriter09(target); - int str =0; + int str = 0; while (true) { SentenceData09 i = reader.getNext(); str++; - if (i == null) break; - - - String[] formsNoRoot = new String[i.length()-1]; + if (i == null) + break; + + String[] formsNoRoot = new String[i.length() - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] lemmas = new String[formsNoRoot.length]; @@ -53,85 +49,83 @@ public class Convert0409 { int[] heads = new int[formsNoRoot.length]; - - - for(int j = 0; j < formsNoRoot.length; j++) { - formsNoRoot[j] = i.forms[j+1]; - if (formsNoRoot[j].length()==0 ||formsNoRoot[j].equals("")) { - System.out.println("error forms "+str); - // System.exit(0); - formsNoRoot[j]=" "; + for (int j = 0; j < formsNoRoot.length; j++) { + formsNoRoot[j] = i.forms[j + 1]; + if (formsNoRoot[j].length() == 0 || formsNoRoot[j].equals("")) { + System.out.println("error forms " + str); + // System.exit(0); + formsNoRoot[j] = " "; } - posNoRoot[j] = i.gpos[j+1]; - if (posNoRoot[j].length()==0 ||posNoRoot[j].equals(" ")) { - System.out.println("error pos "+str); - // System.exit(0); + posNoRoot[j] = i.gpos[j + 1]; + if (posNoRoot[j].length() == 0 || posNoRoot[j].equals(" ")) { + System.out.println("error pos " + str); + // System.exit(0); } - pposs[j] = i.ppos[j+1]; - if (pposs[j].length()==0 ||pposs[j].equals(" ")) { - System.out.println("error pos "+str); - //System.exit(0); + pposs[j] = i.ppos[j + 1]; + if (pposs[j].length() == 0 || pposs[j].equals(" ")) { + System.out.println("error pos " + str); + // System.exit(0); } - labels[j] = i.labels[j+1]; - if (labels[j].length()==0 ||labels[j].equals(" ")) { - System.out.println("error lab "+str); - // System.exit(0); + labels[j] = i.labels[j + 1]; + if (labels[j].length() == 0 || labels[j].equals(" ")) { + System.out.println("error lab " + str); + // System.exit(0); } - heads[j] = i.heads[j+1]; - if(heads[j]> posNoRoot.length) { - System.out.println("head out of range "+heads[j]+" "+heads.length+" "+str); - heads[j]=posNoRoot.length; + heads[j] = i.heads[j + 1]; + if (heads[j] > posNoRoot.length) { + System.out.println("head out of range " + heads[j] + " " + heads.length + " " + str); + heads[j] = posNoRoot.length; } - - lemmas[j] = i.plemmas[j+1]; - if (lemmas[j].length()==0 ||lemmas[j].equals(" ")) { - System.out.println("error lab "+str); - // System.exit(0); + + lemmas[j] = i.plemmas[j + 1]; + if (lemmas[j].length() == 0 || lemmas[j].equals(" ")) { + System.out.println("error lab " + str); + // System.exit(0); } - org_lemmas[j] = i.lemmas[j+1]; - if (org_lemmas[j].length()==0 ||org_lemmas[j].equals(" ")) { - System.out.println("error lab "+str); - // System.exit(0); + org_lemmas[j] = i.lemmas[j + 1]; + if (org_lemmas[j].length() == 0 || org_lemmas[j].equals(" ")) { + System.out.println("error lab " + str); + // System.exit(0); } - of[j] = i.ofeats[j+1]; - pf[j] = i.pfeats[j+1]; - if (str==6099) { - // System.out.println(formsNoRoot[j]+"\t"+posNoRoot[j]+"\t"+pposs[j]+"\t"+labels[j]+"\t"+heads[j]); + of[j] = i.ofeats[j + 1]; + pf[j] = i.pfeats[j + 1]; + if (str == 6099) { + // System.out.println(formsNoRoot[j]+"\t"+posNoRoot[j]+"\t"+pposs[j]+"\t"+labels[j]+"\t"+heads[j]); } // (instance.fillp!=null) fillp[j] = instance.fillp[j+1]; } - SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas,pposs, pposs, labels, heads,fillp,of, pf); + SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas, pposs, pposs, labels, heads, fillp, + of, pf); + + // public SentenceData09(String[] forms, String[] lemmas, String[] + // olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, + // String[] fillpred) { + // SentenceData09 + // SentenceData09 i2 = new SentenceData09(i.forms, + // i.lemmas,i.org_lemmas,); - //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) { - //SentenceData09 - // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,); - writer.write(i09); - } writer.finishWriting(); - - + } - public static void convert0906(String source, String target) throws Exception { - + CONLLReader09 reader = new CONLLReader09(source); CONLLWriter06 writer = new CONLLWriter06(target); - while (true) { SentenceData09 i = reader.getNext(); - - if (i == null) break; - - - String[] formsNoRoot = new String[i.length()-1]; + + if (i == null) + break; + + String[] formsNoRoot = new String[i.length() - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] lemmas = new String[formsNoRoot.length]; @@ -146,37 +140,37 @@ public class Convert0409 { int[] heads = new int[formsNoRoot.length]; - for(int j = 0; j < formsNoRoot.length; j++) { - formsNoRoot[j] = i.forms[j+1]; - posNoRoot[j] = i.gpos[j+1]; - pposs[j] = i.ppos[j+1]; + for (int j = 0; j < formsNoRoot.length; j++) { + formsNoRoot[j] = i.forms[j + 1]; + posNoRoot[j] = i.gpos[j + 1]; + pposs[j] = i.ppos[j + 1]; - labels[j] = i.labels[j+1]; - heads[j] = i.heads[j+1]; - lemmas[j] = i.plemmas[j+1]; + labels[j] = i.labels[j + 1]; + heads[j] = i.heads[j + 1]; + lemmas[j] = i.plemmas[j + 1]; - org_lemmas[j] = i.lemmas[j+1]; - of[j] = i.ofeats[j+1]; - pf[j] = i.pfeats[j+1]; + org_lemmas[j] = i.lemmas[j + 1]; + of[j] = i.ofeats[j + 1]; + pf[j] = i.pfeats[j + 1]; // (instance.fillp!=null) fillp[j] = instance.fillp[j+1]; } - SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas,posNoRoot, pposs, labels, heads,fillp,of, pf); + SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas, posNoRoot, pposs, labels, heads, + fillp, of, pf); + + // public SentenceData09(String[] forms, String[] lemmas, String[] + // olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, + // String[] fillpred) { + // SentenceData09 + // SentenceData09 i2 = new SentenceData09(i.forms, + // i.lemmas,i.org_lemmas,); - //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) { - //SentenceData09 - // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,); - writer.write(i09); - } writer.finishWriting(); - - + } - - } diff --git a/dependencyParser/mate-tools/src/is2/util/ConvertADJ.java b/dependencyParser/mate-tools/src/is2/util/ConvertADJ.java index b30aabb..e6ca6c1 100644 --- a/dependencyParser/mate-tools/src/is2/util/ConvertADJ.java +++ b/dependencyParser/mate-tools/src/is2/util/ConvertADJ.java @@ -1,84 +1,76 @@ /** - * + * */ package is2.util; import is2.data.SentenceData09; -import is2.io.CONLLReader04; -import is2.io.CONLLReader06; import is2.io.CONLLReader09; import is2.io.CONLLWriter06; -import is2.io.CONLLWriter09; /** * @author Dr. Bernd Bohnet, 01.03.2010 - * - * + * + * */ public class ConvertADJ { - - public static void main(String args[]) throws Exception { - - convert(args[0],args[1]); - - + + convert(args[0], args[1]); + } - + public static void convert(String source, String target) throws Exception { - + CONLLReader09 reader = new CONLLReader09(source); -// CONLLWriter09 writer = new CONLLWriter09(target); - int adj=0,argadj=0; - int rb=0,argrb=0; - int str =0; + // CONLLWriter09 writer = new CONLLWriter09(target); + int adj = 0, argadj = 0; + int rb = 0, argrb = 0; while (true) { SentenceData09 i = reader.getNext(); - str++; - if (i == null) break; - - - for (int k =0;k<i.length();k++) { - - if (i.gpos[k].startsWith("JJ")) adj++; - if (i.gpos[k].startsWith("RB")) rb++; - - if (i.argposition!=null) { - for(int p=0;p<i.argposition.length;p++) { - if(i.argposition[p]!=null) - for(int a=0;a<i.argposition[p].length;a++) { - if(i.argposition[p][a]==k && i.gpos[k].startsWith("JJ")) argadj ++; - if(i.argposition[p][a]==k && i.gpos[k].startsWith("RB")) argrb ++; - } - + if (i == null) + break; + + for (int k = 0; k < i.length(); k++) { + + if (i.gpos[k].startsWith("JJ")) + adj++; + if (i.gpos[k].startsWith("RB")) + rb++; + + if (i.argposition != null) { + for (int[] element : i.argposition) { + if (element != null) + for (int a = 0; a < element.length; a++) { + if (element[a] == k && i.gpos[k].startsWith("JJ")) + argadj++; + if (element[a] == k && i.gpos[k].startsWith("RB")) + argrb++; + } + } } // (instance.fillp!=null) fillp[j] = instance.fillp[j+1]; } - - } - System.out.println("adj "+adj+ " "+argadj); - System.out.println("rb "+rb+ " "+argrb); - + System.out.println("adj " + adj + " " + argadj); + System.out.println("rb " + rb + " " + argrb); + } - public static void convert0906(String source, String target) throws Exception { - + CONLLReader09 reader = new CONLLReader09(source); CONLLWriter06 writer = new CONLLWriter06(target); - while (true) { SentenceData09 i = reader.getNext(); - - if (i == null) break; - - - String[] formsNoRoot = new String[i.length()-1]; + + if (i == null) + break; + + String[] formsNoRoot = new String[i.length() - 1]; String[] posNoRoot = new String[formsNoRoot.length]; String[] lemmas = new String[formsNoRoot.length]; @@ -93,37 +85,37 @@ public class ConvertADJ { int[] heads = new int[formsNoRoot.length]; - for(int j = 0; j < formsNoRoot.length; j++) { - formsNoRoot[j] = i.forms[j+1]; - posNoRoot[j] = i.gpos[j+1]; - pposs[j] = i.ppos[j+1]; + for (int j = 0; j < formsNoRoot.length; j++) { + formsNoRoot[j] = i.forms[j + 1]; + posNoRoot[j] = i.gpos[j + 1]; + pposs[j] = i.ppos[j + 1]; - labels[j] = i.labels[j+1]; - heads[j] = i.heads[j+1]; - lemmas[j] = i.plemmas[j+1]; + labels[j] = i.labels[j + 1]; + heads[j] = i.heads[j + 1]; + lemmas[j] = i.plemmas[j + 1]; - org_lemmas[j] = i.lemmas[j+1]; - of[j] = i.ofeats[j+1]; - pf[j] = i.pfeats[j+1]; + org_lemmas[j] = i.lemmas[j + 1]; + of[j] = i.ofeats[j + 1]; + pf[j] = i.pfeats[j + 1]; // (instance.fillp!=null) fillp[j] = instance.fillp[j+1]; } - SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas,posNoRoot, pposs, labels, heads,fillp,of, pf); + SentenceData09 i09 = new SentenceData09(formsNoRoot, lemmas, org_lemmas, posNoRoot, pposs, labels, heads, + fillp, of, pf); + + // public SentenceData09(String[] forms, String[] lemmas, String[] + // olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, + // String[] fillpred) { + // SentenceData09 + // SentenceData09 i2 = new SentenceData09(i.forms, + // i.lemmas,i.org_lemmas,); - //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) { - //SentenceData09 - // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,); - writer.write(i09); - } writer.finishWriting(); - - + } - - } diff --git a/dependencyParser/mate-tools/src/is2/util/ConvertLowerCase0909.java b/dependencyParser/mate-tools/src/is2/util/ConvertLowerCase0909.java index e8f19f3..e5842d6 100644 --- a/dependencyParser/mate-tools/src/is2/util/ConvertLowerCase0909.java +++ b/dependencyParser/mate-tools/src/is2/util/ConvertLowerCase0909.java @@ -1,89 +1,76 @@ /** - * + * */ package is2.util; import is2.data.SentenceData09; -import is2.io.CONLLReader06; import is2.io.CONLLReader09; -import is2.io.CONLLWriter06; import is2.io.CONLLWriter09; /** * @author Dr. Bernd Bohnet, 01.03.2010 - * - * + * + * */ public class ConvertLowerCase0909 { - - public static void main(String args[]) throws Exception { - - - + CONLLReader09 reader = new CONLLReader09(args[0]); CONLLWriter09 writer = new CONLLWriter09(args[1]); - - int str =0; + while (true) { SentenceData09 i = reader.getNext(); - str++; - if (i == null) break; - + if (i == null) + break; + SentenceData09 i09 = new SentenceData09(i); i09.createSemantic(i); - - for(int k=0;k<i09.length();k++) { - i09.lemmas[k]=i09.lemmas[k].toLowerCase(); - i09.plemmas[k]=i09.plemmas[k].toLowerCase(); - + + for (int k = 0; k < i09.length(); k++) { + i09.lemmas[k] = i09.lemmas[k].toLowerCase(); + i09.plemmas[k] = i09.plemmas[k].toLowerCase(); + } - + writer.write(i09); - - + } writer.finishWriting(); - - + } - + public static void convert(String source, String target) throws Exception { - + CONLLReader09 reader = new CONLLReader09(source); CONLLWriter09 writer = new CONLLWriter09(target); - int str =0; while (true) { SentenceData09 i = reader.getNext(); - str++; - if (i == null) break; - + if (i == null) + break; + SentenceData09 i09 = new SentenceData09(i); i09.createSemantic(i); - - for(int k=0;k<i09.length();k++) { - i09.lemmas[k]=i09.lemmas[k].toLowerCase(); - i09.plemmas[k]=i09.plemmas[k].toLowerCase(); - + + for (int k = 0; k < i09.length(); k++) { + i09.lemmas[k] = i09.lemmas[k].toLowerCase(); + i09.plemmas[k] = i09.plemmas[k].toLowerCase(); + } - - //public SentenceData09(String[] forms, String[] lemmas, String[] olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, String[] fillpred) { - //SentenceData09 - // SentenceData09 i2 = new SentenceData09(i.forms, i.lemmas,i.org_lemmas,); - + + // public SentenceData09(String[] forms, String[] lemmas, String[] + // olemmas,String[] gpos, String[] ppos, String[] labs, int[] heads, + // String[] fillpred) { + // SentenceData09 + // SentenceData09 i2 = new SentenceData09(i.forms, + // i.lemmas,i.org_lemmas,); + writer.write(i09); - } writer.finishWriting(); - - + } - - - - } diff --git a/dependencyParser/mate-tools/src/is2/util/ConvertTiger2CoNLL.java b/dependencyParser/mate-tools/src/is2/util/ConvertTiger2CoNLL.java index bb528f7..e650737 100644 --- a/dependencyParser/mate-tools/src/is2/util/ConvertTiger2CoNLL.java +++ b/dependencyParser/mate-tools/src/is2/util/ConvertTiger2CoNLL.java @@ -1,9 +1,8 @@ /** - * + * */ package is2.util; - import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.FileInputStream; @@ -13,88 +12,89 @@ import java.util.StringTokenizer; /** * @author Dr. Bernd Bohnet, 17.01.2010 - * - * This class removes all information from a conll 2009 file except of columns 1 and 2 - * that contain the word id and the word form. + * + * This class removes all information from a conll 2009 file except of + * columns 1 and 2 that contain the word id and the word form. */ public class ConvertTiger2CoNLL { - public static void main (String[] args) throws IOException { + public static void main(String[] args) throws IOException { - - OptionsSuper options = new OptionsSuper(args,null); + OptionsSuper options = new OptionsSuper(args, null); + + if (options.trainfile != null) { + System.err.println( + "included sentences " + clean(options.trainfile, options.outfile, options.start, options.count)); + } else + System.err.println("Please proivde the file name -train <file-name>"); - if (options.trainfile!= null){ - System.err.println("included sentences "+clean(options.trainfile, options.outfile, options.start, options.count)); - } - else System.err.println("Please proivde the file name -train <file-name>"); - } /** * @param trainfile - * @throws IOException + * @throws IOException */ private static int clean(String file, String outFile, int start, int numberOfSentences) throws IOException { - System.err.println("writting to "+outFile); - System.err.println("start "+start+" to "+(start+numberOfSentences)); - int state=0; + System.err.println("writting to " + outFile); + System.err.println("start " + start + " to " + (start + numberOfSentences)); + int state = 0; - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file),"UTF-8"),32768); - BufferedWriter writer = new BufferedWriter(new java.io.OutputStreamWriter (new java.io.FileOutputStream (outFile),"UTF-8"),32768); - String l =null; + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"), 32768); + BufferedWriter writer = new BufferedWriter( + new java.io.OutputStreamWriter(new java.io.FileOutputStream(outFile), "UTF-8"), 32768); + String l = null; try { - int id =1, snt=0,cnt=0; + int id = 1, snt = 0, cnt = 0; - while( (l = reader.readLine())!=null) { + while ((l = reader.readLine()) != null) { - if (l.startsWith("#BOS")) { - state=1; //BOS - id=1; + state = 1; // BOS + id = 1; snt++; continue; } - if (l.startsWith("#EOS") && state==1) { - state=2; //BOS + if (l.startsWith("#EOS") && state == 1) { + state = 2; // BOS cnt++; - + writer.newLine(); } - - if (start>snt || (start+numberOfSentences)<=snt) { - state=3; + + if (start > snt || (start + numberOfSentences) <= snt) { + state = 3; } - - if (l.startsWith("#5")||l.startsWith("#6")||l.startsWith("#7")) continue; - if ((start+numberOfSentences)<=snt) break; - - if (state==3) continue; - - - if (state==1) { + + if (l.startsWith("#5") || l.startsWith("#6") || l.startsWith("#7")) + continue; + if ((start + numberOfSentences) <= snt) + break; + + if (state == 3) + continue; + + if (state == 1) { l = l.replace("\t\t", "\t"); l = l.replace("\t\t", "\t"); - - StringTokenizer t = new StringTokenizer(l,"\t"); - int count=0; - - writer.write(""+id+"\t"); - + + StringTokenizer t = new StringTokenizer(l, "\t"); + int count = 0; + + writer.write("" + id + "\t"); + while (t.hasMoreTokens()) { - if (count==0) { - writer.write(t.nextToken()+"\t"); - } else if (count==1) { - writer.write(t.nextToken()+"\t_\t"); - } else if (count==2) { - writer.write(t.nextToken()+"\t_\t"); - } else if (count==3) { - writer.write(t.nextToken().replace(".", "|")+"\t_\t"); - } - else { + if (count == 0) { + writer.write(t.nextToken() + "\t"); + } else if (count == 1) { + writer.write(t.nextToken() + "\t_\t"); + } else if (count == 2) { + writer.write(t.nextToken() + "\t_\t"); + } else if (count == 3) { + writer.write(t.nextToken().replace(".", "|") + "\t_\t"); + } else { t.nextToken(); } count++; @@ -107,18 +107,14 @@ public class ConvertTiger2CoNLL { writer.flush(); writer.close(); reader.close(); - + return cnt; } catch (IOException e) { e.printStackTrace(); } - - + return -1; } - - - } diff --git a/dependencyParser/mate-tools/src/is2/util/DB.java b/dependencyParser/mate-tools/src/is2/util/DB.java index 8218ea5..30fd231 100755 --- a/dependencyParser/mate-tools/src/is2/util/DB.java +++ b/dependencyParser/mate-tools/src/is2/util/DB.java @@ -1,63 +1,61 @@ -package is2.util; +package is2.util; import java.util.Calendar; import java.util.GregorianCalendar; - public class DB { - - private static final String ARROW = " -> "; - private static final String LEER = " " ; - private static final String BIG = " " ; + private static final String ARROW = " -> "; + private static final String LEER = " "; + private static final String BIG = " "; private static boolean debug = true; - final static public void println (Object err) { + final static public void println(Object err) { - if (!debug) return; + if (!debug) + return; StackTraceElement[] ste = new Exception().getStackTrace(); StringBuffer msg = new StringBuffer(); - msg.append((getDate().append(LEER).substring(0,10))); + msg.append((getDate().append(LEER).substring(0, 10))); msg.append(' '); - msg.append(ste[1].getClassName()+" "+ste[1].getLineNumber()); + msg.append(ste[1].getClassName() + " " + ste[1].getLineNumber()); msg.append(':'); msg.append(ste[1].getMethodName()); msg.append(ARROW); - int l = 55-msg.length(); - if (l < 0) l =0; + int l = 55 - msg.length(); + if (l < 0) + l = 0; msg.append(BIG.substring(0, l)); - -// if ((m_depth >= 0) && (m_depth < (BIG.length()) )) { -// vDebugMessage.append(BIG.substring(0, m_depth*2)); -// } + // if ((m_depth >= 0) && (m_depth < (BIG.length()) )) { + // vDebugMessage.append(BIG.substring(0, m_depth*2)); + // } msg.append(err); System.err.println(msg); - } - - final static public void prints (Object err) { - if (!debug) return; + final static public void prints(Object err) { + + if (!debug) + return; System.err.println(err); } - final private static StringBuffer getDate() { -// if (Preferences.s_debug <= BDebug.FAIL) return s_sb; + // if (Preferences.s_debug <= BDebug.FAIL) return s_sb; - GregorianCalendar s_cal = new GregorianCalendar(); + GregorianCalendar s_cal = new GregorianCalendar(); StringBuffer sb = new StringBuffer(); -// sb.append(s_cal.get(Calendar.HOUR_OF_DAY)); -// sb.append('_'); + // sb.append(s_cal.get(Calendar.HOUR_OF_DAY)); + // sb.append('_'); sb.append(s_cal.get(Calendar.MINUTE)); sb.append('.'); sb.append(s_cal.get(Calendar.SECOND)); @@ -68,14 +66,13 @@ public class DB { } public static void setDebug(boolean b) { - debug=b; - + debug = b; + } public static boolean getDebug() { - + return debug; } - } diff --git a/dependencyParser/mate-tools/src/is2/util/Edges.java b/dependencyParser/mate-tools/src/is2/util/Edges.java index af1a658..2457cae 100644 --- a/dependencyParser/mate-tools/src/is2/util/Edges.java +++ b/dependencyParser/mate-tools/src/is2/util/Edges.java @@ -1,10 +1,8 @@ /** - * + * */ package is2.util; -import is2.data.PipeGen; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -14,103 +12,102 @@ import java.util.Map.Entry; /** * @author Dr. Bernd Bohnet, 13.05.2009; - * - * + * + * */ public final class Edges { - private static short[][][] edges; - private static HashMap<Short,Integer> labelCount = new HashMap<Short,Integer>(); + private static HashMap<Short, Integer> labelCount = new HashMap<Short, Integer>(); - private static HashMap<String,Integer> slabelCount = new HashMap<String,Integer>(); + private static HashMap<String, Integer> slabelCount = new HashMap<String, Integer>(); - static short[] def = new short[1]; - - private Edges () {} - + + private Edges() { + } + /** * @param length */ public static void init(int length) { - edges = new short[length][length][]; + edges = new short[length][length][]; } - - - public static void findDefault(){ - - int best =0; - - - - for(Entry<Short,Integer> e : labelCount.entrySet()) { - - - if (best<e.getValue()) { + + public static void findDefault() { + + int best = 0; + + for (Entry<Short, Integer> e : labelCount.entrySet()) { + + if (best < e.getValue()) { best = e.getValue(); - def[0]=e.getKey(); + def[0] = e.getKey(); } } - - // labelCount=null; - // String[] types = new String[mf.getFeatureCounter().get(PipeGen.REL)]; - // for (Entry<String, Integer> e : MFO.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] = e.getKey(); + // labelCount=null; + // String[] types = new String[mf.getFeatureCounter().get(PipeGen.REL)]; + // for (Entry<String, Integer> e : + // MFO.getFeatureSet().get(PipeGen.REL).entrySet()) types[e.getValue()] + // = e.getKey(); + + is2.util.DB.println("set default label to " + def[0] + " "); - is2.util.DB.println("set default label to "+def[0]+" " ); + // System.out.println("found default "+def[0]); - // System.out.println("found default "+def[0]); - } - final static public void put(int pos1, int pos2, short label) { - putD(pos1, pos2,label); - // putD(pos2, pos1,!dir, label); + putD(pos1, pos2, label); + // putD(pos2, pos1,!dir, label); } - - + final static public void putD(int pos1, int pos2, short label) { - + Integer lc = labelCount.get(label); - if (lc==null) labelCount.put(label, 1); - else labelCount.put(label, lc+1); + if (lc == null) + labelCount.put(label, 1); + else + labelCount.put(label, lc + 1); - String key = pos1+"-"+pos2+label; + String key = pos1 + "-" + pos2 + label; Integer lcs = slabelCount.get(key); - if (lcs==null) slabelCount.put(key, 1); - else slabelCount.put(key, lcs+1); - - if (edges[pos1][pos2]==null) { - edges[pos1][pos2]=new short[1]; - edges[pos1][pos2][0]=label; - -// edgesh[pos1][pos2][dir?0:1] = new TIntHashSet(2); -// edgesh[pos1][pos2][dir?0:1].add(label); + if (lcs == null) + slabelCount.put(key, 1); + else + slabelCount.put(key, lcs + 1); + + if (edges[pos1][pos2] == null) { + edges[pos1][pos2] = new short[1]; + edges[pos1][pos2][0] = label; + + // edgesh[pos1][pos2][dir?0:1] = new TIntHashSet(2); + // edgesh[pos1][pos2][dir?0:1].add(label); } else { short labels[] = edges[pos1][pos2]; - for(short l : labels) { - //contains label already? - if(l==label) return; + for (short l : labels) { + // contains label already? + if (l == label) + return; } - - short[] nlabels = new short[labels.length+1]; + + short[] nlabels = new short[labels.length + 1]; System.arraycopy(labels, 0, nlabels, 0, labels.length); - nlabels[labels.length]=label; - edges[pos1][pos2]=nlabels; - - // edgesh[pos1][pos2][dir?0:1].add(label); + nlabels[labels.length] = label; + edges[pos1][pos2] = nlabels; + + // edgesh[pos1][pos2][dir?0:1].add(label); } } - + final static public short[] get(int pos1, int pos2) { - - if (pos1<0 || pos2<0 || edges[pos1][pos2]==null) return def; + + if (pos1 < 0 || pos2 < 0 || edges[pos1][pos2] == null) + return def; return edges[pos1][pos2]; } - /** * @param dis */ @@ -119,24 +116,24 @@ public final class Edges { int len = edges.length; d.writeShort(len); - for(int p1 =0;p1<len;p1++) { - for(int p2 =0;p2<len;p2++) { - if (edges[p1][p2]==null) d.writeShort(0); + for (int p1 = 0; p1 < len; p1++) { + for (int p2 = 0; p2 < len; p2++) { + if (edges[p1][p2] == null) + d.writeShort(0); else { d.writeShort(edges[p1][p2].length); - for(int l =0;l<edges[p1][p2].length;l++) { + for (int l = 0; l < edges[p1][p2].length; l++) { d.writeShort(edges[p1][p2][l]); } - + } } } - + d.writeShort(def[0]); } - /** * @param dis */ @@ -144,21 +141,21 @@ public final class Edges { int len = d.readShort(); edges = new short[len][len][]; - for(int p1 =0;p1<len;p1++) { - for(int p2 =0;p2<len;p2++) { + for (int p1 = 0; p1 < len; p1++) { + for (int p2 = 0; p2 < len; p2++) { int ll = d.readShort(); - if (ll==0) { - edges[p1][p2]=null; + if (ll == 0) { + edges[p1][p2] = null; } else { edges[p1][p2] = new short[ll]; - for(int l =0;l<ll;l++) { - edges[p1][p2][l]=d.readShort(); - } + for (int l = 0; l < ll; l++) { + edges[p1][p2][l] = d.readShort(); + } } } } - - def[0]= d.readShort(); + + def[0] = d.readShort(); } @@ -169,38 +166,32 @@ public final class Edges { } String _key; - + public C(String key) { super(); - _key=key; + _key = key; } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object) */ @Override public int compare(Short l1, Short l2) { - - // int c1 = labelCount.get(l1); - // int c2 = labelCount.get(l2); - // if (true) return c1==c2?0:c1>c2?-1:1; - - int x1 = slabelCount.get(_key+l1.shortValue()); - int x2 = slabelCount.get(_key+l2.shortValue()); - // System.out.println(x1+" "+x2); - - - return x1==x2?0:x1>x2?-1:1; - - - - } - + // int c1 = labelCount.get(l1); + // int c2 = labelCount.get(l2); + // if (true) return c1==c2?0:c1>c2?-1:1; + + int x1 = slabelCount.get(_key + l1.shortValue()); + int x2 = slabelCount.get(_key + l2.shortValue()); + // System.out.println(x1+" "+x2); + + return x1 == x2 ? 0 : x1 > x2 ? -1 : 1; + + } - - } - - + } diff --git a/dependencyParser/mate-tools/src/is2/util/Evaluator.java b/dependencyParser/mate-tools/src/is2/util/Evaluator.java index c527303..f75fc54 100644 --- a/dependencyParser/mate-tools/src/is2/util/Evaluator.java +++ b/dependencyParser/mate-tools/src/is2/util/Evaluator.java @@ -4,107 +4,95 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.Hashtable; import java.util.Map.Entry; //import org.apache.commons.math.stat.inference.TestUtils; - import is2.data.Parse; import is2.data.SentenceData09; import is2.io.CONLLReader09; - public class Evaluator { - - public static void main(String[] args) { + public static void main(String[] args) { Options options = new Options(args); - if (options.eval && options.significant1==null ) { + if (options.eval && options.significant1 == null) { - Results r = evaluate(options.goldfile, options.outfile); + evaluate(options.goldfile, options.outfile); - } + } /* - else if (options.significant1!=null && options.significant2!=null ) { - - System.out.println("compare1 "+options.significant1); - System.out.println("compare2 "+options.significant2); - System.out.println("gold "+options.goldfile); - - Results r1 = evaluate(options.goldfile, options.significant1,false); - - System.out.println("file 1 done "); - - Results r2 = evaluate(options.goldfile, options.significant2,false); - - double[] s1 = new double[r1.correctHead.size()]; - double[] s2 = new double[r1.correctHead.size()]; - - for(int k=0;k<r1.correctHead.size();k++) { - s1[k] = r1.correctHead.get(k); - s2[k] = r2.correctHead.get(k); - } - - try { - double p = TestUtils.pairedTTest(s1, s2); - System.out.print("significant to "+p); - } catch (Exception e) { - e.printStackTrace(); - } - -// significant(options.significant1, options.significant2) ; - - - } - */ - else if (options.significant1!=null) { - Results r = evaluate(options.goldfile, options.outfile,true); -// significant(options.significant1, options.significant2) ; + * else if (options.significant1!=null && options.significant2!=null ) { + * + * System.out.println("compare1 "+options.significant1); + * System.out.println("compare2 "+options.significant2); + * System.out.println("gold "+options.goldfile); + * + * Results r1 = evaluate(options.goldfile, options.significant1,false); + * + * System.out.println("file 1 done "); + * + * Results r2 = evaluate(options.goldfile, options.significant2,false); + * + * double[] s1 = new double[r1.correctHead.size()]; double[] s2 = new + * double[r1.correctHead.size()]; + * + * for(int k=0;k<r1.correctHead.size();k++) { s1[k] = + * r1.correctHead.get(k); s2[k] = r2.correctHead.get(k); } + * + * try { double p = TestUtils.pairedTTest(s1, s2); + * System.out.print("significant to "+p); } catch (Exception e) { + * e.printStackTrace(); } + * + * // significant(options.significant1, options.significant2) ; + * + * + * } + */ + else if (options.significant1 != null) { + evaluate(options.goldfile, options.outfile, true); } - } - /** - * + * * @param act_file * @param pred_file - * @param what top, pos, length, mor + * @param what + * top, pos, length, mor */ - public static void evaluateTagger (String act_file, String pred_file, String what) { + public static void evaluateTagger(String act_file, String pred_file, String what) { - - CONLLReader09 goldReader = new CONLLReader09(act_file); + CONLLReader09 goldReader = new CONLLReader09(act_file); CONLLReader09 predictedReader = new CONLLReader09(); - predictedReader.startReading(pred_file); + predictedReader.startReading(pred_file); - Hashtable<String,Integer> errors = new Hashtable<String,Integer>(); - Hashtable<String,StringBuffer> words = new Hashtable<String,StringBuffer>(); + Hashtable<String, Integer> errors = new Hashtable<String, Integer>(); + Hashtable<String, StringBuffer> words = new Hashtable<String, StringBuffer>(); - int total = 0, numsent = 0, corrT=0; + int total = 0, numsent = 0, corrT = 0; SentenceData09 goldInstance = goldReader.getNext(); SentenceData09 predInstance = predictedReader.getNext(); + HashMap<Integer, int[]> correctL = new HashMap<Integer, int[]>(); + HashMap<String, int[]> pos = new HashMap<String, int[]>(); + HashMap<String, int[]> mor = new HashMap<String, int[]>(); - HashMap<Integer,int[]> correctL = new HashMap<Integer,int[]>(); - HashMap<String,int[]> pos = new HashMap<String,int[]>(); - HashMap<String,int[]> mor = new HashMap<String,int[]>(); + float correctM = 0; + ; - float correctM = 0, allM=0;; - - while(goldInstance != null) { + while (goldInstance != null) { int instanceLength = goldInstance.length(); if (instanceLength != predInstance.length()) - System.out.println("Lengths do not match on sentence "+numsent); + System.out.println("Lengths do not match on sentence " + numsent); String gold[] = goldInstance.gpos; String pred[] = predInstance.ppos; @@ -112,182 +100,171 @@ public class Evaluator { String goldM[] = goldInstance.ofeats; String predM[] = predInstance.pfeats; - - // NOTE: the first item is the root info added during nextInstance(), so we skip it. + // NOTE: the first item is the root info added during + // nextInstance(), so we skip it. for (int i = 1; i < instanceLength; i++) { - + int[] cwr = correctL.get(i); - if (cwr ==null) { + if (cwr == null) { cwr = new int[2]; correctL.put(i, cwr); } cwr[1]++; int[] correctPos = pos.get(gold[i]); - if (correctPos==null) { + if (correctPos == null) { correctPos = new int[2]; pos.put(gold[i], correctPos); } correctPos[1]++; int[] correctMor = mor.get(goldM[i]); - if (correctMor==null) { + if (correctMor == null) { correctMor = new int[2]; mor.put(goldM[i], correctMor); } - if ((goldM[i].equals("_")&&predM[i]==null) || goldM[i].equals(predM[i])) { + if ((goldM[i].equals("_") && predM[i] == null) || goldM[i].equals(predM[i])) { correctM++; correctMor[0]++; } - allM++; correctMor[1]++; - + if (gold[i].equals(pred[i])) { corrT++; cwr[0]++; correctPos[0]++; } else { - String key = "gold: '"+gold[i]+"' pred: '"+pred[i]+"'"; + String key = "gold: '" + gold[i] + "' pred: '" + pred[i] + "'"; Integer cnt = errors.get(key); StringBuffer errWrd = words.get(key); - if (cnt==null) { - errors.put(key,1); + if (cnt == null) { + errors.put(key, 1); words.put(key, new StringBuffer().append(goldInstance.forms[i])); - } - else { - errors.put(key,cnt+1); - errWrd.append(" "+goldInstance.forms[i]); + } else { + errors.put(key, cnt + 1); + errWrd.append(" " + goldInstance.forms[i]); } } - - + } - total += instanceLength - 1; // Subtract one to not score fake root token + total += instanceLength - 1; // Subtract one to not score fake root + // token - numsent++; goldInstance = goldReader.getNext(); predInstance = predictedReader.getNext(); } - - - - - // System.out.println("error gold:"+goldPos[i]+" pred:"+predPos[i]+" "+goldInstance.forms[i]+" snt "+numsent+" i:"+i); + // System.out.println("error gold:"+goldPos[i]+" pred:"+predPos[i]+" + // "+goldInstance.forms[i]+" snt "+numsent+" i:"+i); ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>(); - for(Entry<String, Integer> e : errors.entrySet()) { + for (Entry<String, Integer> e : errors.entrySet()) { opsl.add(e); } - - Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){ + + Collections.sort(opsl, new Comparator<Entry<String, Integer>>() { @Override - public int compare(Entry<String, Integer> o1, - Entry<String, Integer> o2) { - - return o1.getValue()==o2.getValue()?0:o1.getValue()>o2.getValue()?-1:1; + public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) { + + return o1.getValue() == o2.getValue() ? 0 : o1.getValue() > o2.getValue() ? -1 : 1; } - - + }); - - - int cnt=0; - if (what.contains("top") ) { + + if (what.contains("top")) { System.out.println("top most errors:"); - for(Entry<String, Integer> e : opsl) { - cnt++; - if(e.getValue()>10) System.out.println(e.getKey()+" "+e.getValue()+" context: "+words.get(e.getKey())); + for (Entry<String, Integer> e : opsl) { + if (e.getValue() > 10) + System.out.println(e.getKey() + " " + e.getValue() + " context: " + words.get(e.getKey())); } - } - + } + if (what.contains("length")) { - for(int k=0;k<60;k++) { + for (int k = 0; k < 60; k++) { int[] cwr = correctL.get(k); - if (cwr == null) continue; - System.out.print(k+":"+cwr[0]+":"+cwr[1]+":"+(((float)Math.round(10000*(float)((float)cwr[0])/(float)cwr[1]))/100)+" "); + if (cwr == null) + continue; + System.out.print(k + ":" + cwr[0] + ":" + cwr[1] + ":" + + (((float) Math.round(10000 * (cwr[0]) / cwr[1])) / 100) + " "); } System.out.println(); } - + if (what.contains("pos")) { - for(Entry<String,int[]> e : pos.entrySet()) { - - System.out.print(e.getKey()+":"+e.getValue()[0]+":"+e.getValue()[1]+":"+ - (((float)Math.round(10000*((float)e.getValue()[0])/((float)e.getValue()[1])))/100)+" "); - + for (Entry<String, int[]> e : pos.entrySet()) { + + System.out.print(e.getKey() + ":" + e.getValue()[0] + ":" + e.getValue()[1] + ":" + + (((float) Math.round(10000 * ((float) e.getValue()[0]) / (e.getValue()[1]))) / 100) + " "); + } System.out.print(""); } System.out.println(); if (what.contains("mor")) { - for(Entry<String,int[]> e : mor.entrySet()) { - - System.out.print(e.getKey()+":"+e.getValue()[0]+":"+e.getValue()[1]+":"+ - (((float)Math.round(10000*((float)e.getValue()[0])/((float)e.getValue()[1])))/100)+" "); - + for (Entry<String, int[]> e : mor.entrySet()) { + + System.out.print(e.getKey() + ":" + e.getValue()[0] + ":" + e.getValue()[1] + ":" + + (((float) Math.round(10000 * ((float) e.getValue()[0]) / (e.getValue()[1]))) / 100) + " "); + } System.out.print(""); } - System.out.println("\nTokens: " + total+" Correct: " + corrT+" "+(float)corrT/total+" Correct M.:"+(int)correctM+ " morphology "+(correctM/total)); + System.out.println("\nTokens: " + total + " Correct: " + corrT + " " + (float) corrT / total + " Correct M.:" + + (int) correctM + " morphology " + (correctM / total)); } - - - public static int errors(SentenceData09 s, boolean uas) { - int errors =0; - for (int k =1;k<s.length();k++) { + int errors = 0; + for (int k = 1; k < s.length(); k++) { - if (s.heads[k] != s.pheads[k] && (uas || ! s.labels[k].equals(s.plabels[k]))) { + if (s.heads[k] != s.pheads[k] && (uas || !s.labels[k].equals(s.plabels[k]))) { errors++; } } return errors; } - public static int errors(SentenceData09 s1, SentenceData09 s2, HashMap<String,Integer> r1,HashMap<String,Integer> r2) { - + public static int errors(SentenceData09 s1, SentenceData09 s2, HashMap<String, Integer> r1, + HashMap<String, Integer> r2) { + int errors = 0; + for (int k = 1; k < s1.length(); k++) { - int errors =0; - for (int k =1;k<s1.length();k++) { + if (s1.heads[k] != s1.pheads[k] || (!s1.labels[k].equals(s1.plabels[k]))) { - if (s1.heads[k] != s1.pheads[k] || (! s1.labels[k].equals(s1.plabels[k]))) { - - if (s2.heads[k] != s2.pheads[k] || (! s2.labels[k].equals(s2.plabels[k]))) { + if (s2.heads[k] != s2.pheads[k] || (!s2.labels[k].equals(s2.plabels[k]))) { // equal do nothing } else { Integer cnt = r1.get(s1.labels[k]); - if (cnt==null) cnt=0; + if (cnt == null) + cnt = 0; cnt++; - r1.put(s1.labels[k],cnt); - + r1.put(s1.labels[k], cnt); } } - if (s2.heads[k] != s2.pheads[k] || (! s2.labels[k].equals(s2.plabels[k]))) { + if (s2.heads[k] != s2.pheads[k] || (!s2.labels[k].equals(s2.plabels[k]))) { - if (s1.heads[k] != s1.pheads[k] || (! s1.labels[k].equals(s1.plabels[k]))) { + if (s1.heads[k] != s1.pheads[k] || (!s1.labels[k].equals(s1.plabels[k]))) { // equal do nothing } else { Integer cnt = r2.get(s2.labels[k]); - if (cnt==null) cnt=0; + if (cnt == null) + cnt = 0; cnt++; - r2.put(s2.labels[k],cnt); - + r2.put(s2.labels[k], cnt); } @@ -296,8 +273,7 @@ public class Evaluator { return errors; } - - public static final String PUNCT ="!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; + public static final String PUNCT = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; public static class Results { @@ -307,48 +283,48 @@ public class Evaluator { public float ula; public float lpas; public float upla; - + ArrayList<Double> correctHead; } - - public static Results evaluate (String act_file, String pred_file) { - return evaluate (act_file, pred_file,true); + + public static Results evaluate(String act_file, String pred_file) { + return evaluate(act_file, pred_file, true); } - public static Results evaluate (String act_file, String pred_file, boolean printEval) { - return evaluate ( act_file, pred_file, printEval, false); + + public static Results evaluate(String act_file, String pred_file, boolean printEval) { + return evaluate(act_file, pred_file, printEval, false); } - - - public static Results evaluate (String act_file, String pred_file, boolean printEval, boolean sig) { + + public static Results evaluate(String act_file, String pred_file, boolean printEval, boolean sig) { CONLLReader09 goldReader = new CONLLReader09(act_file, -1); CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1); - int total = 0, corr = 0, corrL = 0, Ptotal=0, Pcorr = 0, PcorrL = 0, BPtotal=0, BPcorr = 0, BPcorrL = 0, corrLableAndPos=0, corrHeadAndPos=0; - int corrLableAndPosP=0, corrHeadAndPosP=0,corrLableAndPosC=0; - int numsent = 0, corrsent = 0, corrsentL = 0, Pcorrsent = 0, PcorrsentL = 0,sameProj=0;; - int proj=0, nonproj=0, pproj=0, pnonproj=0, nonProjOk=0, nonProjWrong=0; - - int corrOne = 0; - - int correctChnWoPunc =0, correctLChnWoPunc=0,CPtotal=0; + int total = 0, corr = 0, corrL = 0, Ptotal = 0, Pcorr = 0, PcorrL = 0, BPtotal = 0, BPcorr = 0, BPcorrL = 0, + corrLableAndPos = 0, corrHeadAndPos = 0; + int corrLableAndPosP = 0, corrLableAndPosC = 0; + int numsent = 0, corrsent = 0, corrsentL = 0, Pcorrsent = 0, PcorrsentL = 0; + ; + int proj = 0, nonproj = 0, pproj = 0, pnonproj = 0, nonProjOk = 0, nonProjWrong = 0; + + int correctChnWoPunc = 0, correctLChnWoPunc = 0, CPtotal = 0; SentenceData09 goldInstance = goldReader.getNext(); SentenceData09 predInstance = predictedReader.getNext(); - HashMap<String,Integer> label = new HashMap<String,Integer>(); - HashMap<String,Integer> labelCount = new HashMap<String,Integer>(); - HashMap<String,Integer> labelCorrect = new HashMap<String,Integer>(); - HashMap<String,Integer> falsePositive = new HashMap<String,Integer>(); + HashMap<String, Integer> label = new HashMap<String, Integer>(); + HashMap<String, Integer> labelCount = new HashMap<String, Integer>(); + HashMap<String, Integer> labelCorrect = new HashMap<String, Integer>(); + HashMap<String, Integer> falsePositive = new HashMap<String, Integer>(); // does the node have the correct head? ArrayList<Double> correctHead = new ArrayList<Double>(); - - while(goldInstance != null) { + + while (goldInstance != null) { int instanceLength = goldInstance.length(); if (instanceLength != predInstance.length()) - System.out.println("Lengths do not match on sentence "+numsent); + System.out.println("Lengths do not match on sentence " + numsent); int[] goldHeads = goldInstance.heads; String[] goldLabels = goldInstance.labels; @@ -361,256 +337,267 @@ public class Evaluator { boolean Pwhole = true; boolean PwholeL = true; + int corrLabels = 0; - int tlasS=0, totalS=0,corrLabels=0, XLabels=0; - - // NOTE: the first item is the root info added during nextInstance(), so we skip it. + // NOTE: the first item is the root info added during + // nextInstance(), so we skip it. - - - int punc=0, bpunc=0,totalChnWoPunc=0; + int punc = 0, bpunc = 0, totalChnWoPunc = 0; for (int i = 1; i < instanceLength; i++) { - - Parse p = new Parse(predHeads.length); - for (int k=0;k<p.heads.length;k++) p.heads[k]=(short) predHeads[k]; + for (int k = 0; k < p.heads.length; k++) + p.heads[k] = (short) predHeads[k]; Parse g = new Parse(predHeads.length); - for (int k=0;k<g.heads.length;k++) g.heads[k]=(short) goldHeads[k]; - - - - - + for (int k = 0; k < g.heads.length; k++) + g.heads[k] = (short) goldHeads[k]; + { Integer count = labelCount.get(goldLabels[i]); - if (count==null)count = 0; - + if (count == null) + count = 0; + count++; - + labelCount.put(goldLabels[i], count); - - if(goldLabels[i].equals(predLabels[i])) { + + if (goldLabels[i].equals(predLabels[i])) { Integer correct = labelCorrect.get(goldLabels[i]); - if (correct ==null) correct =0; - correct ++; + if (correct == null) + correct = 0; + correct++; labelCorrect.put(goldLabels[i], correct); - + } else { Integer fp = falsePositive.get(predLabels[i]); - if (fp ==null) fp =0; - fp ++; + if (fp == null) + fp = 0; + fp++; falsePositive.put(predLabels[i], fp); } - - + } { } - - - if (goldLabels[i].startsWith("PMOD")) XLabels++; - - boolean tlas =false; + + if (goldLabels[i].startsWith("PMOD")) { + } + + boolean tlas = false; if (predHeads[i] == goldHeads[i]) { corr++; - - if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrHeadAndPos ++; + + if (goldInstance.gpos[i].equals(predInstance.ppos[i])) + corrHeadAndPos++; if (goldLabels[i].equals(predLabels[i])) { corrL++; - // if (predLabels[i].startsWith("PMOD")) - corrLabels++; - // else correctHead.add(0); + // if (predLabels[i].startsWith("PMOD")) + corrLabels++; + // else correctHead.add(0); if (goldInstance.gpos[i].equals(predInstance.ppos[i])) { - tlasS++; - tlas=true; - corrLableAndPos ++; + tlas = true; + corrLableAndPos++; } - } - else { - // correctHead.add(0); - // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); + } else { + // correctHead.add(0); + // System.out.println(numsent+" error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); wholeL = false; } - } - else { - - //correctHead.add(0); - - // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - whole = false; wholeL = false; - + } else { + + // correctHead.add(0); + + // System.out.println(numsent+"error gold "+goldLabels[i]+" + // "+predLabels[i]+" head "+goldHeads[i]+" child "+i); + whole = false; + wholeL = false; + Integer count = label.get(goldLabels[i]); - if (count==null)count = 0; + if (count == null) + count = 0; count++; label.put(goldLabels[i], count); - - - int d = Math.abs(goldInstance.heads[i]-i); + Math.abs(goldInstance.heads[i] - i); } + if (!("!\"#$%&''()*+,-./:;<=>?@[\\]^_{|}~``".contains(goldInstance.forms[i]))) { - if( ! ("!\"#$%&''()*+,-./:;<=>?@[\\]^_{|}~``".contains(goldInstance.forms[i]))) { - if (predHeads[i] == goldHeads[i]) { BPcorr++; if (goldLabels[i].equals(predLabels[i])) { BPcorrL++; + } else { + // System.out.println(numsent+" error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); + // PwholeL = false; } - else { - // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - // PwholeL = false; - } - } else { - // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - //Pwhole = false; wholeL = false; + } else { + // System.out.println(numsent+"error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); + // Pwhole = false; wholeL = false; } - } else bpunc++; + } else + bpunc++; - if( ! (",.:''``".contains(goldInstance.forms[i]))) { - + if (!(",.:''``".contains(goldInstance.forms[i]))) { if (predHeads[i] == goldHeads[i]) { - if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrHeadAndPosP ++; + if (goldInstance.gpos[i].equals(predInstance.ppos[i])) { + } Pcorr++; if (goldLabels[i].equals(predLabels[i])) { PcorrL++; - if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrLableAndPosP ++; + if (goldInstance.gpos[i].equals(predInstance.ppos[i])) + corrLableAndPosP++; - } - else { - // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); + } else { + // System.out.println(numsent+" error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); PwholeL = false; } - } else { - // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - Pwhole = false; PwholeL = false; + } else { + // System.out.println(numsent+"error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); + Pwhole = false; + PwholeL = false; } - } else punc++; - - - if( ! (goldInstance.gpos[i].toLowerCase().startsWith("pu"))) { + } else + punc++; + + if (!(goldInstance.gpos[i].toLowerCase().startsWith("pu"))) { if (predHeads[i] == goldHeads[i]) { correctChnWoPunc++; if (goldLabels[i].equals(predLabels[i])) { correctLChnWoPunc++; - if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrLableAndPosC ++; + if (goldInstance.gpos[i].equals(predInstance.ppos[i])) + corrLableAndPosC++; + } else { + // System.out.println(numsent+" error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); + // PwholeL = false; } - else { - // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - // PwholeL = false; - } - } else { - // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - // Pwhole = false; PwholeL = false; + } else { + // System.out.println(numsent+"error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); + // Pwhole = false; PwholeL = false; } - } else totalChnWoPunc++; + } else + totalChnWoPunc++; - if (sig) { - if(tlas) System.out.println("1\t"); - else System.out.println("0\t"); + if (tlas) + System.out.println("1\t"); + else + System.out.println("0\t"); } } - total += ((instanceLength - 1)); // Subtract one to not score fake root token + total += ((instanceLength - 1)); // Subtract one to not score fake + // root token Ptotal += ((instanceLength - 1) - punc); BPtotal += ((instanceLength - 1) - bpunc); CPtotal += ((instanceLength - 1) - totalChnWoPunc); - if(whole) corrsent++; - if(wholeL) corrsentL++; - if(Pwhole) Pcorrsent++; - if(PwholeL) PcorrsentL++; + if (whole) + corrsent++; + if (wholeL) + corrsentL++; + if (Pwhole) + Pcorrsent++; + if (PwholeL) + PcorrsentL++; numsent++; goldInstance = goldReader.getNext(); predInstance = predictedReader.getNext(); - correctHead.add((double) ((double)corrLabels/(instanceLength - 1))); - // System.out.println(""+((double)corrLabels/(instanceLength - 1))); + correctHead.add((double) corrLabels / (instanceLength - 1)); + // System.out.println(""+((double)corrLabels/(instanceLength - 1))); } Results r = new Results(); - r.correctHead =correctHead; - int mult=100000, diff=1000; + r.correctHead = correctHead; + int mult = 100000, diff = 1000; r.total = total; r.corr = corr; - r.las =(float)Math.round(((double)corrL/total)*mult)/diff; - r.ula =(float)Math.round(((double)corr /total)*mult)/diff; - r.lpas =(float)Math.round(((double)corrLableAndPos/total)*mult)/diff; - r.upla =(float)Math.round(((double)corrHeadAndPos /total)*mult)/diff; - float tlasp = (float)Math.round(((double)corrLableAndPosP/Ptotal)*mult)/diff; - float tlasc = (float)Math.round(((double)corrLableAndPosC/Ptotal)*mult)/diff; - - // System.out.print("Total: " + total+" \tCorrect: " + corr+" "); - System.out.print(" LAS/Total/UAS/Total: " + r.las+"/" + (double)Math.round(((double)corrsentL/numsent)*mult)/diff+ - "/" + r.ula+"/" + (double)Math.round(((double)corrsent /numsent)*mult)/diff+" LPAS/UPAS "+r.lpas+"/"+r.upla); - - System.out.println("; without . " + (double)Math.round(((double)PcorrL/Ptotal)*mult)/diff+"/" + - (double)Math.round(((double)PcorrsentL/numsent)*mult)/diff+ - "/" + (double)Math.round(((double)Pcorr /Ptotal)*mult)/diff+"/" + - (double)Math.round(((double)Pcorrsent /numsent)*mult)/diff+" TLAS "+tlasp+ - " V2 LAS/UAS "+(double)Math.round(((double)BPcorrL/BPtotal)*mult)/diff+ - "/"+(double)Math.round(((double)BPcorr/BPtotal)*mult)/diff+ - " CHN LAS/UAS "+(double)Math.round(((double)correctLChnWoPunc/CPtotal)*mult)/diff+ - "/"+(double)Math.round(((double)correctChnWoPunc/CPtotal)*mult)/diff+" TLAS "+tlasc); - - float precisionNonProj = ((float)nonProjOk)/((float)nonProjOk+nonProjWrong); - float recallNonProj = ((float)nonProjOk)/((float)(nonproj)); - System.out.println("proj "+proj+" nonp "+nonproj+"; predicted proj "+pproj+" non "+pnonproj+"; nonp correct "+ - nonProjOk+" nonp wrong "+nonProjWrong+ - " precision=(nonProjOk)/(non-projOk+nonProjWrong): "+precisionNonProj+ - " recall=nonProjOk/nonproj="+recallNonProj+" F="+(2*precisionNonProj*recallNonProj)/(precisionNonProj+recallNonProj)); - - if (!printEval) return r; - - - HashMap<String,Integer> totalX = new HashMap<String,Integer>(); - HashMap<String,Integer> totalY = new HashMap<String,Integer>(); - - String A=" "; // & + r.las = (float) Math.round(((double) corrL / total) * mult) / diff; + r.ula = (float) Math.round(((double) corr / total) * mult) / diff; + r.lpas = (float) Math.round(((double) corrLableAndPos / total) * mult) / diff; + r.upla = (float) Math.round(((double) corrHeadAndPos / total) * mult) / diff; + float tlasp = (float) Math.round(((double) corrLableAndPosP / Ptotal) * mult) / diff; + float tlasc = (float) Math.round(((double) corrLableAndPosC / Ptotal) * mult) / diff; + + // System.out.print("Total: " + total+" \tCorrect: " + corr+" "); + System.out.print(" LAS/Total/UAS/Total: " + r.las + "/" + + (double) Math.round(((double) corrsentL / numsent) * mult) / diff + "/" + r.ula + "/" + + (double) Math.round(((double) corrsent / numsent) * mult) / diff + " LPAS/UPAS " + r.lpas + "/" + + r.upla); + + System.out.println("; without . " + (double) Math.round(((double) PcorrL / Ptotal) * mult) / diff + "/" + + (double) Math.round(((double) PcorrsentL / numsent) * mult) / diff + "/" + + (double) Math.round(((double) Pcorr / Ptotal) * mult) / diff + "/" + + (double) Math.round(((double) Pcorrsent / numsent) * mult) / diff + " TLAS " + tlasp + " V2 LAS/UAS " + + (double) Math.round(((double) BPcorrL / BPtotal) * mult) / diff + "/" + + (double) Math.round(((double) BPcorr / BPtotal) * mult) / diff + " CHN LAS/UAS " + + (double) Math.round(((double) correctLChnWoPunc / CPtotal) * mult) / diff + "/" + + (double) Math.round(((double) correctChnWoPunc / CPtotal) * mult) / diff + " TLAS " + tlasc); + + float precisionNonProj = (nonProjOk) / ((float) nonProjOk + nonProjWrong); + float recallNonProj = ((float) nonProjOk) / ((float) (nonproj)); + System.out.println("proj " + proj + " nonp " + nonproj + "; predicted proj " + pproj + " non " + pnonproj + + "; nonp correct " + nonProjOk + " nonp wrong " + nonProjWrong + + " precision=(nonProjOk)/(non-projOk+nonProjWrong): " + precisionNonProj + " recall=nonProjOk/nonproj=" + + recallNonProj + " F=" + (2 * precisionNonProj * recallNonProj) / (precisionNonProj + recallNonProj)); + + if (!printEval) + return r; + + new HashMap<String, Integer>(); + new HashMap<String, Integer>(); + System.out.println("label\ttp\tcount\trecall\t\ttp\tfp+tp\tprecision\t F-Score "); - for(Entry<String, Integer> e : labelCount.entrySet()) { - - int tp = labelCorrect.get(e.getKey())==null?0:labelCorrect.get(e.getKey()).intValue(); + for (Entry<String, Integer> e : labelCount.entrySet()) { + + int tp = labelCorrect.get(e.getKey()) == null ? 0 : labelCorrect.get(e.getKey()).intValue(); Integer count = labelCount.get(e.getKey()); - int fp = falsePositive.get(e.getKey())==null?0:falsePositive.get(e.getKey()).intValue(); - System.out.println(e.getKey()+"\t"+tp+"\t"+count+"\t"+roundPercent((float)tp/count)+"\t\t"+tp+"\t"+(fp+tp)+ - "\t"+roundPercent((float)tp/(fp+tp))+"\t\t"+roundPercent((((float)tp/count))+(float)tp/(fp+tp))/2F); //+totalD + int fp = falsePositive.get(e.getKey()) == null ? 0 : falsePositive.get(e.getKey()).intValue(); + System.out.println(e.getKey() + "\t" + tp + "\t" + count + "\t" + roundPercent((float) tp / count) + "\t\t" + + tp + "\t" + (fp + tp) + "\t" + roundPercent((float) tp / (fp + tp)) + "\t\t" + + roundPercent((((float) tp / count)) + (float) tp / (fp + tp)) / 2F); // +totalD } - - - - + return r; } + public static float round(double v) { - public static float round (double v){ - - return Math.round(v*10000F)/10000F; + return Math.round(v * 10000F) / 10000F; } - public static float roundPercent (double v){ + public static float roundPercent(double v) { - return Math.round(v*10000F)/100F; + return Math.round(v * 10000F) / 100F; } - - - } diff --git a/dependencyParser/mate-tools/src/is2/util/EvaluatorTagger.java b/dependencyParser/mate-tools/src/is2/util/EvaluatorTagger.java index c1ee7df..c1f88f4 100644 --- a/dependencyParser/mate-tools/src/is2/util/EvaluatorTagger.java +++ b/dependencyParser/mate-tools/src/is2/util/EvaluatorTagger.java @@ -4,174 +4,160 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.Hashtable; import java.util.Map.Entry; - import org.apache.commons.math.stat.inference.TestUtils; import is2.data.Parse; import is2.data.SentenceData09; import is2.io.CONLLReader09; - public class EvaluatorTagger { - public static int TAGGER = 1; public static int what = 0; - public static void main(String[] args) { + public static void main(String[] args) { Options options = new Options(args); - what = options.tt; - - if (options.eval && options.significant1==null ) { - - Results r = evaluate(options.goldfile, options.outfile); - - } else if (options.significant1!=null && options.significant2!=null ) { - - System.out.println("compare1 "+options.significant1); - System.out.println("compare2 "+options.significant2); - System.out.println("gold "+options.goldfile); - - check( options.significant1, options.significant2, options.testfile); - - Results r1 = evaluate(options.goldfile, options.significant1,false); - + what = options.tt; + + if (options.eval && options.significant1 == null) { + + evaluate(options.goldfile, options.outfile); + + } else if (options.significant1 != null && options.significant2 != null) { + + System.out.println("compare1 " + options.significant1); + System.out.println("compare2 " + options.significant2); + System.out.println("gold " + options.goldfile); + + check(options.significant1, options.significant2, options.testfile); + + Results r1 = evaluate(options.goldfile, options.significant1, false); + System.out.println("file 1 done "); - - Results r2 = evaluate(options.goldfile, options.significant2,false); - - double[] s1 = new double[r1.correctHead.size()]; - double[] s2 = new double[r1.correctHead.size()]; - - for(int k=0;k<r1.correctHead.size();k++) { + + Results r2 = evaluate(options.goldfile, options.significant2, false); + + double[] s1 = new double[r1.correctHead.size()]; + double[] s2 = new double[r1.correctHead.size()]; + + for (int k = 0; k < r1.correctHead.size(); k++) { s1[k] = r1.correctHead.get(k); s2[k] = r2.correctHead.get(k); } - + try { double p = TestUtils.pairedTTest(s1, s2); - System.out.print("significant to "+p); + System.out.print("significant to " + p); } catch (Exception e) { e.printStackTrace(); } - -// significant(options.significant1, options.significant2) ; + // significant(options.significant1, options.significant2) ; - } else if (options.significant1!=null) { - Results r = evaluate(options.goldfile, options.outfile,true); -// significant(options.significant1, options.significant2) ; + } else if (options.significant1 != null) { + evaluate(options.goldfile, options.outfile, true); } - } - private static void check(String s1, String s2, String pos) { CONLLReader09 s1reader = new CONLLReader09(s1, -1); SentenceData09 s1i = s1reader.getNext(); CONLLReader09 s2reader = new CONLLReader09(s2, -1); SentenceData09 s2i = s2reader.getNext(); + HashMap<String, HashMap<String, Integer>> labchanged = new HashMap<String, HashMap<String, Integer>>(); + + int snt = 0; + + while (s1i != null) { + + snt++; + int good = 0, wrong = 0; - HashMap<String,HashMap<String,Integer> > labchanged = new HashMap<String,HashMap<String,Integer> > (); + for (int w = 1; w < s1i.length(); w++) { - int snt =0; - - while(s1i != null) { - - snt ++; - int good =0,wrong=0; - - for(int w=1;w<s1i.length();w++) { + // p(s1:head-pos wrong s2:head-pos good => dep-wrong => + // dep-good) - // p(s1:head-pos wrong s2:head-pos good => dep-wrong => dep-good) + if (s1i.gpos[s1i.heads[w]].equals(pos) && !!s1i.ppos[s1i.heads[w]].equals(s1i.gpos[s1i.heads[w]]) + && s2i.ppos[s2i.heads[w]].equals(s2i.gpos[s2i.heads[w]])) { - if (s1i.gpos[s1i.heads[w]].equals(pos) && ! - ! s1i.ppos[s1i.heads[w]].equals(s1i.gpos[s1i.heads[w]]) && s2i.ppos[s2i.heads[w]].equals(s2i.gpos[s2i.heads[w]]) - ) { - - - HashMap<String,Integer> changed = labchanged.get(s2i.labels[w]); - if (changed ==null) { - changed= new HashMap<String,Integer>(); + HashMap<String, Integer> changed = labchanged.get(s2i.labels[w]); + if (changed == null) { + changed = new HashMap<String, Integer>(); labchanged.put(s2i.labels[w], changed); } - if (! (s1i.plabels[w].equals(s1i.labels[w]) && s1i.pheads[w] == s1i.heads[w] )&& - (s2i.plabels[w].equals(s2i.labels[w]) && s2i.pheads[w] == s2i.heads[w] ) ) { - good ++; + if (!(s1i.plabels[w].equals(s1i.labels[w]) && s1i.pheads[w] == s1i.heads[w]) + && (s2i.plabels[w].equals(s2i.labels[w]) && s2i.pheads[w] == s2i.heads[w])) { + good++; Integer goodL = changed.get("G"); - if (goodL== null) goodL =0; - goodL+=1; + if (goodL == null) + goodL = 0; + goodL += 1; changed.put("G", goodL); - } - else { + } else { wrong++; Integer wrongL = changed.get("W"); - if (wrongL== null) wrongL =0; - wrongL+=1; + if (wrongL == null) + wrongL = 0; + wrongL += 1; changed.put("W", wrongL); } - - - - - + } } - - if (good!=0 || wrong!=0) - System.out.println(snt+" changed yes:"+good+" no:"+wrong); - s1i = s1reader.getNext(); - s2i = s2reader.getNext(); + + if (good != 0 || wrong != 0) + System.out.println(snt + " changed yes:" + good + " no:" + wrong); + s1i = s1reader.getNext(); + s2i = s2reader.getNext(); } - System.out.println(""+labchanged); - - } + System.out.println("" + labchanged); + } /** - * + * * @param act_file * @param pred_file - * @param what top, pos, length, mor + * @param what + * top, pos, length, mor */ - public static void evaluateTagger (String act_file, String pred_file, String what) { + public static void evaluateTagger(String act_file, String pred_file, String what) { - - CONLLReader09 goldReader = new CONLLReader09(act_file); + CONLLReader09 goldReader = new CONLLReader09(act_file); CONLLReader09 predictedReader = new CONLLReader09(); - predictedReader.startReading(pred_file); + predictedReader.startReading(pred_file); - Hashtable<String,Integer> errors = new Hashtable<String,Integer>(); - Hashtable<String,StringBuffer> words = new Hashtable<String,StringBuffer>(); + Hashtable<String, Integer> errors = new Hashtable<String, Integer>(); + Hashtable<String, StringBuffer> words = new Hashtable<String, StringBuffer>(); - int total = 0, numsent = 0, corrT=0; + int total = 0, numsent = 0, corrT = 0; SentenceData09 goldInstance = goldReader.getNext(); SentenceData09 predInstance = predictedReader.getNext(); + HashMap<Integer, int[]> correctL = new HashMap<Integer, int[]>(); + HashMap<String, int[]> pos = new HashMap<String, int[]>(); + HashMap<String, int[]> mor = new HashMap<String, int[]>(); - HashMap<Integer,int[]> correctL = new HashMap<Integer,int[]>(); - HashMap<String,int[]> pos = new HashMap<String,int[]>(); - HashMap<String,int[]> mor = new HashMap<String,int[]>(); + float correctM = 0; + ; - float correctM = 0, allM=0;; - - while(goldInstance != null) { + while (goldInstance != null) { int instanceLength = goldInstance.length(); if (instanceLength != predInstance.length()) - System.out.println("Lengths do not match on sentence "+numsent); + System.out.println("Lengths do not match on sentence " + numsent); String gold[] = goldInstance.gpos; String pred[] = predInstance.ppos; @@ -179,182 +165,171 @@ public class EvaluatorTagger { String goldM[] = goldInstance.ofeats; String predM[] = predInstance.pfeats; - - // NOTE: the first item is the root info added during nextInstance(), so we skip it. + // NOTE: the first item is the root info added during + // nextInstance(), so we skip it. for (int i = 1; i < instanceLength; i++) { - + int[] cwr = correctL.get(i); - if (cwr ==null) { + if (cwr == null) { cwr = new int[2]; correctL.put(i, cwr); } cwr[1]++; int[] correctPos = pos.get(gold[i]); - if (correctPos==null) { + if (correctPos == null) { correctPos = new int[2]; pos.put(gold[i], correctPos); } correctPos[1]++; int[] correctMor = mor.get(goldM[i]); - if (correctMor==null) { + if (correctMor == null) { correctMor = new int[2]; mor.put(goldM[i], correctMor); } - if ((goldM[i].equals("_")&&predM[i]==null) || goldM[i].equals(predM[i])) { + if ((goldM[i].equals("_") && predM[i] == null) || goldM[i].equals(predM[i])) { correctM++; correctMor[0]++; } - allM++; correctMor[1]++; - + if (gold[i].equals(pred[i])) { corrT++; cwr[0]++; correctPos[0]++; } else { - String key = "gold: '"+gold[i]+"' pred: '"+pred[i]+"'"; + String key = "gold: '" + gold[i] + "' pred: '" + pred[i] + "'"; Integer cnt = errors.get(key); StringBuffer errWrd = words.get(key); - if (cnt==null) { - errors.put(key,1); + if (cnt == null) { + errors.put(key, 1); words.put(key, new StringBuffer().append(goldInstance.forms[i])); - } - else { - errors.put(key,cnt+1); - errWrd.append(" "+goldInstance.forms[i]); + } else { + errors.put(key, cnt + 1); + errWrd.append(" " + goldInstance.forms[i]); } } - - + } - total += instanceLength - 1; // Subtract one to not score fake root token + total += instanceLength - 1; // Subtract one to not score fake root + // token - numsent++; goldInstance = goldReader.getNext(); predInstance = predictedReader.getNext(); } - - - - - // System.out.println("error gold:"+goldPos[i]+" pred:"+predPos[i]+" "+goldInstance.forms[i]+" snt "+numsent+" i:"+i); + // System.out.println("error gold:"+goldPos[i]+" pred:"+predPos[i]+" + // "+goldInstance.forms[i]+" snt "+numsent+" i:"+i); ArrayList<Entry<String, Integer>> opsl = new ArrayList<Entry<String, Integer>>(); - for(Entry<String, Integer> e : errors.entrySet()) { + for (Entry<String, Integer> e : errors.entrySet()) { opsl.add(e); } - - Collections.sort(opsl, new Comparator<Entry<String, Integer>>(){ + + Collections.sort(opsl, new Comparator<Entry<String, Integer>>() { @Override - public int compare(Entry<String, Integer> o1, - Entry<String, Integer> o2) { - - return o1.getValue()==o2.getValue()?0:o1.getValue()>o2.getValue()?-1:1; + public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) { + + return o1.getValue() == o2.getValue() ? 0 : o1.getValue() > o2.getValue() ? -1 : 1; } - - + }); - - - int cnt=0; - if (what.contains("top") ) { + + if (what.contains("top")) { System.out.println("top most errors:"); - for(Entry<String, Integer> e : opsl) { - cnt++; - if(e.getValue()>10) System.out.println(e.getKey()+" "+e.getValue()+" context: "+words.get(e.getKey())); + for (Entry<String, Integer> e : opsl) { + if (e.getValue() > 10) + System.out.println(e.getKey() + " " + e.getValue() + " context: " + words.get(e.getKey())); } - } - + } + if (what.contains("length")) { - for(int k=0;k<60;k++) { + for (int k = 0; k < 60; k++) { int[] cwr = correctL.get(k); - if (cwr == null) continue; - System.out.print(k+":"+cwr[0]+":"+cwr[1]+":"+(((float)Math.round(10000*(float)((float)cwr[0])/(float)cwr[1]))/100)+" "); + if (cwr == null) + continue; + System.out.print(k + ":" + cwr[0] + ":" + cwr[1] + ":" + + (((float) Math.round(10000 * (cwr[0]) / cwr[1])) / 100) + " "); } System.out.println(); } - + if (what.contains("pos")) { - for(Entry<String,int[]> e : pos.entrySet()) { - - System.out.print(e.getKey()+":"+e.getValue()[0]+":"+e.getValue()[1]+":"+ - (((float)Math.round(10000*((float)e.getValue()[0])/((float)e.getValue()[1])))/100)+" "); - + for (Entry<String, int[]> e : pos.entrySet()) { + + System.out.print(e.getKey() + ":" + e.getValue()[0] + ":" + e.getValue()[1] + ":" + + (((float) Math.round(10000 * ((float) e.getValue()[0]) / (e.getValue()[1]))) / 100) + " "); + } System.out.print(""); } System.out.println(); if (what.contains("mor")) { - for(Entry<String,int[]> e : mor.entrySet()) { - - System.out.print(e.getKey()+":"+e.getValue()[0]+":"+e.getValue()[1]+":"+ - (((float)Math.round(10000*((float)e.getValue()[0])/((float)e.getValue()[1])))/100)+" "); - + for (Entry<String, int[]> e : mor.entrySet()) { + + System.out.print(e.getKey() + ":" + e.getValue()[0] + ":" + e.getValue()[1] + ":" + + (((float) Math.round(10000 * ((float) e.getValue()[0]) / (e.getValue()[1]))) / 100) + " "); + } System.out.print(""); } - System.out.println("\nTokens: " + total+" Correct: " + corrT+" "+(float)corrT/total+" Correct M.:"+(int)correctM+ " morphology "+(correctM/total)); + System.out.println("\nTokens: " + total + " Correct: " + corrT + " " + (float) corrT / total + " Correct M.:" + + (int) correctM + " morphology " + (correctM / total)); } - - - public static int errors(SentenceData09 s, boolean uas) { - int errors =0; - for (int k =1;k<s.length();k++) { + int errors = 0; + for (int k = 1; k < s.length(); k++) { - if (s.heads[k] != s.pheads[k] && (uas || ! s.labels[k].equals(s.plabels[k]))) { + if (s.heads[k] != s.pheads[k] && (uas || !s.labels[k].equals(s.plabels[k]))) { errors++; } } return errors; } - public static int errors(SentenceData09 s1, SentenceData09 s2, HashMap<String,Integer> r1,HashMap<String,Integer> r2) { - - + public static int errors(SentenceData09 s1, SentenceData09 s2, HashMap<String, Integer> r1, + HashMap<String, Integer> r2) { - int errors =0; - for (int k =1;k<s1.length();k++) { + int errors = 0; + for (int k = 1; k < s1.length(); k++) { - if (s1.heads[k] != s1.pheads[k] || (! s1.labels[k].equals(s1.plabels[k]))) { + if (s1.heads[k] != s1.pheads[k] || (!s1.labels[k].equals(s1.plabels[k]))) { - if (s2.heads[k] != s2.pheads[k] || (! s2.labels[k].equals(s2.plabels[k]))) { + if (s2.heads[k] != s2.pheads[k] || (!s2.labels[k].equals(s2.plabels[k]))) { // equal do nothing } else { Integer cnt = r1.get(s1.labels[k]); - if (cnt==null) cnt=0; + if (cnt == null) + cnt = 0; cnt++; - r1.put(s1.labels[k],cnt); - + r1.put(s1.labels[k], cnt); } } - if (s2.heads[k] != s2.pheads[k] || (! s2.labels[k].equals(s2.plabels[k]))) { + if (s2.heads[k] != s2.pheads[k] || (!s2.labels[k].equals(s2.plabels[k]))) { - if (s1.heads[k] != s1.pheads[k] || (! s1.labels[k].equals(s1.plabels[k]))) { + if (s1.heads[k] != s1.pheads[k] || (!s1.labels[k].equals(s1.plabels[k]))) { // equal do nothing } else { Integer cnt = r2.get(s2.labels[k]); - if (cnt==null) cnt=0; + if (cnt == null) + cnt = 0; cnt++; - r2.put(s2.labels[k],cnt); - + r2.put(s2.labels[k], cnt); } @@ -363,8 +338,7 @@ public class EvaluatorTagger { return errors; } - - public static final String PUNCT ="!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; + public static final String PUNCT = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; public static class Results { @@ -374,67 +348,64 @@ public class EvaluatorTagger { public float ula; public float lpas; public float upla; - + ArrayList<Double> correctHead; } - - public static Results evaluate (String act_file, String pred_file) { - return evaluate (act_file, pred_file,true); + + public static Results evaluate(String act_file, String pred_file) { + return evaluate(act_file, pred_file, true); } - public static Results evaluate (String act_file, String pred_file, boolean printEval) { - return evaluate ( act_file, pred_file, printEval, false); + + public static Results evaluate(String act_file, String pred_file, boolean printEval) { + return evaluate(act_file, pred_file, printEval, false); } - - - public static Results evaluate (String act_file, String pred_file, boolean printEval, boolean sig) { + + public static Results evaluate(String act_file, String pred_file, boolean printEval, boolean sig) { CONLLReader09 goldReader = new CONLLReader09(act_file, -1); CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1); - int total = 0, corr = 0, corrL = 0, Ptotal=0, Pcorr = 0, PcorrL = 0, BPtotal=0, BPcorr = 0, BPcorrL = 0, corrLableAndPos=0, corrHeadAndPos=0; - int corrLableAndPosP=0, corrHeadAndPosP=0,corrLableAndPosC=0; - int numsent = 0, corrsent = 0, corrsentL = 0, Pcorrsent = 0, PcorrsentL = 0,sameProj=0;; - int proj=0, nonproj=0, pproj=0, pnonproj=0, nonProjOk=0, nonProjWrong=0; - - int corrOne = 0; - - int correctChnWoPunc =0, correctLChnWoPunc=0,CPtotal=0; + int total = 0, corr = 0, corrL = 0, Ptotal = 0, Pcorr = 0, PcorrL = 0, BPtotal = 0, BPcorr = 0, BPcorrL = 0, + corrLableAndPos = 0, corrHeadAndPos = 0; + int corrLableAndPosP = 0, corrLableAndPosC = 0; + int numsent = 0, corrsent = 0, corrsentL = 0, Pcorrsent = 0, PcorrsentL = 0; + ; + int proj = 0, nonproj = 0, pproj = 0, pnonproj = 0, nonProjOk = 0, nonProjWrong = 0; + + int correctChnWoPunc = 0, correctLChnWoPunc = 0, CPtotal = 0; SentenceData09 goldInstance = goldReader.getNext(); SentenceData09 predInstance = predictedReader.getNext(); - HashMap<String,Integer> label = new HashMap<String,Integer>(); - HashMap<String,Integer> labelCount = new HashMap<String,Integer>(); - HashMap<String,Integer> labelCorrect = new HashMap<String,Integer>(); - HashMap<String,Integer> falsePositive = new HashMap<String,Integer>(); - HashMap<String,HashMap<String,Integer> > confusion = new HashMap<String,HashMap<String,Integer> >(); - - HashMap<String,HashMap<String,Integer> > posLabelAssign = new HashMap<String,HashMap<String,Integer> >(); - + HashMap<String, Integer> label = new HashMap<String, Integer>(); + HashMap<String, Integer> labelCount = new HashMap<String, Integer>(); + HashMap<String, Integer> labelCorrect = new HashMap<String, Integer>(); + HashMap<String, Integer> falsePositive = new HashMap<String, Integer>(); + HashMap<String, HashMap<String, Integer>> confusion = new HashMap<String, HashMap<String, Integer>>(); + + HashMap<String, HashMap<String, Integer>> posLabelAssign = new HashMap<String, HashMap<String, Integer>>(); + // does the node have the correct head? ArrayList<Double> correctHead = new ArrayList<Double>(); - - while(goldInstance != null) { + + while (goldInstance != null) { int instanceLength = goldInstance.length(); if (instanceLength != predInstance.length()) - System.out.println("Lengths do not match on sentence "+numsent); + System.out.println("Lengths do not match on sentence " + numsent); int[] goldHeads = goldInstance.heads; - - String[] goldLabels,predLabels; + + String[] goldLabels, predLabels; if (what == TAGGER) { - goldLabels= goldInstance.gpos; - predLabels= predInstance.ppos; - } - else { - goldLabels = goldInstance.labels ; - predLabels = predInstance.plabels ; + goldLabels = goldInstance.gpos; + predLabels = predInstance.ppos; + } else { + goldLabels = goldInstance.labels; + predLabels = predInstance.plabels; } - - + int[] predHeads = predInstance.pheads; - boolean whole = true; boolean wholeL = true; @@ -442,295 +413,295 @@ public class EvaluatorTagger { boolean Pwhole = true; boolean PwholeL = true; + int corrLabels = 0; - int tlasS=0, totalS=0,corrLabels=0, XLabels=0; - - // NOTE: the first item is the root info added during nextInstance(), so we skip it. + // NOTE: the first item is the root info added during + // nextInstance(), so we skip it. - - - int punc=0, bpunc=0,totalChnWoPunc=0; + int punc = 0, bpunc = 0, totalChnWoPunc = 0; for (int i = 1; i < instanceLength; i++) { - - Parse p = new Parse(predHeads.length); - for (int k=0;k<p.heads.length;k++) p.heads[k]=(short) predHeads[k]; + for (int k = 0; k < p.heads.length; k++) + p.heads[k] = (short) predHeads[k]; Parse g = new Parse(predHeads.length); - for (int k=0;k<g.heads.length;k++) g.heads[k]=(short) goldHeads[k]; - - + for (int k = 0; k < g.heads.length; k++) + g.heads[k] = (short) goldHeads[k]; - HashMap<String,Integer> labelsNum =posLabelAssign.get(goldInstance.gpos[goldInstance.heads[i]]); - if (labelsNum== null) { - labelsNum = new HashMap<String,Integer>(); + HashMap<String, Integer> labelsNum = posLabelAssign.get(goldInstance.gpos[goldInstance.heads[i]]); + if (labelsNum == null) { + labelsNum = new HashMap<String, Integer>(); posLabelAssign.put(goldInstance.gpos[goldInstance.heads[i]], labelsNum); } - + Integer num = labelsNum.get(goldInstance.labels[i]); - if (num==null) num =0; + if (num == null) + num = 0; num++; - labelsNum.put(goldInstance.labels[i],num); - - - - Integer count = labelCount.get(goldLabels[i]); - if (count==null)count = 0; - - count++; - - labelCount.put(goldLabels[i], count); - - if(goldLabels[i].equals(predLabels[i]) && (what==TAGGER || predHeads[i] == goldHeads[i] )) { - Integer correct = labelCorrect.get(goldLabels[i]); - if (correct ==null) correct =0; - correct ++; - labelCorrect.put(goldLabels[i], correct); - - } else { - - Integer fp = falsePositive.get(predLabels[i]); - if (fp ==null) fp =0; - fp ++; - falsePositive.put(predLabels[i], fp); - - HashMap<String,Integer> conf = confusion.get(goldLabels[i]); - if (conf == null) confusion.put(goldLabels[i], conf = new HashMap<String,Integer>()); - - conf.put(predLabels[i], conf.get(predLabels[i])==null?1:conf.get(predLabels[i])+1); - + labelsNum.put(goldInstance.labels[i], num); - } - - - - - - - - - - boolean tlas =false; + Integer count = labelCount.get(goldLabels[i]); + if (count == null) + count = 0; + + count++; + + labelCount.put(goldLabels[i], count); + + if (goldLabels[i].equals(predLabels[i]) && (what == TAGGER || predHeads[i] == goldHeads[i])) { + Integer correct = labelCorrect.get(goldLabels[i]); + if (correct == null) + correct = 0; + correct++; + labelCorrect.put(goldLabels[i], correct); + + } else { + + Integer fp = falsePositive.get(predLabels[i]); + if (fp == null) + fp = 0; + fp++; + falsePositive.put(predLabels[i], fp); + + HashMap<String, Integer> conf = confusion.get(goldLabels[i]); + if (conf == null) + confusion.put(goldLabels[i], conf = new HashMap<String, Integer>()); + + conf.put(predLabels[i], conf.get(predLabels[i]) == null ? 1 : conf.get(predLabels[i]) + 1); + + } + + boolean tlas = false; if (predHeads[i] == goldHeads[i]) { corr++; - - if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrHeadAndPos ++; + + if (goldInstance.gpos[i].equals(predInstance.ppos[i])) + corrHeadAndPos++; if (goldLabels[i].equals(predLabels[i])) { corrL++; - // if (predLabels[i].startsWith("PMOD")) - corrLabels++; - // else correctHead.add(0); + // if (predLabels[i].startsWith("PMOD")) + corrLabels++; + // else correctHead.add(0); if (goldInstance.gpos[i].equals(predInstance.ppos[i])) { - tlasS++; - tlas=true; - corrLableAndPos ++; + tlas = true; + corrLableAndPos++; } - } - else { - // correctHead.add(0); - // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); + } else { + // correctHead.add(0); + // System.out.println(numsent+" error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); wholeL = false; } - } - else { - - //correctHead.add(0); - - // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - whole = false; wholeL = false; - - count = label.get(goldLabels[i]); - - if (count==null)count = 0; - count++; - label.put(goldLabels[i], count); + } else { + + // correctHead.add(0); + // System.out.println(numsent+"error gold "+goldLabels[i]+" + // "+predLabels[i]+" head "+goldHeads[i]+" child "+i); + whole = false; + wholeL = false; + count = label.get(goldLabels[i]); - int d = Math.abs(goldInstance.heads[i]-i); + if (count == null) + count = 0; + count++; + label.put(goldLabels[i], count); + + Math.abs(goldInstance.heads[i] - i); } + if (!("!\"#$%&''()*+,-./:;<=>?@[\\]^_{|}~``".contains(goldInstance.forms[i]))) { - if( ! ("!\"#$%&''()*+,-./:;<=>?@[\\]^_{|}~``".contains(goldInstance.forms[i]))) { - if (predHeads[i] == goldHeads[i]) { BPcorr++; if (goldLabels[i].equals(predLabels[i])) { BPcorrL++; + } else { + // System.out.println(numsent+" error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); + // PwholeL = false; } - else { - // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - // PwholeL = false; - } - } else { - // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - //Pwhole = false; wholeL = false; + } else { + // System.out.println(numsent+"error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); + // Pwhole = false; wholeL = false; } - } else bpunc++; + } else + bpunc++; - if( ! (",.:''``".contains(goldInstance.forms[i]))) { - + if (!(",.:''``".contains(goldInstance.forms[i]))) { if (predHeads[i] == goldHeads[i]) { - if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrHeadAndPosP ++; + if (goldInstance.gpos[i].equals(predInstance.ppos[i])) { + } Pcorr++; if (goldLabels[i].equals(predLabels[i])) { PcorrL++; - if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrLableAndPosP ++; + if (goldInstance.gpos[i].equals(predInstance.ppos[i])) + corrLableAndPosP++; - } - else { - // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); + } else { + // System.out.println(numsent+" error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); PwholeL = false; } - } else { - // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - Pwhole = false; PwholeL = false; + } else { + // System.out.println(numsent+"error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); + Pwhole = false; + PwholeL = false; } - } else punc++; - - - if( ! (goldInstance.gpos[i].toLowerCase().startsWith("pu"))) { + } else + punc++; + + if (!(goldInstance.gpos[i].toLowerCase().startsWith("pu"))) { if (predHeads[i] == goldHeads[i]) { correctChnWoPunc++; if (goldLabels[i].equals(predLabels[i])) { correctLChnWoPunc++; - if (goldInstance.gpos[i].equals(predInstance.ppos[i])) corrLableAndPosC ++; - } - else { - // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - // PwholeL = false; + if (goldInstance.gpos[i].equals(predInstance.ppos[i])) + corrLableAndPosC++; + } else { + // System.out.println(numsent+" error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); + // PwholeL = false; } - } else { - // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - // Pwhole = false; PwholeL = false; + } else { + // System.out.println(numsent+"error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); + // Pwhole = false; PwholeL = false; } - } else totalChnWoPunc++; + } else + totalChnWoPunc++; - if (sig) { - if(tlas) System.out.println("1\t"); - else System.out.println("0\t"); + if (tlas) + System.out.println("1\t"); + else + System.out.println("0\t"); } } - total += ((instanceLength - 1)); // Subtract one to not score fake root token + total += ((instanceLength - 1)); // Subtract one to not score fake + // root token Ptotal += ((instanceLength - 1) - punc); BPtotal += ((instanceLength - 1) - bpunc); CPtotal += ((instanceLength - 1) - totalChnWoPunc); - if(whole) corrsent++; - if(wholeL) corrsentL++; - if(Pwhole) Pcorrsent++; - if(PwholeL) PcorrsentL++; + if (whole) + corrsent++; + if (wholeL) + corrsentL++; + if (Pwhole) + Pcorrsent++; + if (PwholeL) + PcorrsentL++; numsent++; goldInstance = goldReader.getNext(); predInstance = predictedReader.getNext(); - correctHead.add((double) ((double)corrLabels/(instanceLength - 1))); - // System.out.println(""+((double)corrLabels/(instanceLength - 1))); + correctHead.add((double) corrLabels / (instanceLength - 1)); + // System.out.println(""+((double)corrLabels/(instanceLength - 1))); } Results r = new Results(); - r.correctHead =correctHead; - int mult=100000, diff=1000; + r.correctHead = correctHead; + int mult = 100000, diff = 1000; r.total = total; r.corr = corr; - r.las =(float)Math.round(((double)corrL/total)*mult)/diff; - r.ula =(float)Math.round(((double)corr /total)*mult)/diff; - r.lpas =(float)Math.round(((double)corrLableAndPos/total)*mult)/diff; - r.upla =(float)Math.round(((double)corrHeadAndPos /total)*mult)/diff; - float tlasp = (float)Math.round(((double)corrLableAndPosP/Ptotal)*mult)/diff; - float tlasc = (float)Math.round(((double)corrLableAndPosC/Ptotal)*mult)/diff; - - // System.out.print("Total: " + total+" \tCorrect: " + corr+" "); - System.out.print(" LAS/Total/UAS/Total: " + r.las+"/" + (double)Math.round(((double)corrsentL/numsent)*mult)/diff+ - "/" + r.ula+"/" + (double)Math.round(((double)corrsent /numsent)*mult)/diff+" LPAS/UPAS "+r.lpas+"/"+r.upla); - - System.out.println("; without . " + (double)Math.round(((double)PcorrL/Ptotal)*mult)/diff+"/" + - (double)Math.round(((double)PcorrsentL/numsent)*mult)/diff+ - "/" + (double)Math.round(((double)Pcorr /Ptotal)*mult)/diff+"/" + - (double)Math.round(((double)Pcorrsent /numsent)*mult)/diff+" TLAS "+tlasp+ - " V2 LAS/UAS "+(double)Math.round(((double)BPcorrL/BPtotal)*mult)/diff+ - "/"+(double)Math.round(((double)BPcorr/BPtotal)*mult)/diff+ - " CHN LAS/UAS "+(double)Math.round(((double)correctLChnWoPunc/CPtotal)*mult)/diff+ - "/"+(double)Math.round(((double)correctChnWoPunc/CPtotal)*mult)/diff+" TLAS "+tlasc); - - float precisionNonProj = ((float)nonProjOk)/((float)nonProjOk+nonProjWrong); - float recallNonProj = ((float)nonProjOk)/((float)(nonproj)); - System.out.println("proj "+proj+" nonp "+nonproj+"; predicted proj "+pproj+" non "+pnonproj+"; nonp correct "+ - nonProjOk+" nonp wrong "+nonProjWrong+ - " precision=(nonProjOk)/(non-projOk+nonProjWrong): "+precisionNonProj+ - " recall=nonProjOk/nonproj="+recallNonProj+" F="+(2*precisionNonProj*recallNonProj)/(precisionNonProj+recallNonProj)); - - if (!printEval) return r; - - - HashMap<String,Integer> totalX = new HashMap<String,Integer>(); - HashMap<String,Integer> totalY = new HashMap<String,Integer>(); - - String A=" "; // & + r.las = (float) Math.round(((double) corrL / total) * mult) / diff; + r.ula = (float) Math.round(((double) corr / total) * mult) / diff; + r.lpas = (float) Math.round(((double) corrLableAndPos / total) * mult) / diff; + r.upla = (float) Math.round(((double) corrHeadAndPos / total) * mult) / diff; + float tlasp = (float) Math.round(((double) corrLableAndPosP / Ptotal) * mult) / diff; + float tlasc = (float) Math.round(((double) corrLableAndPosC / Ptotal) * mult) / diff; + + // System.out.print("Total: " + total+" \tCorrect: " + corr+" "); + System.out.print(" LAS/Total/UAS/Total: " + r.las + "/" + + (double) Math.round(((double) corrsentL / numsent) * mult) / diff + "/" + r.ula + "/" + + (double) Math.round(((double) corrsent / numsent) * mult) / diff + " LPAS/UPAS " + r.lpas + "/" + + r.upla); + + System.out.println("; without . " + (double) Math.round(((double) PcorrL / Ptotal) * mult) / diff + "/" + + (double) Math.round(((double) PcorrsentL / numsent) * mult) / diff + "/" + + (double) Math.round(((double) Pcorr / Ptotal) * mult) / diff + "/" + + (double) Math.round(((double) Pcorrsent / numsent) * mult) / diff + " TLAS " + tlasp + " V2 LAS/UAS " + + (double) Math.round(((double) BPcorrL / BPtotal) * mult) / diff + "/" + + (double) Math.round(((double) BPcorr / BPtotal) * mult) / diff + " CHN LAS/UAS " + + (double) Math.round(((double) correctLChnWoPunc / CPtotal) * mult) / diff + "/" + + (double) Math.round(((double) correctChnWoPunc / CPtotal) * mult) / diff + " TLAS " + tlasc); + + float precisionNonProj = (nonProjOk) / ((float) nonProjOk + nonProjWrong); + float recallNonProj = ((float) nonProjOk) / ((float) (nonproj)); + System.out.println("proj " + proj + " nonp " + nonproj + "; predicted proj " + pproj + " non " + pnonproj + + "; nonp correct " + nonProjOk + " nonp wrong " + nonProjWrong + + " precision=(nonProjOk)/(non-projOk+nonProjWrong): " + precisionNonProj + " recall=nonProjOk/nonproj=" + + recallNonProj + " F=" + (2 * precisionNonProj * recallNonProj) / (precisionNonProj + recallNonProj)); + + if (!printEval) + return r; + + new HashMap<String, Integer>(); + new HashMap<String, Integer>(); + System.out.println("label\ttp\tcount\trecall\t\ttp\tfp+tp\tprecision\t F-Score "); - for(Entry<String, Integer> e : labelCount.entrySet()) { - - int tp = labelCorrect.get(e.getKey())==null?0:labelCorrect.get(e.getKey()).intValue(); + for (Entry<String, Integer> e : labelCount.entrySet()) { + + int tp = labelCorrect.get(e.getKey()) == null ? 0 : labelCorrect.get(e.getKey()).intValue(); Integer count = labelCount.get(e.getKey()); - int fp = falsePositive.get(e.getKey())==null?0:falsePositive.get(e.getKey()).intValue(); - System.out.println(e.getKey()+"\t"+tp+"\t"+count+"\t"+roundPercent((float)tp/count)+"\t\t"+tp+"\t"+(fp+tp)+ - "\t"+roundPercent((float)tp/(fp+tp))+"\t\t"+roundPercent((((float)tp/count))+(float)tp/(fp+tp))/2F); //+totalD + int fp = falsePositive.get(e.getKey()) == null ? 0 : falsePositive.get(e.getKey()).intValue(); + System.out.println(e.getKey() + "\t" + tp + "\t" + count + "\t" + roundPercent((float) tp / count) + "\t\t" + + tp + "\t" + (fp + tp) + "\t" + roundPercent((float) tp / (fp + tp)) + "\t\t" + + roundPercent((((float) tp / count)) + (float) tp / (fp + tp)) / 2F); // +totalD } - - for(Entry<String, HashMap<String, Integer>> e : confusion.entrySet()) { + + for (Entry<String, HashMap<String, Integer>> e : confusion.entrySet()) { HashMap<String, Integer> values = e.getValue(); ArrayList<Entry<String, Integer>> entries = new ArrayList<Entry<String, Integer>>(values.entrySet()); Collections.sort(entries, new Comparator<Entry<String, Integer>>() { - - @Override public int compare(Entry<String, Integer> o1, Entry<String, Integer> o2) { - + return o2.getValue().compareTo(o1.getValue()); } - - } - ); - - - System.out.println(e.getKey()+"\t"+entries); - - - } - System.out.println(""+posLabelAssign); - - - - return r; - } + }); + System.out.println(e.getKey() + "\t" + entries); - public static float round (double v){ + } + System.out.println("" + posLabelAssign); - return Math.round(v*10000F)/10000F; + return r; } - public static float roundPercent (double v){ + public static float round(double v) { - return Math.round(v*10000F)/100F; + return Math.round(v * 10000F) / 10000F; } + public static float roundPercent(double v) { - + return Math.round(v * 10000F) / 100F; + } } diff --git a/dependencyParser/mate-tools/src/is2/util/ExtractParagraphs.java b/dependencyParser/mate-tools/src/is2/util/ExtractParagraphs.java index a9fabca..aa65d8d 100644 --- a/dependencyParser/mate-tools/src/is2/util/ExtractParagraphs.java +++ b/dependencyParser/mate-tools/src/is2/util/ExtractParagraphs.java @@ -8,80 +8,67 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; -import java.io.Reader; -import java.nio.channels.Channels; -import java.nio.channels.FileChannel; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; -import java.util.StringTokenizer; public class ExtractParagraphs { /** - + * * @param args * @throws IOException */ public static void main(String args[]) throws IOException { - if (args.length<1) { + if (args.length < 1) { System.out.println("Please provide a file name."); System.exit(0); } - + File file = new File(args[0]); file.isDirectory(); String[] dirs = file.list(); - - BufferedWriter write = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]),"UTF-8"),32768); - int cnt=0; - -for (String fileName : dirs) { - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]+fileName),"UTF-8"),32768); - - - - - int state =0; - - String s; - while ((s = reader.readLine()) != null) { - - if (s.startsWith("<P>")||s.startsWith("<p>")) { - state=1; // paragraph start - continue; - } - - - - - if (s.startsWith("</P>")||s.startsWith("</p>")) { - state=2; // paragraph end - write.newLine(); - } - - boolean lastNL =false; - if (state==1) { - String sp[] = s.split("\\. "); - for(String p : sp) { - write.write(p); - // if (sp.length>1) write.newLine(); + + BufferedWriter write = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]), "UTF-8"), + 32768); + int cnt = 0; + + for (String fileName : dirs) { + BufferedReader reader = new BufferedReader( + new InputStreamReader(new FileInputStream(args[0] + fileName), "UTF-8"), 32768); + + int state = 0; + + String s; + while ((s = reader.readLine()) != null) { + + if (s.startsWith("<P>") || s.startsWith("<p>")) { + state = 1; // paragraph start + continue; + } + + if (s.startsWith("</P>") || s.startsWith("</p>")) { + state = 2; // paragraph end + write.newLine(); + } + + if (state == 1) { + String sp[] = s.split("\\. "); + for (String p : sp) { + write.write(p); + // if (sp.length>1) write.newLine(); + } + cnt++; } - cnt++; } + + // if (cnt>5000) break; + + reader.close(); } - - //if (cnt>5000) break; - - reader.close(); -} write.flush(); write.close(); - - System.out.println("Extract "+cnt+" lines "); - - + + System.out.println("Extract " + cnt + " lines "); + } - - + } diff --git a/dependencyParser/mate-tools/src/is2/util/IntStack.java b/dependencyParser/mate-tools/src/is2/util/IntStack.java index e92c02c..b291d16 100644 --- a/dependencyParser/mate-tools/src/is2/util/IntStack.java +++ b/dependencyParser/mate-tools/src/is2/util/IntStack.java @@ -1,86 +1,90 @@ /** - * + * */ package is2.util; - - /** * @author Dr. Bernd Bohnet, 01.06.2011 - * - * + * + * */ final public class IntStack { - + final public int[] stack; - public int position =-1; - + public int position = -1; + public IntStack(int size) { - if (size<=0) stack = new int[1]; - else stack = new int[size+1]; + if (size <= 0) + stack = new int[1]; + else + stack = new int[size + 1]; } public IntStack(IntStack s) { - stack=s.stack; + stack = s.stack; position = s.position; } - public int peek() { - return position==-1?-1:stack[position]; + return position == -1 ? -1 : stack[position]; } public void push(int i) { - // if (i ==2)new Exception().printStackTrace(); - stack[++position]=i; + // if (i ==2)new Exception().printStackTrace(); + stack[++position] = i; } - + public int pop() { - return position==-1?-1:stack[position--]; + return position == -1 ? -1 : stack[position--]; } - + public int size() { - return position+1; + return position + 1; } - + public boolean isEmpty() { - return position==-1?true:false; + return position == -1 ? true : false; } - + public int get(int p) { return stack[p]; } - + public void clear() { - position=-1; + position = -1; } /** * @param b */ public void addAll(IntStack b) { - - position=b.position; - if (position<0) return; - - for(int k=0; k<=position;k++) stack[k]=b.stack[k]; - + + position = b.position; + if (position < 0) + return; + + for (int k = 0; k <= position; k++) + stack[k] = b.stack[k]; + } - public boolean contains(int s) {; - - for(int k=0; k<=position;k++) - if (stack[k]==s) return true; - + public boolean contains(int s) { + ; + + for (int k = 0; k <= position; k++) + if (stack[k] == s) + return true; + return false; } - + + @Override public String toString() { StringBuffer s = new StringBuffer(); - for(int k = position;k>=0;k--) { + for (int k = position; k >= 0; k--) { s.append(k).append(":").append(this.stack[k]).append(" "); } return s.toString(); } - + } diff --git a/dependencyParser/mate-tools/src/is2/util/Long2Int.java b/dependencyParser/mate-tools/src/is2/util/Long2Int.java index d461df8..e6ef45c 100644 --- a/dependencyParser/mate-tools/src/is2/util/Long2Int.java +++ b/dependencyParser/mate-tools/src/is2/util/Long2Int.java @@ -2,80 +2,80 @@ package is2.util; import is2.data.Long2IntInterface; - /** * @author Bernd Bohnet, 01.09.2009 - * - * Maps for the Hash Kernel the long values to the int values. + * + * Maps for the Hash Kernel the long values to the int values. */ final public class Long2Int implements Long2IntInterface { - public Long2Int() { - size=115911564; + size = 115911564; } - - + public Long2Int(int s) { - size=s; + size = s; } - - + /** Integer counter for long2int */ - final private int size; //0x03ffffff //0x07ffffff - - - /* (non-Javadoc) + final private int size; // 0x03ffffff //0x07ffffff + + /* + * (non-Javadoc) + * * @see is2.sp09k9992.Long2IntIterface#size() */ - public int size() {return size;} - - /* (non-Javadoc) - * @see is2.sp09k9992.Long2IntIterface#start() - * has no meaning for this implementation + @Override + public int size() { + return size; + } + + /* + * (non-Javadoc) + * + * @see is2.sp09k9992.Long2IntIterface#start() has no meaning for this + * implementation */ - final public void start() {} - + final public void start() { + } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see is2.sp09k9992.Long2IntIterface#l2i(long) */ - final public int l2i(long l) { - if (l<0) return -1; - + @Override + final public int l2i(long l) { + if (l < 0) + return -1; + // this works well LAS 88.138 - // int r= (int)(( l ^ (l&0xffffffff00000000L) >>> 29 ));//0x811c9dc5 ^ // 29 - // return Math.abs(r % size); - // this works a bit better and good with 0x03ffffff - // + // int r= (int)(( l ^ (l&0xffffffff00000000L) >>> 29 ));//0x811c9dc5 ^ + // // 29 + // return Math.abs(r % size); + // this works a bit better and good with 0x03ffffff + // /* - long r= l;//26 - l = (l>>12)&0xfffffffffffff000L; - r ^= l;//38 - l = (l>>11)&0xffffffffffffc000L; - r ^= l;//49 - l = (l>>9)& 0xffffffffffff0000L; //53 - r ^= l;//58 - l = (l>>7)&0xfffffffffffc0000L; //62 - r ^=l;//65 - int x = (int)r; - x = x % size; - // return x >= 0 ? x : -x ;// Math.abs(r % size); - - */ - // 26 0x03ffffff + * long r= l;//26 l = (l>>12)&0xfffffffffffff000L; r ^= l;//38 l = + * (l>>11)&0xffffffffffffc000L; r ^= l;//49 l = (l>>9)& + * 0xffffffffffff0000L; //53 r ^= l;//58 l = (l>>7)&0xfffffffffffc0000L; + * //62 r ^=l;//65 int x = (int)r; x = x % size; // return x >= 0 ? x : + * -x ;// Math.abs(r % size); + * + */ + // 26 0x03ffffff // together with 0x07ffffff 27 88.372 - long r= l;// 27 - l = (l>>13)&0xffffffffffffe000L; - r ^= l; // 40 - l = (l>>11)&0xffffffffffff0000L; - r ^= l; // 51 - l = (l>>9)& 0xfffffffffffc0000L; //53 - r ^= l; // 60 - l = (l>>7)& 0xfffffffffff00000L; //62 - r ^=l; //67 - int x = ((int)r) % size; - - return x >= 0 ? x : -x ; + long r = l;// 27 + l = (l >> 13) & 0xffffffffffffe000L; + r ^= l; // 40 + l = (l >> 11) & 0xffffffffffff0000L; + r ^= l; // 51 + l = (l >> 9) & 0xfffffffffffc0000L; // 53 + r ^= l; // 60 + l = (l >> 7) & 0xfffffffffff00000L; // 62 + r ^= l; // 67 + int x = ((int) r) % size; + + return x >= 0 ? x : -x; } } diff --git a/dependencyParser/mate-tools/src/is2/util/Options.java b/dependencyParser/mate-tools/src/is2/util/Options.java index 5989483..30b53b0 100644 --- a/dependencyParser/mate-tools/src/is2/util/Options.java +++ b/dependencyParser/mate-tools/src/is2/util/Options.java @@ -1,104 +1,103 @@ package is2.util; -import is2.util.OptionsSuper; - import java.io.File; - public final class Options extends OptionsSuper { - + public Options(String[] args) { - public Options (String[] args) { - - for(int i = 0; i < args.length; i++) { + for (int i = 0; i < args.length; i++) { String[] pair = args[i].split(":"); - if (pair[0].equals("--help")) explain(); + if (pair[0].equals("--help")) + explain(); else if (pair[0].equals("-train")) { train = true; - trainfile = args[i+1]; + trainfile = args[i + 1]; } else if (pair[0].equals("-eval")) { eval = true; - goldfile =args[i+1]; i++; + goldfile = args[i + 1]; + i++; } else if (pair[0].equals("-test")) { test = true; - testfile = args[i+1]; i++; + testfile = args[i + 1]; + i++; } else if (pair[0].equals("-i")) { - numIters = Integer.parseInt(args[i+1]); i++; - } - else if (pair[0].equals("-out")) { - outfile = args[i+1]; i++; - } - else if (pair[0].equals("-decode")) { - decodeProjective = args[i+1].equals("proj"); i++; - } - else if (pair[0].equals("-confidence")) { - + numIters = Integer.parseInt(args[i + 1]); + i++; + } else if (pair[0].equals("-out")) { + outfile = args[i + 1]; + i++; + } else if (pair[0].equals("-decode")) { + decodeProjective = args[i + 1].equals("proj"); + i++; + } else if (pair[0].equals("-confidence")) { + conf = true; } else if (pair[0].equals("-count")) { - count = Integer.parseInt(args[i+1]); i++; + count = Integer.parseInt(args[i + 1]); + i++; } else if (pair[0].equals("-model")) { - modelName = args[i+1]; i++; - } - else if (pair[0].equals("-device")) { - device = args[i+1]; i++; - } else if (pair[0].equals("-tmp")) { - tmp = args[i+1]; i++; + modelName = args[i + 1]; + i++; + } else if (pair[0].equals("-device")) { + device = args[i + 1]; + i++; + } else if (pair[0].equals("-tmp")) { + tmp = args[i + 1]; + i++; } else if (pair[0].equals("-format")) { - //format = args[i+1]; - formatTask = Integer.parseInt(args[i+1]); i++; + // format = args[i+1]; + formatTask = Integer.parseInt(args[i + 1]); + i++; } else if (pair[0].equals("-allfeatures")) { - allFeatures=true; + allFeatures = true; } else if (pair[0].equals("-nonormalize")) { - normalize=false; - }else if (pair[0].equals("-nframes")) { - //format = args[i+1]; - nbframes= args[i+1]; i++; - - + normalize = false; + } else if (pair[0].equals("-nframes")) { + // format = args[i+1]; + nbframes = args[i + 1]; + i++; + } else if (pair[0].equals("-pframes")) { - //format = args[i+1]; - pbframes= args[i+1]; i++; + // format = args[i+1]; + pbframes = args[i + 1]; + i++; } else if (pair[0].equals("-nopred")) { - nopred =true; + nopred = true; } else if (pair[0].equals("-divide")) { - keep =true; + keep = true; } else if (pair[0].equals("-lexicon")) { - lexicon= args[i+1]; i++; + lexicon = args[i + 1]; + i++; + + } else + super.addOption(args, i); - } else super.addOption(args, i); - } - - + try { + if (trainfile != null) { - try { - - if (trainfile!=null) { - - if (keep && tmp!=null) { + if (keep && tmp != null) { trainforest = new File(tmp); - if (!trainforest.exists()) keep=false; - - } else - if (tmp!=null) { + if (!trainforest.exists()) + keep = false; + + } else if (tmp != null) { trainforest = File.createTempFile("train", ".tmp", new File(tmp)); trainforest.deleteOnExit(); - } - else { - trainforest = File.createTempFile("train", ".tmp"); //,new File("F:\\") + } else { + trainforest = File.createTempFile("train", ".tmp"); // ,new + // File("F:\\") trainforest.deleteOnExit(); } - - + } - } catch (java.io.IOException e) { System.out.println("Unable to create tmp files for feature forests!"); System.out.println(e); @@ -111,19 +110,24 @@ public final class Options extends OptionsSuper { System.out.println("java -class mate.jar is2.parser.Parser [Options]"); System.out.println(); System.out.println("Example: "); - System.out.println(" java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6"); + System.out.println( + " java -class mate.jar is2.parser.Parser -model eps3.model -train corpora/conll08st/train/train.closed -test corpora/conll08st/devel/devel.closed -out b3.test -eval corpora/conll08st/devel/devel.closed -count 2000 -i 6"); System.out.println(""); System.out.println("Options:"); System.out.println(""); - System.out.println(" -train <file> the corpus a model is trained on; default "+this.trainfile); - System.out.println(" -test <file> the input corpus for testing; default "+this.testfile); - System.out.println(" -out <file> the output corpus (result) of a test run; default "+this.outfile); + System.out.println(" -train <file> the corpus a model is trained on; default " + this.trainfile); + System.out.println(" -test <file> the input corpus for testing; default " + this.testfile); + System.out.println(" -out <file> the output corpus (result) of a test run; default " + this.outfile); System.out.println(" -model <file> the parsing model for traing the model is stored in the files"); - System.out.println(" and for parsing the model is load from this file; default "+this.modelName); - System.out.println(" -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default "+this.numIters); - System.out.println(" -count <number> the n first sentences of the corpus are take for the training default "+this.count); - System.out.println(" -format <number> conll format of the year 8 or 9; default "+this.formatTask); - + System.out.println( + " and for parsing the model is load from this file; default " + this.modelName); + System.out.println( + " -i <number> the number of training iterations; good numbers are 10 for smaller corpora and 6 for bigger; default " + + this.numIters); + System.out.println(" -count <number> the n first sentences of the corpus are take for the training default " + + this.count); + System.out.println(" -format <number> conll format of the year 8 or 9; default " + this.formatTask); + System.exit(0); } } diff --git a/dependencyParser/mate-tools/src/is2/util/OptionsSuper.java b/dependencyParser/mate-tools/src/is2/util/OptionsSuper.java index 0a40f73..f6370f7 100755 --- a/dependencyParser/mate-tools/src/is2/util/OptionsSuper.java +++ b/dependencyParser/mate-tools/src/is2/util/OptionsSuper.java @@ -1,7 +1,5 @@ package is2.util; -import is2.io.CONLLReader09; - import java.io.File; public class OptionsSuper { @@ -15,17 +13,17 @@ public class OptionsSuper { public boolean nopred = false; public boolean upper = false; - + public boolean train = false; public boolean eval = false; public boolean test = false; public boolean keep = false; public boolean flt = false; - public boolean loadTaggerModels =false; + public boolean loadTaggerModels = false; public String modelName = "prs.mdl"; public String modelTaggerName = null; - + public String useMapping = null; public String device = "C:"; public String tmp = null; @@ -33,7 +31,7 @@ public class OptionsSuper { public boolean decodeProjective = false; public double decodeTH = 0.3d; public String format = "CONLL"; - public int formatTask =9; + public int formatTask = 9; public int numIters = 10; public int best = 1000; public String outfile = "dp.conll"; @@ -49,136 +47,153 @@ public class OptionsSuper { public int maxForms = Integer.MAX_VALUE; public int beam = 4; public float prune = -100000000; - - public String third =""; - public String second =""; - public String first =""; - - public int cross=10; - - //public boolean secondOrder = true; + + public String third = ""; + public String second = ""; + public String first = ""; + + public int cross = 10; + + // public boolean secondOrder = true; public boolean useRelationalFeatures = false; public int count = 10000000; public int cores = Integer.MAX_VALUE; public int start = 0; public int minOccureForms = 0; - public int tt=30; // tagger averaging - public boolean allFeatures =false; - public boolean normalize =false; - public boolean no2nd =false; - public boolean noLemmas=false; - public boolean few2nd =false,noLinear=false,noMorph=false; + public int tt = 30; // tagger averaging + public boolean allFeatures = false; + public boolean normalize = false; + public boolean no2nd = false; + public boolean noLemmas = false; + public boolean few2nd = false, noLinear = false, noMorph = false; public String clusterFile; - + // output confidence values - public boolean conf =false; - public String phraseFormat="penn"; // tiger | penn + public boolean conf = false; + public String phraseFormat = "penn"; // tiger | penn public boolean average = true; - public boolean label =false; - public boolean stack=false; + public boolean label = false; + public boolean stack = false; public boolean oneRoot = false; - - public String significant1 =null,significant2 =null; - - - // horizontal stacking - public int minLength =0, maxLength =Integer.MAX_VALUE; - public boolean overwritegold =false; - - - public static final int MULTIPLICATIVE=1, SHIFT=2; + + public String significant1 = null, significant2 = null; + + // horizontal stacking + public int minLength = 0, maxLength = Integer.MAX_VALUE; + public boolean overwritegold = false; + + public static final int MULTIPLICATIVE = 1, SHIFT = 2; public int featureCreation = MULTIPLICATIVE; - - - public OptionsSuper (String[] args, String dummy) { - - for(int i = 0; i < args.length; i++) { - i = addOption(args,i); + + public OptionsSuper(String[] args, String dummy) { + + for (int i = 0; i < args.length; i++) { + i = addOption(args, i); } - + + } + + public OptionsSuper() { } - - public OptionsSuper() {} - - - public int addOption(String args[], int i) { - - if (args[i].equals("-train")) { - train = true; - trainfile = args[i+1]; - } else if (args[i].equals("-eval")) { - eval = true; - goldfile =args[i+1]; i++; - } else if (args[i].equals("-gout")) { - gout =args[i+1]; i++; - } else if (args[i].equals("-test")) { - test = true; - testfile = args[i+1]; i++; - } else if (args[i].equals("-sig1")) { - significant1 = args[i+1]; i++; - } else if (args[i].equals("-sig2")) { - significant2 = args[i+1]; i++; - } else if (args[i].equals("-i")) { - numIters = Integer.parseInt(args[i+1]); i++; - } else if (args[i].equals("-out")) { - outfile = args[i+1]; i++; - } else if (args[i].equals("-cluster")) { - clusterFile = args[i+1]; i++; - } - - else if (args[i].equals("-count")) { - count = Integer.parseInt(args[i+1]); i++; - } else if (args[i].equals("-model")) { - modelName = args[i+1]; i++; - } else if (args[i].equals("-tmodel")) { - this.modelTaggerName = args[i+1]; i++; - } else if (args[i].equals("-nonormalize")) { - normalize=false; - } else if (args[i].equals("-float")) { - flt =true; - } else if (args[i].equals("-hsize")) { - hsize= Integer.parseInt(args[i+1]); i++; - } else if (args[i].equals("-charset")) { - charset= args[++i]; - } else if (args[i].equals("-pstrain")) { - this.phraseTrain=args[i+1]; i++; - } else if (args[i].equals("-pstest")) { - this.phraseTest=args[i+1]; i++; - } else if (args[i].equals("-len")) { - maxLen= Integer.parseInt(args[i+1]); i++; - } else if (args[i].equals("-cores")) { - cores= Integer.parseInt(args[i+1]); i++; - } else if (args[i].equals("-start")) { - start= Integer.parseInt(args[i+1]); i++; - } else if (args[i].equals("-max")) { - maxLength= Integer.parseInt(args[i+1]); i++; - } else if (args[i].equals("-min")) { - minLength= Integer.parseInt(args[i+1]); i++; - } else if (args[i].equals("-noLemmas")) { - noLemmas= true; - } else if (args[i].equals("-noavg")) { - this.average= false; - } else if (args[i].equals("-label")) { - label= true; - } else if (args[i].equals("-stack")) { - stack= true; - } else if (args[i].equals("-overwritegold")) { - overwritegold = true; - } else if (args[i].equals("-format")) { - formatTask = Integer.parseInt(args[++i]); - } else if (args[i].equals("-tt")) { - tt = Integer.parseInt(args[++i]); - } else if (args[i].equals("-min-occure-forms")) { - minOccureForms = Integer.parseInt(args[++i]); - } else if (args[i].equals("-loadTaggerModels")) { - this.loadTaggerModels=true;; - - } else if (args[i].equals("-feature_creation")) { - this.featureCreation = args[++i].equals("shift")?SHIFT:MULTIPLICATIVE; - } - - return i; - + + public int addOption(String args[], int i) { + + if (args[i].equals("-train")) { + train = true; + trainfile = args[i + 1]; + } else if (args[i].equals("-eval")) { + eval = true; + goldfile = args[i + 1]; + i++; + } else if (args[i].equals("-gout")) { + gout = args[i + 1]; + i++; + } else if (args[i].equals("-test")) { + test = true; + testfile = args[i + 1]; + i++; + } else if (args[i].equals("-sig1")) { + significant1 = args[i + 1]; + i++; + } else if (args[i].equals("-sig2")) { + significant2 = args[i + 1]; + i++; + } else if (args[i].equals("-i")) { + numIters = Integer.parseInt(args[i + 1]); + i++; + } else if (args[i].equals("-out")) { + outfile = args[i + 1]; + i++; + } else if (args[i].equals("-cluster")) { + clusterFile = args[i + 1]; + i++; + } + + else if (args[i].equals("-count")) { + count = Integer.parseInt(args[i + 1]); + i++; + } else if (args[i].equals("-model")) { + modelName = args[i + 1]; + i++; + } else if (args[i].equals("-tmodel")) { + this.modelTaggerName = args[i + 1]; + i++; + } else if (args[i].equals("-nonormalize")) { + normalize = false; + } else if (args[i].equals("-float")) { + flt = true; + } else if (args[i].equals("-hsize")) { + hsize = Integer.parseInt(args[i + 1]); + i++; + } else if (args[i].equals("-charset")) { + charset = args[++i]; + } else if (args[i].equals("-pstrain")) { + this.phraseTrain = args[i + 1]; + i++; + } else if (args[i].equals("-pstest")) { + this.phraseTest = args[i + 1]; + i++; + } else if (args[i].equals("-len")) { + maxLen = Integer.parseInt(args[i + 1]); + i++; + } else if (args[i].equals("-cores")) { + cores = Integer.parseInt(args[i + 1]); + i++; + } else if (args[i].equals("-start")) { + start = Integer.parseInt(args[i + 1]); + i++; + } else if (args[i].equals("-max")) { + maxLength = Integer.parseInt(args[i + 1]); + i++; + } else if (args[i].equals("-min")) { + minLength = Integer.parseInt(args[i + 1]); + i++; + } else if (args[i].equals("-noLemmas")) { + noLemmas = true; + } else if (args[i].equals("-noavg")) { + this.average = false; + } else if (args[i].equals("-label")) { + label = true; + } else if (args[i].equals("-stack")) { + stack = true; + } else if (args[i].equals("-overwritegold")) { + overwritegold = true; + } else if (args[i].equals("-format")) { + formatTask = Integer.parseInt(args[++i]); + } else if (args[i].equals("-tt")) { + tt = Integer.parseInt(args[++i]); + } else if (args[i].equals("-min-occure-forms")) { + minOccureForms = Integer.parseInt(args[++i]); + } else if (args[i].equals("-loadTaggerModels")) { + this.loadTaggerModels = true; + ; + + } else if (args[i].equals("-feature_creation")) { + this.featureCreation = args[++i].equals("shift") ? SHIFT : MULTIPLICATIVE; + } + + return i; + } @Override @@ -208,7 +223,7 @@ public class OptionsSuper { sb.append("create-forest: " + createForest); sb.append(" | "); sb.append("format: " + format); - + sb.append("]\n"); return sb.toString(); } diff --git a/dependencyParser/mate-tools/src/is2/util/ParserEvaluator.java b/dependencyParser/mate-tools/src/is2/util/ParserEvaluator.java index 260e4b7..95e8949 100644 --- a/dependencyParser/mate-tools/src/is2/util/ParserEvaluator.java +++ b/dependencyParser/mate-tools/src/is2/util/ParserEvaluator.java @@ -3,23 +3,20 @@ package is2.util; import is2.data.SentenceData09; import is2.io.CONLLReader09; - public class ParserEvaluator { - + public static final String PUNCT = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; - public static final String PUNCT ="!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; - public static class Results { public int total; public int corr; public float las; public float ula; - + } - - public static Results evaluate (String act_file, String pred_file) throws Exception { + + public static Results evaluate(String act_file, String pred_file) throws Exception { CONLLReader09 goldReader = new CONLLReader09(act_file, -1); CONLLReader09 predictedReader = new CONLLReader09(pred_file, -1); @@ -29,12 +26,12 @@ public class ParserEvaluator { SentenceData09 goldInstance = goldReader.getNext(); SentenceData09 predInstance = predictedReader.getNext(); - while(goldInstance != null) { + while (goldInstance != null) { int instanceLength = goldInstance.length(); if (instanceLength != predInstance.length()) - System.out.println("Lengths do not match on sentence "+numsent); + System.out.println("Lengths do not match on sentence " + numsent); int[] goldHeads = goldInstance.heads; String[] goldLabels = goldInstance.labels; @@ -44,51 +41,60 @@ public class ParserEvaluator { boolean whole = true; boolean wholeL = true; - // NOTE: the first item is the root info added during nextInstance(), so we skip it. + // NOTE: the first item is the root info added during + // nextInstance(), so we skip it. - int punc=0; + int punc = 0; for (int i = 1; i < instanceLength; i++) { if (predHeads[i] == goldHeads[i]) { corr++; - if (goldLabels[i].equals(predLabels[i])) corrL++; + if (goldLabels[i].equals(predLabels[i])) + corrL++; else { - // System.out.println(numsent+" error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); + // System.out.println(numsent+" error gold + // "+goldLabels[i]+" "+predLabels[i]+" head + // "+goldHeads[i]+" child "+i); wholeL = false; } - } - else { - // System.out.println(numsent+"error gold "+goldLabels[i]+" "+predLabels[i]+" head "+goldHeads[i]+" child "+i); - whole = false; wholeL = false; + } else { + // System.out.println(numsent+"error gold "+goldLabels[i]+" + // "+predLabels[i]+" head "+goldHeads[i]+" child "+i); + whole = false; + wholeL = false; } } - total += ((instanceLength - 1) - punc); // Subtract one to not score fake root token + total += ((instanceLength - 1) - punc); // Subtract one to not score + // fake root token - if(whole) corrsent++; - if(wholeL) corrsentL++; + if (whole) + corrsent++; + if (wholeL) + corrsentL++; numsent++; goldInstance = goldReader.getNext(); predInstance = predictedReader.getNext(); } - + Results r = new Results(); - + r.total = total; r.corr = corr; - r.las =(float)Math.round(((double)corrL/total)*100000)/1000; - r.ula =(float)Math.round(((double)corr /total)*100000)/1000; - System.out.print("Total: " + total+" \tCorrect: " + corr+" "); - System.out.println("LAS: " + (double)Math.round(((double)corrL/total)*100000)/1000+" \tTotal: " + (double)Math.round(((double)corrsentL/numsent)*100000)/1000+ - " \tULA: " + (double)Math.round(((double)corr /total)*100000)/1000+" \tTotal: " + (double)Math.round(((double)corrsent /numsent)*100000)/1000); - + r.las = (float) Math.round(((double) corrL / total) * 100000) / 1000; + r.ula = (float) Math.round(((double) corr / total) * 100000) / 1000; + System.out.print("Total: " + total + " \tCorrect: " + corr + " "); + System.out.println("LAS: " + (double) Math.round(((double) corrL / total) * 100000) / 1000 + " \tTotal: " + + (double) Math.round(((double) corrsentL / numsent) * 100000) / 1000 + " \tULA: " + + (double) Math.round(((double) corr / total) * 100000) / 1000 + " \tTotal: " + + (double) Math.round(((double) corrsent / numsent) * 100000) / 1000); + return r; } - - - public static float round (double v){ - - return Math.round(v*10000F)/10000F; + + public static float round(double v) { + + return Math.round(v * 10000F) / 10000F; } - + } diff --git a/dependencyParser/mate-tools/src/is2/util/Split.java b/dependencyParser/mate-tools/src/is2/util/Split.java index 48eadbe..ea1151b 100755 --- a/dependencyParser/mate-tools/src/is2/util/Split.java +++ b/dependencyParser/mate-tools/src/is2/util/Split.java @@ -15,80 +15,75 @@ public class Split { /** * Splits a tokenized sentences into one word per line format: * - * Input - * > I am an text . - * > Sentence two ... - * - * Output: - * I _ _ _ ... - * am _ _ _ ... - * ... - * + * Input > I am an text . > Sentence two ... + * + * Output: I _ _ _ ... am _ _ _ ... ... + * * @param args * @throws IOException */ public static void main(String args[]) throws IOException { - if (args.length!=1) { + if (args.length != 1) { System.out.println("Please provide a file name."); System.exit(0); } - - String filename = args[0]; -// Charset charset = Charset.forName("UTF-8"); + + String filename = args[0]; + // Charset charset = Charset.forName("UTF-8"); FileInputStream in = new FileInputStream(filename); FileChannel channel = in.getChannel(); - CharsetDecoder decoder = Charset.defaultCharset().newDecoder();//charset.newDecoder(); - Reader infile = Channels.newReader(channel , decoder, 16*1024); + CharsetDecoder decoder = Charset.defaultCharset().newDecoder();// charset.newDecoder(); + Reader infile = Channels.newReader(channel, decoder, 16 * 1024); BufferedReader bInfile = new BufferedReader(infile); - -// DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(options.modelName))); - + // DataOutputStream dos = new DataOutputStream(new + // BufferedOutputStream(new FileOutputStream(options.modelName))); + String s; while ((s = bInfile.readLine()) != null) { - - + // do the first tokens contain a colon? - int colon =0; - for(int k=0;k<12;k++) { - if (s.length()<=k) break; + int colon = 0; + for (int k = 0; k < 12; k++) { + if (s.length() <= k) + break; if (s.charAt(k) == ':') { - + colon++; break; } - if (s.charAt(k) == ' ') break; + if (s.charAt(k) == ' ') + break; } - String prefix =colon>0?s.substring(0,s.indexOf(":"))+"_":""; - - if (colon>0) { - s = s.substring(s.indexOf(":")+1); + String prefix = colon > 0 ? s.substring(0, s.indexOf(":")) + "_" : ""; + + if (colon > 0) { + s = s.substring(s.indexOf(":") + 1); } - + StringTokenizer t = new StringTokenizer(s); - int i=1; - boolean found=false; - while(t.hasMoreTokens()) { - found =true; - String tk =t.nextToken(); - if (tk.contains("=")) continue; - System.out.print(prefix+i+"\t"); + int i = 1; + boolean found = false; + while (t.hasMoreTokens()) { + found = true; + String tk = t.nextToken(); + if (tk.contains("=")) + continue; + System.out.print(prefix + i + "\t"); System.out.print(tk); System.out.println("\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_\t_"); i++; } - if (found) System.out.println(); - + if (found) + System.out.println(); + } bInfile.close(); + in.close(); - - - } - - + } diff --git a/dependencyParser/mate-tools/src/is2/util/Split2.java b/dependencyParser/mate-tools/src/is2/util/Split2.java index 1690a3d..4ed4004 100644 --- a/dependencyParser/mate-tools/src/is2/util/Split2.java +++ b/dependencyParser/mate-tools/src/is2/util/Split2.java @@ -7,11 +7,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; -import java.io.Reader; -import java.nio.channels.Channels; -import java.nio.channels.FileChannel; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; import java.util.StringTokenizer; public class Split2 { @@ -19,38 +14,32 @@ public class Split2 { /** * Splits a tokenized sentences into one word per line format: * - * Input - * > I am an text . - * > Sentence two ... - * - * Output: - * I _ _ _ ... - * am _ _ _ ... - * ... - * + * Input > I am an text . > Sentence two ... + * + * Output: I _ _ _ ... am _ _ _ ... ... + * * @param args * @throws IOException */ public static void main(String args[]) throws IOException { - if (args.length<1) { + if (args.length < 1) { System.out.println("Please provide a file name."); System.exit(0); } - - - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]),"UTF-8"),32768); - BufferedWriter write = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]),"ISO-8859-1")); - - + + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), "UTF-8"), 32768); + BufferedWriter write = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]), "ISO-8859-1")); + String s; - int cnt=0; + int cnt = 0; while ((s = reader.readLine()) != null) { StringTokenizer t = new StringTokenizer(s); - while(t.hasMoreTokens()) { - String tk =t.nextToken(); - for(int c : tk.toCharArray()) { - if (c<0 && c>=255) System.out.println("contain sign "+c+" "+cnt); + while (t.hasMoreTokens()) { + String tk = t.nextToken(); + for (int c : tk.toCharArray()) { + if (c < 0 && c >= 255) + System.out.println("contain sign " + c + " " + cnt); } write.write(tk); write.newLine(); @@ -61,10 +50,7 @@ public class Split2 { reader.close(); write.flush(); write.close(); - - - + } - - + } diff --git a/dependencyParser/mate-tools/src/is2/util/Split3.java b/dependencyParser/mate-tools/src/is2/util/Split3.java index 03d920c..2cf7cf2 100644 --- a/dependencyParser/mate-tools/src/is2/util/Split3.java +++ b/dependencyParser/mate-tools/src/is2/util/Split3.java @@ -7,11 +7,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; -import java.io.Reader; -import java.nio.channels.Channels; -import java.nio.channels.FileChannel; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; import java.util.StringTokenizer; public class Split3 { @@ -19,49 +14,38 @@ public class Split3 { /** * Splits a tokenized sentences into one word per line format: * - * Input - * > I am an text . - * > Sentence two ... - * - * Output: - * I _ _ _ ... - * am _ _ _ ... - * ... - * + * Input > I am an text . > Sentence two ... + * + * Output: I _ _ _ ... am _ _ _ ... ... + * * @param args * @throws IOException */ public static void main(String args[]) throws IOException { - if (args.length<1) { + if (args.length < 1) { System.out.println("Please provide a file name."); System.exit(0); } - - - BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]),"UTF-8"),32768); - BufferedWriter write = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]),"UTF-8"),32768); - - + + BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), "UTF-8"), 32768); + BufferedWriter write = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(args[1]), "UTF-8"), + 32768); + String s; - int cnt=0; while ((s = reader.readLine()) != null) { StringTokenizer t = new StringTokenizer(s); - while(t.hasMoreTokens()) { - String tk =t.nextToken(); + while (t.hasMoreTokens()) { + String tk = t.nextToken(); write.write(tk); write.newLine(); - cnt++; } write.newLine(); } reader.close(); write.flush(); write.close(); - - - + } - - + } -- libgit2 0.22.2