Commit 62ccdfdc3fd9bc5b1ab9fbd162abc22fc8339c0d
1 parent: ae044e90
dependencies update
Showing 8 changed files with 57 additions and 56 deletions
pom.xml
@@ -4,6 +4,9 @@ | @@ -4,6 +4,9 @@ | ||
4 | <groupId>pl.waw.ipipan.zil.core</groupId> | 4 | <groupId>pl.waw.ipipan.zil.core</groupId> |
5 | <artifactId>md</artifactId> | 5 | <artifactId>md</artifactId> |
6 | <version>1.2-SNAPSHOT</version> | 6 | <version>1.2-SNAPSHOT</version> |
7 | + <properties> | ||
8 | + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||
9 | + </properties> | ||
7 | <build> | 10 | <build> |
8 | <plugins> | 11 | <plugins> |
9 | <plugin> | 12 | <plugin> |
@@ -16,6 +19,7 @@ | @@ -16,6 +19,7 @@ | ||
16 | </plugin> | 19 | </plugin> |
17 | <plugin> | 20 | <plugin> |
18 | <artifactId>maven-source-plugin</artifactId> | 21 | <artifactId>maven-source-plugin</artifactId> |
22 | + <version>2.4</version> | ||
19 | <executions> | 23 | <executions> |
20 | <execution> | 24 | <execution> |
21 | <id>attach-sources</id> | 25 | <id>attach-sources</id> |
@@ -28,6 +32,7 @@ | @@ -28,6 +32,7 @@ | ||
28 | </plugin> | 32 | </plugin> |
29 | <plugin> | 33 | <plugin> |
30 | <artifactId>maven-javadoc-plugin</artifactId> | 34 | <artifactId>maven-javadoc-plugin</artifactId> |
35 | + <version>2.10.3</version> | ||
31 | <executions> | 36 | <executions> |
32 | <execution> | 37 | <execution> |
33 | <id>attach-javadocs</id> | 38 | <id>attach-javadocs</id> |
@@ -42,6 +47,7 @@ | @@ -42,6 +47,7 @@ | ||
42 | <!-- explicitly define maven-deploy-plugin after other to force exec | 47 | <!-- explicitly define maven-deploy-plugin after other to force exec |
43 | order --> | 48 | order --> |
44 | <artifactId>maven-deploy-plugin</artifactId> | 49 | <artifactId>maven-deploy-plugin</artifactId> |
50 | + <version>2.7</version> | ||
45 | <executions> | 51 | <executions> |
46 | <execution> | 52 | <execution> |
47 | <id>deploy</id> | 53 | <id>deploy</id> |
@@ -76,12 +82,12 @@ | @@ -76,12 +82,12 @@ | ||
76 | <version>1.2.17</version> | 82 | <version>1.2.17</version> |
77 | </dependency> | 83 | </dependency> |
78 | <dependency> | 84 | <dependency> |
79 | - <groupId>ipipan.multiservice</groupId> | ||
80 | - <artifactId>MultiserviceUtils</artifactId> | 85 | + <groupId>pl.waw.ipipan.zil.multiservice</groupId> |
86 | + <artifactId>utils</artifactId> | ||
81 | <version>1.0-SNAPSHOT</version> | 87 | <version>1.0-SNAPSHOT</version> |
82 | </dependency> | 88 | </dependency> |
83 | <dependency> | 89 | <dependency> |
84 | - <groupId>ipipan</groupId> | 90 | + <groupId>pl.waw.ipipan.zil.nkjp</groupId> |
85 | <artifactId>teiapi</artifactId> | 91 | <artifactId>teiapi</artifactId> |
86 | <version>1.0-SNAPSHOT</version> | 92 | <version>1.0-SNAPSHOT</version> |
87 | </dependency> | 93 | </dependency> |
src/main/java/pl/waw/ipipan/zil/core/md/Main.java
1 | package pl.waw.ipipan.zil.core.md; | 1 | package pl.waw.ipipan.zil.core.md; |
2 | 2 | ||
3 | -import ipipan.clarin.tei.api.entities.TEICorpusText; | ||
4 | -import ipipan.clarin.tei.api.exceptions.TEIException; | ||
5 | -import ipipan.clarin.tei.api.io.IOUtils; | ||
6 | - | ||
7 | import java.io.File; | 3 | import java.io.File; |
8 | import java.io.FileInputStream; | 4 | import java.io.FileInputStream; |
9 | import java.io.IOException; | 5 | import java.io.IOException; |
@@ -11,8 +7,6 @@ import java.io.InputStream; | @@ -11,8 +7,6 @@ import java.io.InputStream; | ||
11 | 7 | ||
12 | import org.apache.log4j.Logger; | 8 | import org.apache.log4j.Logger; |
13 | 9 | ||
14 | -import pl.waw.ipipan.multiservice.thrift.types.MultiserviceException; | ||
15 | -import pl.waw.ipipan.multiservice.thrift.types.TText; | ||
16 | import pl.waw.ipipan.zil.core.md.detection.Detector; | 10 | import pl.waw.ipipan.zil.core.md.detection.Detector; |
17 | import pl.waw.ipipan.zil.core.md.detection.zero.ZeroSubjectDetector; | 11 | import pl.waw.ipipan.zil.core.md.detection.zero.ZeroSubjectDetector; |
18 | import pl.waw.ipipan.zil.core.md.entities.Text; | 12 | import pl.waw.ipipan.zil.core.md.entities.Text; |
@@ -20,6 +14,11 @@ import pl.waw.ipipan.zil.core.md.io.tei.TeiLoader; | @@ -20,6 +14,11 @@ import pl.waw.ipipan.zil.core.md.io.tei.TeiLoader; | ||
20 | import pl.waw.ipipan.zil.core.md.io.tei.TeiSaver; | 14 | import pl.waw.ipipan.zil.core.md.io.tei.TeiSaver; |
21 | import pl.waw.ipipan.zil.core.md.io.thrift.ThriftLoader; | 15 | import pl.waw.ipipan.zil.core.md.io.thrift.ThriftLoader; |
22 | import pl.waw.ipipan.zil.core.md.io.thrift.ThriftSaver; | 16 | import pl.waw.ipipan.zil.core.md.io.thrift.ThriftSaver; |
17 | +import pl.waw.ipipan.zil.multiservice.thrift.types.MultiserviceException; | ||
18 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TText; | ||
19 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICorpusText; | ||
20 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException; | ||
21 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.io.IOUtils; | ||
23 | 22 | ||
24 | /** | 23 | /** |
25 | * @author Mateusz Kopeć | 24 | * @author Mateusz Kopeć |
src/main/java/pl/waw/ipipan/zil/core/md/detection/zero/FeatureGeneration.java
1 | package pl.waw.ipipan.zil.core.md.detection.zero; | 1 | package pl.waw.ipipan.zil.core.md.detection.zero; |
2 | 2 | ||
3 | -import ipipan.clarin.tei.api.entities.TEIMention; | ||
4 | -import ipipan.clarin.tei.api.entities.TEIMorph; | ||
5 | - | ||
6 | import java.util.ArrayList; | 3 | import java.util.ArrayList; |
7 | import java.util.Arrays; | 4 | import java.util.Arrays; |
8 | import java.util.HashMap; | 5 | import java.util.HashMap; |
@@ -18,6 +15,8 @@ import pl.waw.ipipan.zil.core.md.entities.Sentence; | @@ -18,6 +15,8 @@ import pl.waw.ipipan.zil.core.md.entities.Sentence; | ||
18 | import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup; | 15 | import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup; |
19 | import pl.waw.ipipan.zil.core.md.entities.SyntacticWord; | 16 | import pl.waw.ipipan.zil.core.md.entities.SyntacticWord; |
20 | import pl.waw.ipipan.zil.core.md.entities.Token; | 17 | import pl.waw.ipipan.zil.core.md.entities.Token; |
18 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMention; | ||
19 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMorph; | ||
21 | 20 | ||
22 | public class FeatureGeneration { | 21 | public class FeatureGeneration { |
23 | final private static Set<String> CLAUSE_SPLIT_LEMMAS = new HashSet<>(Arrays.asList(new String[] { "i", "albo", | 22 | final private static Set<String> CLAUSE_SPLIT_LEMMAS = new HashSet<>(Arrays.asList(new String[] { "i", "albo", |
src/main/java/pl/waw/ipipan/zil/core/md/detection/zero/InstanceCreator.java
1 | package pl.waw.ipipan.zil.core.md.detection.zero; | 1 | package pl.waw.ipipan.zil.core.md.detection.zero; |
2 | 2 | ||
3 | -import ipipan.clarin.tei.api.entities.TEICorpusText; | ||
4 | -import ipipan.clarin.tei.api.io.IOUtils; | ||
5 | -import ipipan.clarin.tei.api.io.TEI_IO; | ||
6 | - | ||
7 | import java.io.File; | 3 | import java.io.File; |
8 | import java.util.ArrayList; | 4 | import java.util.ArrayList; |
9 | import java.util.HashSet; | 5 | import java.util.HashSet; |
@@ -21,6 +17,9 @@ import pl.waw.ipipan.zil.core.md.entities.Sentence; | @@ -21,6 +17,9 @@ import pl.waw.ipipan.zil.core.md.entities.Sentence; | ||
21 | import pl.waw.ipipan.zil.core.md.entities.Text; | 17 | import pl.waw.ipipan.zil.core.md.entities.Text; |
22 | import pl.waw.ipipan.zil.core.md.entities.Token; | 18 | import pl.waw.ipipan.zil.core.md.entities.Token; |
23 | import pl.waw.ipipan.zil.core.md.io.tei.TeiLoader; | 19 | import pl.waw.ipipan.zil.core.md.io.tei.TeiLoader; |
20 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICorpusText; | ||
21 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.io.IOUtils; | ||
22 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO; | ||
24 | import weka.core.Attribute; | 23 | import weka.core.Attribute; |
25 | import weka.core.FastVector; | 24 | import weka.core.FastVector; |
26 | import weka.core.Instance; | 25 | import weka.core.Instance; |
src/main/java/pl/waw/ipipan/zil/core/md/io/tei/TeiLoader.java
1 | package pl.waw.ipipan.zil.core.md.io.tei; | 1 | package pl.waw.ipipan.zil.core.md.io.tei; |
2 | 2 | ||
3 | -import ipipan.clarin.tei.api.entities.TEICorpusText; | ||
4 | -import ipipan.clarin.tei.api.entities.TEIGroup; | ||
5 | -import ipipan.clarin.tei.api.entities.TEIInterpretation; | ||
6 | -import ipipan.clarin.tei.api.entities.TEIMention; | ||
7 | -import ipipan.clarin.tei.api.entities.TEIMorph; | ||
8 | -import ipipan.clarin.tei.api.entities.TEINamedEntity; | ||
9 | -import ipipan.clarin.tei.api.entities.TEIParagraph; | ||
10 | -import ipipan.clarin.tei.api.entities.TEISentence; | ||
11 | -import ipipan.clarin.tei.api.entities.TEISyntacticEntity; | ||
12 | -import ipipan.clarin.tei.api.entities.TEIWord; | ||
13 | -import ipipan.clarin.tei.api.exceptions.TEIException; | ||
14 | -import ipipan.clarin.tei.api.io.TEI_IO; | ||
15 | - | ||
16 | import java.io.File; | 3 | import java.io.File; |
17 | import java.util.ArrayList; | 4 | import java.util.ArrayList; |
18 | import java.util.HashMap; | 5 | import java.util.HashMap; |
@@ -30,6 +17,18 @@ import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup; | @@ -30,6 +17,18 @@ import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup; | ||
30 | import pl.waw.ipipan.zil.core.md.entities.SyntacticWord; | 17 | import pl.waw.ipipan.zil.core.md.entities.SyntacticWord; |
31 | import pl.waw.ipipan.zil.core.md.entities.Text; | 18 | import pl.waw.ipipan.zil.core.md.entities.Text; |
32 | import pl.waw.ipipan.zil.core.md.entities.Token; | 19 | import pl.waw.ipipan.zil.core.md.entities.Token; |
20 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICorpusText; | ||
21 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIGroup; | ||
22 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIInterpretation; | ||
23 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMention; | ||
24 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMorph; | ||
25 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEINamedEntity; | ||
26 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIParagraph; | ||
27 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEISentence; | ||
28 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEISyntacticEntity; | ||
29 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIWord; | ||
30 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException; | ||
31 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO; | ||
33 | 32 | ||
34 | public class TeiLoader { | 33 | public class TeiLoader { |
35 | 34 |
src/main/java/pl/waw/ipipan/zil/core/md/io/tei/TeiSaver.java
1 | package pl.waw.ipipan.zil.core.md.io.tei; | 1 | package pl.waw.ipipan.zil.core.md.io.tei; |
2 | 2 | ||
3 | -import ipipan.clarin.tei.api.entities.AnnotationLayer; | ||
4 | -import ipipan.clarin.tei.api.entities.EntitiesFactory; | ||
5 | -import ipipan.clarin.tei.api.entities.TEICoreference; | ||
6 | -import ipipan.clarin.tei.api.entities.TEICorpusText; | ||
7 | -import ipipan.clarin.tei.api.entities.TEIMention; | ||
8 | -import ipipan.clarin.tei.api.entities.TEIMorph; | ||
9 | -import ipipan.clarin.tei.api.entities.TEIParagraph; | ||
10 | -import ipipan.clarin.tei.api.entities.TEISentence; | ||
11 | -import ipipan.clarin.tei.api.exceptions.TEIException; | ||
12 | -import ipipan.clarin.tei.api.io.TEI_IO; | ||
13 | -import ipipan.clarin.tei.api.io.TEI_IO.CompressionMethod; | ||
14 | - | ||
15 | import java.io.File; | 3 | import java.io.File; |
16 | import java.util.ArrayList; | 4 | import java.util.ArrayList; |
17 | import java.util.HashMap; | 5 | import java.util.HashMap; |
@@ -26,6 +14,17 @@ import pl.waw.ipipan.zil.core.md.entities.Paragraph; | @@ -26,6 +14,17 @@ import pl.waw.ipipan.zil.core.md.entities.Paragraph; | ||
26 | import pl.waw.ipipan.zil.core.md.entities.Sentence; | 14 | import pl.waw.ipipan.zil.core.md.entities.Sentence; |
27 | import pl.waw.ipipan.zil.core.md.entities.Text; | 15 | import pl.waw.ipipan.zil.core.md.entities.Text; |
28 | import pl.waw.ipipan.zil.core.md.entities.Token; | 16 | import pl.waw.ipipan.zil.core.md.entities.Token; |
17 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.AnnotationLayer; | ||
18 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.EntitiesFactory; | ||
19 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICoreference; | ||
20 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEICorpusText; | ||
21 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMention; | ||
22 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIMorph; | ||
23 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEIParagraph; | ||
24 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.entities.TEISentence; | ||
25 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException; | ||
26 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO; | ||
27 | +import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO.CompressionMethod; | ||
29 | 28 | ||
30 | public class TeiSaver { | 29 | public class TeiSaver { |
31 | 30 |
src/main/java/pl/waw/ipipan/zil/core/md/io/thrift/ThriftLoader.java
@@ -7,15 +7,6 @@ import java.util.Map; | @@ -7,15 +7,6 @@ import java.util.Map; | ||
7 | 7 | ||
8 | import org.apache.log4j.Logger; | 8 | import org.apache.log4j.Logger; |
9 | 9 | ||
10 | -import pl.waw.ipipan.multiservice.thrift.types.MultiserviceException; | ||
11 | -import pl.waw.ipipan.multiservice.thrift.types.TInterpretation; | ||
12 | -import pl.waw.ipipan.multiservice.thrift.types.TNamedEntity; | ||
13 | -import pl.waw.ipipan.multiservice.thrift.types.TParagraph; | ||
14 | -import pl.waw.ipipan.multiservice.thrift.types.TSentence; | ||
15 | -import pl.waw.ipipan.multiservice.thrift.types.TSyntacticGroup; | ||
16 | -import pl.waw.ipipan.multiservice.thrift.types.TSyntacticWord; | ||
17 | -import pl.waw.ipipan.multiservice.thrift.types.TText; | ||
18 | -import pl.waw.ipipan.multiservice.thrift.types.TToken; | ||
19 | import pl.waw.ipipan.zil.core.md.entities.Interpretation; | 10 | import pl.waw.ipipan.zil.core.md.entities.Interpretation; |
20 | import pl.waw.ipipan.zil.core.md.entities.NamedEntity; | 11 | import pl.waw.ipipan.zil.core.md.entities.NamedEntity; |
21 | import pl.waw.ipipan.zil.core.md.entities.Paragraph; | 12 | import pl.waw.ipipan.zil.core.md.entities.Paragraph; |
@@ -24,6 +15,15 @@ import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup; | @@ -24,6 +15,15 @@ import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup; | ||
24 | import pl.waw.ipipan.zil.core.md.entities.SyntacticWord; | 15 | import pl.waw.ipipan.zil.core.md.entities.SyntacticWord; |
25 | import pl.waw.ipipan.zil.core.md.entities.Text; | 16 | import pl.waw.ipipan.zil.core.md.entities.Text; |
26 | import pl.waw.ipipan.zil.core.md.entities.Token; | 17 | import pl.waw.ipipan.zil.core.md.entities.Token; |
18 | +import pl.waw.ipipan.zil.multiservice.thrift.types.MultiserviceException; | ||
19 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TInterpretation; | ||
20 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TNamedEntity; | ||
21 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph; | ||
22 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; | ||
23 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TSyntacticGroup; | ||
24 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TSyntacticWord; | ||
25 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TText; | ||
26 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TToken; | ||
27 | 27 | ||
28 | public class ThriftLoader { | 28 | public class ThriftLoader { |
29 | 29 |
src/main/java/pl/waw/ipipan/zil/core/md/io/thrift/ThriftSaver.java
@@ -8,17 +8,17 @@ import java.util.Map; | @@ -8,17 +8,17 @@ import java.util.Map; | ||
8 | 8 | ||
9 | import org.apache.log4j.Logger; | 9 | import org.apache.log4j.Logger; |
10 | 10 | ||
11 | -import pl.waw.ipipan.multiservice.thrift.types.MultiserviceException; | ||
12 | -import pl.waw.ipipan.multiservice.thrift.types.TMention; | ||
13 | -import pl.waw.ipipan.multiservice.thrift.types.TParagraph; | ||
14 | -import pl.waw.ipipan.multiservice.thrift.types.TSentence; | ||
15 | -import pl.waw.ipipan.multiservice.thrift.types.TText; | ||
16 | -import pl.waw.ipipan.multiservice.thrift.types.TToken; | ||
17 | import pl.waw.ipipan.zil.core.md.entities.Mention; | 11 | import pl.waw.ipipan.zil.core.md.entities.Mention; |
18 | import pl.waw.ipipan.zil.core.md.entities.Paragraph; | 12 | import pl.waw.ipipan.zil.core.md.entities.Paragraph; |
19 | import pl.waw.ipipan.zil.core.md.entities.Sentence; | 13 | import pl.waw.ipipan.zil.core.md.entities.Sentence; |
20 | import pl.waw.ipipan.zil.core.md.entities.Text; | 14 | import pl.waw.ipipan.zil.core.md.entities.Text; |
21 | import pl.waw.ipipan.zil.core.md.entities.Token; | 15 | import pl.waw.ipipan.zil.core.md.entities.Token; |
16 | +import pl.waw.ipipan.zil.multiservice.thrift.types.MultiserviceException; | ||
17 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TMention; | ||
18 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TParagraph; | ||
19 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TSentence; | ||
20 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TText; | ||
21 | +import pl.waw.ipipan.zil.multiservice.thrift.types.TToken; | ||
22 | 22 | ||
23 | public class ThriftSaver { | 23 | public class ThriftSaver { |
24 | 24 |