Commit 8f86545e14f99bbf47ab83bf202e26af7a2716c4
1 parent
1dc4f947
Cleaning unused experimental code.
Showing
9 changed files
with
233 additions
and
702 deletions
src/main/java/pl/waw/ipipan/zil/core/md/Main.java
@@ -33,9 +33,8 @@ public class Main { | @@ -33,9 +33,8 @@ public class Main { | ||
33 | 33 | ||
34 | private static final boolean GZIP_OUTPUT = true; | 34 | private static final boolean GZIP_OUTPUT = true; |
35 | private static final String DEFAULT_ZERO_SUBJECT_MODEL = "/zero_subject_model.bin"; | 35 | private static final String DEFAULT_ZERO_SUBJECT_MODEL = "/zero_subject_model.bin"; |
36 | - private static final String DEFAULT_VERBS_VALENCE = "/walenty_20170117_verbs_all.txt"; | ||
37 | - private static final String DEFAULT_NOUNS_VALENCE = "/walenty_20170117_nouns_all.txt"; | ||
38 | - private static final String COMPLEX_PREPS = "/complex_preps_Walenty_USJP.txt"; | 36 | + private static final String DEFAULT_VERBS_VALENCE = "/walenty_20170117_verbs_all_with_realizations.txt"; |
37 | + private static final String DEFAULT_NOUNS_VALENCE = "/walenty_20170117_nouns_all_with_realizations.txt"; | ||
39 | 38 | ||
40 | private static ZeroSubjectDetector zeroSubjectModel; | 39 | private static ZeroSubjectDetector zeroSubjectModel; |
41 | 40 | ||
@@ -46,8 +45,6 @@ public class Main { | @@ -46,8 +45,6 @@ public class Main { | ||
46 | 45 | ||
47 | private static Map<ValenceDicts,Map<String,ArrayList<String>>> valence = | 46 | private static Map<ValenceDicts,Map<String,ArrayList<String>>> valence = |
48 | new EnumMap(ValenceDicts.class); | 47 | new EnumMap(ValenceDicts.class); |
49 | - | ||
50 | - private static final ArrayList<String> complexPreps; | ||
51 | 48 | ||
52 | static { | 49 | static { |
53 | InputStream zeroSubjectDetectionModelStream = Main.class.getResourceAsStream(DEFAULT_ZERO_SUBJECT_MODEL); | 50 | InputStream zeroSubjectDetectionModelStream = Main.class.getResourceAsStream(DEFAULT_ZERO_SUBJECT_MODEL); |
@@ -58,9 +55,6 @@ public class Main { | @@ -58,9 +55,6 @@ public class Main { | ||
58 | 55 | ||
59 | InputStream walentyNounsStream = Main.class.getResourceAsStream(DEFAULT_NOUNS_VALENCE); | 56 | InputStream walentyNounsStream = Main.class.getResourceAsStream(DEFAULT_NOUNS_VALENCE); |
60 | valence.put(ValenceDicts.NounsValence, readWalenty(walentyNounsStream)); | 57 | valence.put(ValenceDicts.NounsValence, readWalenty(walentyNounsStream)); |
61 | - | ||
62 | - InputStream complexPrepositionsStream = Main.class.getResourceAsStream(COMPLEX_PREPS); | ||
63 | - complexPreps = readValues(complexPrepositionsStream); | ||
64 | } | 58 | } |
65 | 59 | ||
66 | 60 | ||
@@ -125,34 +119,6 @@ public class Main { | @@ -125,34 +119,6 @@ public class Main { | ||
125 | 119 | ||
126 | return false; | 120 | return false; |
127 | } | 121 | } |
128 | - | ||
129 | - public static ArrayList<String> readValues(InputStream stream) { | ||
130 | - ArrayList<String> values; | ||
131 | - try { | ||
132 | - BufferedReader br=new BufferedReader(new InputStreamReader(stream)); | ||
133 | - values = new ArrayList<String>(); | ||
134 | - String line; | ||
135 | - boolean firstLine = true; | ||
136 | - while((line = br.readLine()) != null) { | ||
137 | - if (firstLine) { | ||
138 | - line = line.replace("\uFEFF", ""); // remove BOM character | ||
139 | - firstLine = false; | ||
140 | - } | ||
141 | - | ||
142 | - if (!line.startsWith("%")) { | ||
143 | - String value = line.trim(); | ||
144 | - if (!value.isEmpty()) { | ||
145 | - values.add(value); | ||
146 | - } | ||
147 | - } | ||
148 | - } | ||
149 | - br.close(); | ||
150 | - } catch (IOException ex) { | ||
151 | - ex.printStackTrace(); | ||
152 | - throw new RuntimeException(ex); | ||
153 | - } | ||
154 | - return values; | ||
155 | - } | ||
156 | 122 | ||
157 | private Main() { | 123 | private Main() { |
158 | } | 124 | } |
@@ -244,7 +210,7 @@ public class Main { | @@ -244,7 +210,7 @@ public class Main { | ||
244 | */ | 210 | */ |
245 | public static void annotateThriftText(TText thriftText) throws MultiserviceException { | 211 | public static void annotateThriftText(TText thriftText) throws MultiserviceException { |
246 | Text responseText = ThriftLoader.loadTextFromThrift(thriftText); | 212 | Text responseText = ThriftLoader.loadTextFromThrift(thriftText); |
247 | - Detector.findMentionsInText(responseText, zeroSubjectModel, valence, complexPreps); | 213 | + Detector.findMentionsInText(responseText, zeroSubjectModel, valence); |
248 | ThriftSaver.updateThriftText(responseText, thriftText); | 214 | ThriftSaver.updateThriftText(responseText, thriftText); |
249 | } | 215 | } |
250 | 216 | ||
@@ -257,7 +223,7 @@ public class Main { | @@ -257,7 +223,7 @@ public class Main { | ||
257 | */ | 223 | */ |
258 | public static void annotateTeiText(TEICorpusText teiText) throws TEIException { | 224 | public static void annotateTeiText(TEICorpusText teiText) throws TEIException { |
259 | Text responseText = TeiLoader.loadTextFromTei(teiText); | 225 | Text responseText = TeiLoader.loadTextFromTei(teiText); |
260 | - Detector.findMentionsInText(responseText, zeroSubjectModel, valence, complexPreps); | 226 | + Detector.findMentionsInText(responseText, zeroSubjectModel, valence); |
261 | TeiSaver.updateTeiText(responseText, teiText); | 227 | TeiSaver.updateTeiText(responseText, teiText); |
262 | } | 228 | } |
263 | 229 |
src/main/java/pl/waw/ipipan/zil/core/md/detection/Cleaner.java
1 | package pl.waw.ipipan.zil.core.md.detection; | 1 | package pl.waw.ipipan.zil.core.md.detection; |
2 | 2 | ||
3 | -import pl.waw.ipipan.zil.core.md.Main.ValenceDicts; | ||
4 | import pl.waw.ipipan.zil.core.md.entities.Mention; | 3 | import pl.waw.ipipan.zil.core.md.entities.Mention; |
5 | import pl.waw.ipipan.zil.core.md.entities.Sentence; | 4 | import pl.waw.ipipan.zil.core.md.entities.Sentence; |
6 | import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup; | 5 | import pl.waw.ipipan.zil.core.md.entities.SyntacticGroup; |
@@ -164,33 +163,6 @@ public class Cleaner { | @@ -164,33 +163,6 @@ public class Cleaner { | ||
164 | } | 163 | } |
165 | } | 164 | } |
166 | 165 | ||
167 | - /*private static void removeWalentyFramedMentions(Sentence sentence, | ||
168 | - ArrayList<Mention> mentions, | ||
169 | - ArrayList<String> schemata) { | ||
170 | - ArrayList<Mention> mentionsToRemove = new ArrayList<Mention>(); | ||
171 | - for (Mention mention : mentions) { | ||
172 | - int mentionStart = mention.getFirstSegment().getSentencePosition(); | ||
173 | - int mentionEnd = mention.getLastSegment().getSentencePosition(); | ||
174 | - SyntacticGroup startGroup = sentence.getFirstGroup(mentionStart, mentionEnd); | ||
175 | - SyntacticGroup endGroup = sentence.getLastGroup(mentionStart, mentionEnd); | ||
176 | - if (startGroup != null && endGroup != null | ||
177 | - && startGroup.compareTo(endGroup) != 0) { | ||
178 | - ArrayList<String> startGroupRealizations = startGroup.getWalentyRealizations(); | ||
179 | - ArrayList<String> endGroupRealizations = endGroup.getWalentyRealizations(); | ||
180 | - for (String schema : schemata) { | ||
181 | - if (isProperSchema(schema, startGroupRealizations, endGroupRealizations)) { | ||
182 | - mentionsToRemove.add(mention); | ||
183 | - break; | ||
184 | - } | ||
185 | - } | ||
186 | - } | ||
187 | - } | ||
188 | - | ||
189 | - for (Mention mentionToRemove : mentionsToRemove) { | ||
190 | - sentence.removeMention(mentionToRemove); | ||
191 | - } | ||
192 | - }*/ | ||
193 | - | ||
194 | private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | 166 | private static boolean isProperSchema(String schema, ArrayList<String> group1Types, |
195 | ArrayList<String> group2Types) { | 167 | ArrayList<String> group2Types) { |
196 | for (String group1Type : group1Types) { | 168 | for (String group1Type : group1Types) { |
@@ -207,7 +179,7 @@ public class Cleaner { | @@ -207,7 +179,7 @@ public class Cleaner { | ||
207 | String phraseType2) { | 179 | String phraseType2) { |
208 | boolean phrType1Found = false; | 180 | boolean phrType1Found = false; |
209 | boolean phrType2Found = false; | 181 | boolean phrType2Found = false; |
210 | - for (String position : schema.split("\\+")) { | 182 | + for (String position : schema.split("\\s\\+\\s")) { |
211 | position = position.trim(); | 183 | position = position.trim(); |
212 | position = position.substring(1, position.length()-1); | 184 | position = position.substring(1, position.length()-1); |
213 | for (String phrT : position.split(";")) { | 185 | for (String phrT : position.split(";")) { |
@@ -226,34 +198,6 @@ public class Cleaner { | @@ -226,34 +198,6 @@ public class Cleaner { | ||
226 | return false; | 198 | return false; |
227 | } | 199 | } |
228 | 200 | ||
229 | - | ||
230 | - // wykrywa, ze "wszystkim" jest czescia "przede wszystkim" (kublika Qub) | ||
231 | - public static void cleanQubs(Sentence sentence) { | ||
232 | - ArrayList<Mention> mentionsToRemove = new ArrayList<Mention>(); | ||
233 | - for (Mention mention : sentence.getMentions()) { | ||
234 | - if (mention.isPartOfQub()) { | ||
235 | - mentionsToRemove.add(mention); | ||
236 | - } | ||
237 | - } | ||
238 | - | ||
239 | - for (Mention mentionToRemove : mentionsToRemove) { | ||
240 | - sentence.removeMention(mentionToRemove); | ||
241 | - } | ||
242 | - } | ||
243 | - | ||
244 | - public static void cleanPreps(Sentence sentence) { | ||
245 | - ArrayList<Mention> mentionsToRemove = new ArrayList<Mention>(); | ||
246 | - for (Mention mention : sentence.getMentions()) { | ||
247 | - if (mention.isPartOfPrep()) { | ||
248 | - mentionsToRemove.add(mention); | ||
249 | - } | ||
250 | - } | ||
251 | - | ||
252 | - for (Mention mentionToRemove : mentionsToRemove) { | ||
253 | - sentence.removeMention(mentionToRemove); | ||
254 | - } | ||
255 | - } | ||
256 | - | ||
257 | public static void cleanFrazeos(Sentence sentence) { | 201 | public static void cleanFrazeos(Sentence sentence) { |
258 | ArrayList<Mention> mentionsToRemove = new ArrayList<Mention>(); | 202 | ArrayList<Mention> mentionsToRemove = new ArrayList<Mention>(); |
259 | for (Mention mention : sentence.getMentions()) { | 203 | for (Mention mention : sentence.getMentions()) { |
@@ -267,20 +211,4 @@ public class Cleaner { | @@ -267,20 +211,4 @@ public class Cleaner { | ||
267 | } | 211 | } |
268 | } | 212 | } |
269 | 213 | ||
270 | - // wyrzuca wzmianki bedace czescia przyimkow zlozonych | ||
271 | - public static void cleanComplexPreps(Sentence sentence, | ||
272 | - ArrayList<String> complexPreps) { | ||
273 | - | ||
274 | - ArrayList<Mention> mentionsToRemove = new ArrayList<Mention>(); | ||
275 | - for (Mention mention : sentence.getMentions()) { | ||
276 | - if (mention.isPartOfComplexPrep(complexPreps)) { | ||
277 | - mentionsToRemove.add(mention); | ||
278 | - } | ||
279 | - } | ||
280 | - | ||
281 | - for (Mention mentionToRemove : mentionsToRemove) { | ||
282 | - sentence.removeMention(mentionToRemove); | ||
283 | - } | ||
284 | - } | ||
285 | - | ||
286 | } | 214 | } |
src/main/java/pl/waw/ipipan/zil/core/md/detection/Constants.java
1 | package pl.waw.ipipan.zil.core.md.detection; | 1 | package pl.waw.ipipan.zil.core.md.detection; |
2 | 2 | ||
3 | +import java.util.Arrays; | ||
4 | +import java.util.List; | ||
5 | + | ||
3 | public class Constants { | 6 | public class Constants { |
4 | public static final String MORPHO_NOUN_CTAGS = "subst|depr|ger"; | 7 | public static final String MORPHO_NOUN_CTAGS = "subst|depr|ger"; |
5 | public static final String MORPHO_VERB_CTAGS = "fin|bedzie|aglt|impt"; | 8 | public static final String MORPHO_VERB_CTAGS = "fin|bedzie|aglt|impt"; |
@@ -7,6 +10,11 @@ public class Constants { | @@ -7,6 +10,11 @@ public class Constants { | ||
7 | public static final String MORPHO_CTAGS = MORPHO_NOUN_CTAGS + "|" | 10 | public static final String MORPHO_CTAGS = MORPHO_NOUN_CTAGS + "|" |
8 | + MORPHO_PRONOUN_CTAGS; | 11 | + MORPHO_PRONOUN_CTAGS; |
9 | public static final String WORDS_CTAGS = "Noun|Ppron.*"; | 12 | public static final String WORDS_CTAGS = "Noun|Ppron.*"; |
13 | + | ||
14 | + public static final List<String> FRAZEO_CTAGS = Arrays.asList("Prep", "Qub", "Adv", "Interj", | ||
15 | + "Adj", "Conj", "Comp"); | ||
16 | + | ||
17 | + public static final List<String> VERB_CTAGS = Arrays.asList("Inf", "Verbfin"); | ||
10 | 18 | ||
11 | private Constants() { | 19 | private Constants() { |
12 | } | 20 | } |
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
@@ -22,24 +22,22 @@ public class Detector { | @@ -22,24 +22,22 @@ public class Detector { | ||
22 | 22 | ||
23 | public static void findMentionsInText(Text text, | 23 | public static void findMentionsInText(Text text, |
24 | ZeroSubjectDetector zeroSubjectModel, | 24 | ZeroSubjectDetector zeroSubjectModel, |
25 | - Map<ValenceDicts,Map<String,ArrayList<String>>> valence, | ||
26 | - ArrayList<String> complexPreps) { | 25 | + Map<ValenceDicts,Map<String,ArrayList<String>>> valence) { |
27 | text.clearMentions(); | 26 | text.clearMentions(); |
28 | logger.debug("Detecting mentions in text " + text.getId()); | 27 | logger.debug("Detecting mentions in text " + text.getId()); |
29 | for (Paragraph p : text) | 28 | for (Paragraph p : text) |
30 | for (Sentence s : p) | 29 | for (Sentence s : p) |
31 | - detectMentionsInSentence(s, zeroSubjectModel, valence, complexPreps); | 30 | + detectMentionsInSentence(s, zeroSubjectModel, valence); |
32 | } | 31 | } |
33 | 32 | ||
34 | private static void detectMentionsInSentence(Sentence sentence, | 33 | private static void detectMentionsInSentence(Sentence sentence, |
35 | ZeroSubjectDetector zeroSubjectModel, | 34 | ZeroSubjectDetector zeroSubjectModel, |
36 | - Map<ValenceDicts,Map<String,ArrayList<String>>> valence, | ||
37 | - ArrayList<String> complexPreps) { | 35 | + Map<ValenceDicts,Map<String,ArrayList<String>>> valence) { |
38 | // adding mentions | 36 | // adding mentions |
39 | addMentionsByTokenCtag(sentence); | 37 | addMentionsByTokenCtag(sentence); |
40 | addMentionsBySyntacticWordsCtag(sentence); | 38 | addMentionsBySyntacticWordsCtag(sentence); |
41 | addMentionsByNamedEntities(sentence); | 39 | addMentionsByNamedEntities(sentence); |
42 | - addMentionsByGroups(sentence, valence, complexPreps); | 40 | + addMentionsByGroups(sentence, valence); |
43 | addSpeakerMentionsInSpoken(sentence); | 41 | addSpeakerMentionsInSpoken(sentence); |
44 | 42 | ||
45 | // zero subject detection | 43 | // zero subject detection |
@@ -47,12 +45,9 @@ public class Detector { | @@ -47,12 +45,9 @@ public class Detector { | ||
47 | 45 | ||
48 | // removing mentions | 46 | // removing mentions |
49 | removeTo(sentence); | 47 | removeTo(sentence); |
48 | + Cleaner.cleanWalentyFramedMentions(sentence, valence.get(ValenceDicts.VerbsValence)); | ||
50 | Cleaner.cleanUnnecessarySentenceMentions(sentence); | 49 | Cleaner.cleanUnnecessarySentenceMentions(sentence); |
51 | - //Cleaner.cleanQubs(sentence); | ||
52 | - //Cleaner.cleanPreps(sentence); | ||
53 | - //Cleaner.cleanComplexPreps(sentence, complexPreps); | ||
54 | Cleaner.cleanFrazeos(sentence); | 50 | Cleaner.cleanFrazeos(sentence); |
55 | - Cleaner.cleanWalentyFramedMentions(sentence, valence.get(ValenceDicts.VerbsValence)); | ||
56 | 51 | ||
57 | // updating mention heads | 52 | // updating mention heads |
58 | updateMentionHeads(sentence); | 53 | updateMentionHeads(sentence); |
@@ -108,294 +103,64 @@ public class Detector { | @@ -108,294 +103,64 @@ public class Detector { | ||
108 | * @param sentence | 103 | * @param sentence |
109 | */ | 104 | */ |
110 | private static void addMentionsByGroups(Sentence sentence, | 105 | private static void addMentionsByGroups(Sentence sentence, |
111 | - Map<ValenceDicts,Map<String,ArrayList<String>>> valence, | ||
112 | - ArrayList<String> complexPreps) { | ||
113 | - List<SyntacticGroup> groups = sentence.getGroups(); | ||
114 | - for (int i = 0; i < groups.size(); i++) { | ||
115 | - SyntacticGroup thisGroup = groups.get(i); | ||
116 | - | ||
117 | - /*SyntacticGroup nearPrepNG = null; | ||
118 | - SyntacticGroup nextNG = null;*/ | ||
119 | - | ||
120 | - SyntacticGroup nextGroup = thisGroup.getFollowingGroup(); | ||
121 | - | ||
122 | - /*if (thisGroup.getType().startsWith("NG")) { | ||
123 | - nearPrepNG = getFollowingPrepNGs(thisGroup.getSentencePositionEnd(), | ||
124 | - sentence); | ||
125 | - nextNG = thisGroup.getNextNG(); | ||
126 | - }*/ | ||
127 | - | ||
128 | - /*if (nextNG != null) { | ||
129 | - int prepStart = thisGroup.getSentencePositionEnd() + 1; | ||
130 | - int prepEnd = nextNG.getSentencePositionStart() - 1; | ||
131 | - String prep = sentence.getTextInsideSpan(prepStart, prepEnd); | ||
132 | - if (complexPreps.contains(prep)) { | ||
133 | - String cos = ""; | ||
134 | - } | ||
135 | - }*/ | 106 | + Map<ValenceDicts,Map<String,ArrayList<String>>> valence) { |
107 | + | ||
108 | + for (SyntacticGroup group : sentence.getGroups()) { | ||
109 | + SyntacticGroup nextGroup = group.getFollowingGroup(); | ||
110 | + SyntacticGroup nextnextGroup = null; | ||
111 | + SyntacticGroup nextnextnextGroup = null; | ||
112 | + if (nextGroup != null) { | ||
113 | + nextnextGroup = nextGroup.getFollowingGroup(); | ||
114 | + if (nextnextGroup != null) { | ||
115 | + nextnextnextGroup = nextnextGroup.getFollowingGroup(); | ||
116 | + } | ||
117 | + } | ||
136 | 118 | ||
137 | - /*if (thisGroup.getType().startsWith("NG") && nearPrepNG != null && | ||
138 | - //!isPartOfPrepNG(thisGroup, sentence) && | ||
139 | - //getFollowingPrepNGs(nearPrepNG.getSentencePositionEnd(), sentence) == null && | ||
140 | - precedingWordIsVerb(thisGroup, sentence) && | ||
141 | - //!precedingVerbModifyPrepNG(thisGroup, nearPrepNG, sentence, valence.get(ValenceDicts.VerbsValence)) && | ||
142 | - !precedingVerbModifyNG(thisGroup, nearPrepNG, sentence, valence.get(ValenceDicts.VerbsValence)) && | ||
143 | - !sameSemanticHeads(thisGroup, nearPrepNG)) { | ||
144 | - List<Token> heads = thisGroup.getSemanticHeadTokens(); | ||
145 | - List<Token> segments = thisGroup.getTokens(); | ||
146 | - segments.addAll(nearPrepNG.getTokens()); | ||
147 | - | ||
148 | - sentence.addMention(new Mention(segments, heads)); | ||
149 | - }*/ | ||
150 | - /*if (thisGroup.getType().startsWith("NG") && nearPrepNG != null && | ||
151 | - // !precedingWordIsVerb(thisGroup, sentence) && | ||
152 | - !isPartOfPrepNG(thisGroup, sentence) && | ||
153 | - getFollowingPrepNGs(nearPrepNG.getSentencePositionEnd(), sentence) == null && | ||
154 | - //!precedingWordIsVerb(thisGroup, sentence) && | ||
155 | - !precedingVerbModifyPrepNG(thisGroup, nearPrepNG, sentence, valence.get(ValenceDicts.VerbsValence)) && | ||
156 | - //!precedingVerbModifyNG(thisGroup, nearPrepNG, sentence, valence.get(ValenceDicts.VerbsValence)) && | ||
157 | - !sameSemanticHeads(thisGroup, nearPrepNG)) { | ||
158 | - List<Token> heads = thisGroup.getSemanticHeadTokens(); | ||
159 | - List<Token> segments = thisGroup.getTokens(); | ||
160 | - segments.addAll(nearPrepNG.getTokens()); | ||
161 | - | ||
162 | - sentence.addMention(new Mention(segments, heads)); | ||
163 | - }*/ | ||
164 | - if (thisGroup.getType().startsWith("NG") && | ||
165 | - nextGroup != null && nextGroup.getType().startsWith("PrepNG") && | ||
166 | - NGPrepNGValenceCompatibility(thisGroup, nextGroup, sentence, valence.get(ValenceDicts.NounsValence))) { | ||
167 | - List<Token> heads = thisGroup.getSemanticHeadTokens(); | 119 | + if (group.getType().startsWith("NG") && nextGroup != null && |
120 | + nextnextGroup != null && nextnextnextGroup != null && | ||
121 | + quatroCompatibility(group, nextGroup, nextnextGroup, | ||
122 | + nextnextnextGroup, valence.get(ValenceDicts.NounsValence))) { | ||
123 | + List<Token> heads = group.getSemanticHeadTokens(); | ||
168 | List<Token> segments = new ArrayList<Token>(); | 124 | List<Token> segments = new ArrayList<Token>(); |
169 | - segments.addAll(thisGroup.getTokens()); | 125 | + segments.addAll(group.getTokens()); |
170 | segments.addAll(nextGroup.getTokens()); | 126 | segments.addAll(nextGroup.getTokens()); |
127 | + segments.addAll(nextnextGroup.getTokens()); | ||
128 | + segments.addAll(nextnextnextGroup.getTokens()); | ||
171 | 129 | ||
172 | sentence.addMention(new Mention(segments, heads)); | 130 | sentence.addMention(new Mention(segments, heads)); |
173 | - } else if (thisGroup.getType().startsWith("NG") && nextGroup != null && | ||
174 | - nextGroup.getType().startsWith("NG") && | ||
175 | - NGNGValenceCompatibility(thisGroup, nextGroup, sentence, valence.get(ValenceDicts.NounsValence)) | ||
176 | - ) { | ||
177 | - List<Token> heads = thisGroup.getSemanticHeadTokens(); | 131 | + } else if (group.getType().startsWith("NG") && nextGroup != null && |
132 | + nextnextGroup != null && tripleCompatibility(group, nextGroup, nextnextGroup, valence.get(ValenceDicts.NounsValence))) { | ||
133 | + List<Token> heads = group.getSemanticHeadTokens(); | ||
178 | List<Token> segments = new ArrayList<Token>(); | 134 | List<Token> segments = new ArrayList<Token>(); |
179 | - segments.addAll(thisGroup.getTokens()); | 135 | + segments.addAll(group.getTokens()); |
180 | segments.addAll(nextGroup.getTokens()); | 136 | segments.addAll(nextGroup.getTokens()); |
137 | + segments.addAll(nextnextGroup.getTokens()); | ||
181 | 138 | ||
182 | sentence.addMention(new Mention(segments, heads)); | 139 | sentence.addMention(new Mention(segments, heads)); |
183 | - } /*else if (thisGroup.getType().startsWith("NG") && nextNG != null && nearPrepNG == null && | ||
184 | - NGcomplexPrepNGValenceCompatibility(thisGroup, nextNG, sentence, valence.get(ValenceDicts.NounsValence))) { | ||
185 | - List<Token> heads = thisGroup.getSemanticHeadTokens(); | ||
186 | - | 140 | + } else if (group.getType().startsWith("NG") && nextGroup != null && |
141 | + groupsValenceCompatibility(group, nextGroup, sentence, valence.get(ValenceDicts.NounsValence)) | ||
142 | + ) { | ||
143 | + List<Token> heads = group.getSemanticHeadTokens(); | ||
187 | List<Token> segments = new ArrayList<Token>(); | 144 | List<Token> segments = new ArrayList<Token>(); |
188 | - segments.addAll(thisGroup.getTokens()); | ||
189 | - | ||
190 | - int prepStart = thisGroup.getSentencePositionEnd() + 1; | ||
191 | - int prepEnd = nextNG.getSentencePositionStart() - 1; | ||
192 | - ArrayList<Token> prepSegments = sentence.getSegmentsInsideSpan(prepStart, prepEnd); | ||
193 | - segments.addAll(prepSegments); | ||
194 | - | ||
195 | - segments.addAll(nextNG.getTokens()); | 145 | + segments.addAll(group.getTokens()); |
146 | + segments.addAll(nextGroup.getTokens()); | ||
196 | 147 | ||
197 | sentence.addMention(new Mention(segments, heads)); | 148 | sentence.addMention(new Mention(segments, heads)); |
198 | - }*/ | ||
199 | - //else if // NG + im./pt. NG | ||
200 | - // daty nie sa oznaczane np 21 kwietnia itd. , to co z ta gramatyka | ||
201 | - // "instytut naukowy w montrealu" czemu sie nie laczy? ==> NE(orgName) + w(prep) + NE(placeName) | ||
202 | - else if (thisGroup.getType().startsWith("NG")) { | ||
203 | - List<Token> segments = thisGroup.getTokens(); | ||
204 | - List<Token> heads = thisGroup.getSemanticHeadTokens(); | ||
205 | - | ||
206 | - sentence.addMention(new Mention(segments, heads)); | ||
207 | - } | ||
208 | - } | ||
209 | - | ||
210 | - // oryginalna wersja | ||
211 | - /*for (SyntacticGroup group : sentence.getGroups()) { | ||
212 | - if (group.getType().startsWith("NG")) { | 149 | + } else if (group.getType().startsWith("NG")) { |
213 | List<Token> segments = group.getTokens(); | 150 | List<Token> segments = group.getTokens(); |
214 | List<Token> heads = group.getSemanticHeadTokens(); | 151 | List<Token> heads = group.getSemanticHeadTokens(); |
215 | 152 | ||
216 | sentence.addMention(new Mention(segments, heads)); | 153 | sentence.addMention(new Mention(segments, heads)); |
217 | } | 154 | } |
218 | - }*/ | ||
219 | - } | ||
220 | - | ||
221 | - private static boolean followingWordIsInf(SyntacticGroup group, | ||
222 | - Sentence sentence) { | ||
223 | - int followingTokenPosition = group.getSentencePositionEnd() + 1; | ||
224 | - for (SyntacticWord word : sentence.getSyntacticWords()) { | ||
225 | - int firstWordPosition = word.getSentencePositionStart(); | ||
226 | - if (followingTokenPosition == firstWordPosition && | ||
227 | - (word.getCtag().equals("Inf"))) { | ||
228 | - return true; | ||
229 | - } | ||
230 | - } | ||
231 | - | ||
232 | - return false; | ||
233 | - } | ||
234 | - | ||
235 | - private static SyntacticGroup getFollowingPrepNGs(int sentencePosition, | ||
236 | - Sentence sentence) { | ||
237 | - SyntacticGroup largestGroup = null; | ||
238 | - int nextTokenPosition = sentencePosition + 1; | ||
239 | - for (SyntacticGroup group : sentence.getGroups()) { | ||
240 | - if (group.getType().startsWith("PrepNG") && | ||
241 | - group.getSentencePositionStart() == nextTokenPosition) { | ||
242 | - if (largestGroup == null || | ||
243 | - largestGroup.getTokens().size() < group.getTokens().size()) { | ||
244 | - largestGroup = group; | ||
245 | - } | ||
246 | - } | ||
247 | - } | ||
248 | - return largestGroup; | ||
249 | - } | ||
250 | - | ||
251 | - private static boolean isPartOfPrepNG(SyntacticGroup NGGroup, | ||
252 | - Sentence sentence) { | ||
253 | - int NGGroupStart = NGGroup.getSentencePositionStart(); | ||
254 | - int NGGroupEnd = NGGroup.getSentencePositionEnd(); | ||
255 | - for (SyntacticGroup group : sentence.getGroups()) { | ||
256 | - if (group.getType().startsWith("PrepNG") && | ||
257 | - group.getSentencePositionStart() <= NGGroupStart && | ||
258 | - group.getSentencePositionEnd() >= NGGroupEnd) { | ||
259 | - return true; | ||
260 | - } | ||
261 | - } | ||
262 | - return false; | ||
263 | - } | ||
264 | - | ||
265 | - private static boolean precedingWordIsVerb(SyntacticGroup group, | ||
266 | - Sentence sentence) { | ||
267 | - int precedingTokenPosition = group.getSentencePositionStart() - 1; | ||
268 | - if(isPartOfPrepNG(group, sentence)) { | ||
269 | - SyntacticGroup parentGroup = getParentPrepNG(group, sentence); | ||
270 | - precedingTokenPosition = parentGroup.getSentencePositionStart() - 1; | ||
271 | - } | ||
272 | - | ||
273 | - for (SyntacticWord word : sentence.getSyntacticWords()) { | ||
274 | - int lastWordPosition = word.getSentencePositionEnd(); | ||
275 | - if (precedingTokenPosition == lastWordPosition && | ||
276 | - (word.getCtag().equals("Verbfin") || word.getCtag().equals("Inf"))) { | ||
277 | - return true; | ||
278 | - } | ||
279 | - } | ||
280 | - return false; | ||
281 | - } | ||
282 | - | ||
283 | - // czy sie w lemacie bedzie zgodne miedzy walentym a spejdem? | ||
284 | - // czy prep moze sie skladac z wiecej niz jednego segmentu? | ||
285 | - // dopasowywac refla i recip do sie spejdowego | ||
286 | - private static boolean precedingVerbModifyPrepNG(SyntacticGroup NGGroup, | ||
287 | - SyntacticGroup PrepNGGroup, Sentence sentence, | ||
288 | - Map<String,ArrayList<String>> walentyMapping) { | ||
289 | - int precedingTokenPosition = NGGroup.getSentencePositionStart() - 1; | ||
290 | - for (SyntacticWord word : sentence.getSyntacticWords()) { | ||
291 | - int lastWordPosition = word.getSentencePositionEnd(); | ||
292 | - if (precedingTokenPosition == lastWordPosition && | ||
293 | - (word.getCtag().equals("Verbfin") || word.getCtag().equals("Inf"))) { | ||
294 | - String verb = word.getBase(); | ||
295 | - if (!walentyMapping.containsKey(verb)) { | ||
296 | - return true; | ||
297 | - } else { | ||
298 | - SyntacticWord prepWord = PrepNGGroup.getFirstWord(); | ||
299 | - | ||
300 | - if (prepWord.getTokens().size() == 1) { | ||
301 | - Token prep = prepWord.getTokens().get(0); | ||
302 | - String prepBase = prep.getBase(); | ||
303 | - // sprawdzic czy glowa moze miec wiele tokenow | ||
304 | - String prepCase = PrepNGGroup.getSemanticHeadTokens().get(0).getCase(); | ||
305 | - ArrayList<String> prepnps = getPrepnps(prepBase, prepCase); | ||
306 | - | ||
307 | - ArrayList<String> schemata = walentyMapping.get(verb); | ||
308 | - for (String schema : schemata) { | ||
309 | - for (String prepnp : prepnps) { | ||
310 | - if (schema.contains(prepnp)) { | ||
311 | - return true; | ||
312 | - } | ||
313 | - } | ||
314 | - } | ||
315 | - } else if (prepWord.getTokens().size() > 1) { | ||
316 | - String prepOrth = prepWord.getOrth().toLowerCase(); | ||
317 | - String comprepnp = String.format("comprepnp(%s)", prepOrth); | ||
318 | - ArrayList<String> schemata = walentyMapping.get(verb); | ||
319 | - for (String schema : schemata) { | ||
320 | - if (schema.contains(comprepnp)) { | ||
321 | - return true; | ||
322 | - } | ||
323 | - } | ||
324 | - | ||
325 | - } | ||
326 | - | ||
327 | - | ||
328 | - } | ||
329 | - } | ||
330 | } | 155 | } |
331 | - return false; | ||
332 | - } | ||
333 | - | ||
334 | - private static boolean precedingVerbModifyNG(SyntacticGroup NGGroup, | ||
335 | - SyntacticGroup PrepNGGroup, Sentence sentence, | ||
336 | - Map<String,ArrayList<String>> walentyMapping) { | ||
337 | - int precedingTokenPosition = NGGroup.getSentencePositionStart() - 1; | ||
338 | - if(isPartOfPrepNG(NGGroup, sentence)) { | ||
339 | - SyntacticGroup parentNGGroup = getParentPrepNG(NGGroup, sentence); | ||
340 | - precedingTokenPosition = parentNGGroup.getSentencePositionStart() - 1; | ||
341 | - } | ||
342 | - for (SyntacticWord word : sentence.getSyntacticWords()) { | ||
343 | - int lastWordPosition = word.getSentencePositionEnd(); | ||
344 | - if (precedingTokenPosition == lastWordPosition && | ||
345 | - (word.getCtag().equals("Verbfin") || word.getCtag().equals("Inf"))) { | ||
346 | - if (verbModifyNG(word, NGGroup, PrepNGGroup, sentence, walentyMapping)) { | ||
347 | - return true; | ||
348 | - } | ||
349 | - if (!walentyMapping.containsKey(word.getBase())) { | ||
350 | - return true; | ||
351 | - } | ||
352 | - | ||
353 | - } | ||
354 | - } | ||
355 | - return false; | ||
356 | - } | ||
357 | - | ||
358 | - private static boolean verbModifyNG(SyntacticWord verb, SyntacticGroup NGGroup, | ||
359 | - SyntacticGroup PrepNGGroup, Sentence sentence, | ||
360 | - Map<String,ArrayList<String>> walentyMapping) { | ||
361 | - String verbBase = verb.getBase(); | ||
362 | - if (!walentyMapping.containsKey(verbBase)) { | ||
363 | - return true; | ||
364 | - } else { | ||
365 | - ArrayList<String> schemata = walentyMapping.get(verbBase); | ||
366 | - | ||
367 | - // PrepNG + PrepNG | ||
368 | - if (isPartOfPrepNG(NGGroup, sentence)) { | ||
369 | - SyntacticGroup NGParentGroup = getParentPrepNG(NGGroup, sentence); | ||
370 | - ArrayList<String> prepNG1Realizations = NGParentGroup.getWalentyRealizations(); | ||
371 | - ArrayList<String> prepNG2Realizations = PrepNGGroup.getWalentyRealizations(); | ||
372 | - for (String schema : schemata) { | ||
373 | - if (isProperSchema(schema, prepNG1Realizations, prepNG2Realizations)) { | ||
374 | - return true; | ||
375 | - } | ||
376 | - } | ||
377 | - } | ||
378 | - | ||
379 | - // NG + PrepNG | ||
380 | - else { | ||
381 | - ArrayList<String> NGRealizations = NGGroup.getWalentyRealizations(); | ||
382 | - ArrayList<String> prepNGRealizations = PrepNGGroup.getWalentyRealizations(); | ||
383 | - for (String schema : schemata) { | ||
384 | - if (isProperSchema(schema, NGRealizations, prepNGRealizations)) { | ||
385 | - return true; | ||
386 | - } | ||
387 | - } | ||
388 | - } | ||
389 | - } | ||
390 | - return false; | ||
391 | } | 156 | } |
392 | 157 | ||
393 | private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | 158 | private static boolean isProperSchema(String schema, ArrayList<String> group1Types, |
394 | ArrayList<String> group2Types) { | 159 | ArrayList<String> group2Types) { |
395 | for (String group1Type : group1Types) { | 160 | for (String group1Type : group1Types) { |
396 | - if (schema.contains(group1Type)) { | 161 | + if (schemaContains(schema, group1Type)) { |
397 | for (String group2Type : group2Types) { | 162 | for (String group2Type : group2Types) { |
398 | - if (schema.contains(group2Type)) { | 163 | + if (schemaContains(schema, group2Type)) { |
399 | return true; | 164 | return true; |
400 | } | 165 | } |
401 | } | 166 | } |
@@ -404,103 +169,71 @@ public class Detector { | @@ -404,103 +169,71 @@ public class Detector { | ||
404 | return false; | 169 | return false; |
405 | } | 170 | } |
406 | 171 | ||
407 | - private static SyntacticGroup getParentPrepNG(SyntacticGroup NGGroup, | ||
408 | - Sentence sentence) { | ||
409 | - SyntacticGroup parentPrepNG = null; | ||
410 | - int NGGroupStart = NGGroup.getSentencePositionStart(); | ||
411 | - int NGGroupEnd = NGGroup.getSentencePositionEnd(); | ||
412 | - for (SyntacticGroup group : sentence.getGroups()) { | ||
413 | - if (group.getType().startsWith("PrepNG") && | ||
414 | - group.getSentencePositionStart() <= NGGroupStart && | ||
415 | - group.getSentencePositionEnd() >= NGGroupEnd) { | ||
416 | - if (parentPrepNG == null || group.getTokens().size() > parentPrepNG.getTokens().size()) { | ||
417 | - parentPrepNG = group; | ||
418 | - } | ||
419 | - } | ||
420 | - } | ||
421 | - return parentPrepNG; | ||
422 | - } | ||
423 | - | ||
424 | - private static boolean NGPrepNGValenceCompatibility(SyntacticGroup NGGroup, | ||
425 | - SyntacticGroup PrepNGGroup, Sentence sentence, | 172 | + private static boolean groupsValenceCompatibility(SyntacticGroup NG1, |
173 | + SyntacticGroup NG2, Sentence sentence, | ||
426 | Map<String,ArrayList<String>> walentyMapping) { | 174 | Map<String,ArrayList<String>> walentyMapping) { |
427 | - Token NGHead = NGGroup.getSemanticHeadTokens().get(0); | 175 | + Token NG1Head = NG1.getSemanticHeadTokens().get(0); |
428 | 176 | ||
429 | - String NGHeadBase = NGHead.getBase(); | 177 | + String NGHeadBase = NG1Head.getBase(); |
430 | 178 | ||
431 | if (!walentyMapping.containsKey(NGHeadBase)) { | 179 | if (!walentyMapping.containsKey(NGHeadBase)) { |
432 | return false; | 180 | return false; |
433 | } else { | 181 | } else { |
434 | - SyntacticWord prepWord = PrepNGGroup.getFirstWord(); | 182 | + ArrayList<String> NG2realizations = NG2.getWalentyRealizations(); |
435 | 183 | ||
436 | - if (prepWord.getTokens().size() == 1) { | ||
437 | - Token prep = prepWord.getTokens().get(0); | ||
438 | - String prepBase = prep.getBase(); | ||
439 | - String prepCase = PrepNGGroup.getSemanticHeadTokens().get(0).getCase(); | ||
440 | - String prepnp = String.format("prepnp(%s,%s)", prepBase, prepCase); | ||
441 | - ArrayList<String> schemata = walentyMapping.get(NGHeadBase); | ||
442 | - for (String schema : schemata) { | ||
443 | - if (schemaContains(schema, prepnp)) { | ||
444 | - return true; | ||
445 | - } | ||
446 | - } | ||
447 | - } else if (prepWord.getTokens().size() > 1) { | ||
448 | - String prepOrth = prepWord.getOrth().toLowerCase(); | ||
449 | - String comprepnp = String.format("comprepnp(%s)", prepOrth); | ||
450 | - ArrayList<String> schemata = walentyMapping.get(NGHeadBase); | 184 | + ArrayList<String> schemata = walentyMapping.get(NGHeadBase); |
185 | + for (String real : NG2realizations) { | ||
451 | for (String schema : schemata) { | 186 | for (String schema : schemata) { |
452 | - if (schemaContains(schema, comprepnp)) { | 187 | + if (schemaContains(schema, real)) { |
453 | return true; | 188 | return true; |
454 | } | 189 | } |
455 | } | 190 | } |
456 | - | ||
457 | } | 191 | } |
458 | - | ||
459 | } | 192 | } |
460 | return false; | 193 | return false; |
461 | } | 194 | } |
462 | 195 | ||
463 | - private static boolean NGNGValenceCompatibility(SyntacticGroup NG1, | ||
464 | - SyntacticGroup NG2, Sentence sentence, | 196 | + private static boolean tripleCompatibility(SyntacticGroup group1, |
197 | + SyntacticGroup group2, SyntacticGroup group3, | ||
465 | Map<String,ArrayList<String>> walentyMapping) { | 198 | Map<String,ArrayList<String>> walentyMapping) { |
466 | - Token NG1Head = NG1.getSemanticHeadTokens().get(0); | 199 | + Token group1Head = group1.getSemanticHeadTokens().get(0); |
467 | 200 | ||
468 | - String NGHeadBase = NG1Head.getBase(); | 201 | + String group1HeadBase = group1Head.getBase(); |
469 | 202 | ||
470 | - if (!walentyMapping.containsKey(NGHeadBase)) { | 203 | + if (!walentyMapping.containsKey(group1HeadBase)) { |
471 | return false; | 204 | return false; |
472 | } else { | 205 | } else { |
473 | - ArrayList<String> NG2realizations = NG2.getWalentyRealizations(); | 206 | + ArrayList<String> group2realizations = group2.getWalentyRealizations(); |
207 | + ArrayList<String> group3realizations = group3.getWalentyRealizations(); | ||
474 | 208 | ||
475 | - ArrayList<String> schemata = walentyMapping.get(NGHeadBase); | ||
476 | - for (String real : NG2realizations) { | ||
477 | - for (String schema : schemata) { | ||
478 | - if (schemaContains(schema, real)) { | ||
479 | - return true; | ||
480 | - } | 209 | + ArrayList<String> schemata = walentyMapping.get(group1HeadBase); |
210 | + for (String schema : schemata) { | ||
211 | + if (isProperSchema(schema, group2realizations, group3realizations)) { | ||
212 | + return true; | ||
481 | } | 213 | } |
482 | } | 214 | } |
483 | } | 215 | } |
484 | return false; | 216 | return false; |
485 | } | 217 | } |
486 | 218 | ||
487 | - private static boolean NGcomplexPrepNGValenceCompatibility(SyntacticGroup NGGroup1, | ||
488 | - SyntacticGroup NGGroup2, Sentence sentence, | 219 | + private static boolean quatroCompatibility(SyntacticGroup group1, |
220 | + SyntacticGroup group2, SyntacticGroup group3, SyntacticGroup group4, | ||
489 | Map<String,ArrayList<String>> walentyMapping) { | 221 | Map<String,ArrayList<String>> walentyMapping) { |
490 | - | ||
491 | - Token NGHead = NGGroup1.getSemanticHeadTokens().get(0); | ||
492 | - String NGHeadBase = NGHead.getBase(); | 222 | + Token group1Head = group1.getSemanticHeadTokens().get(0); |
223 | + | ||
224 | + String group1HeadBase = group1Head.getBase(); | ||
493 | 225 | ||
494 | - if (!walentyMapping.containsKey(NGHeadBase)) { | 226 | + if (!walentyMapping.containsKey(group1HeadBase)) { |
495 | return false; | 227 | return false; |
496 | } else { | 228 | } else { |
497 | - int prepStart = NGGroup1.getSentencePositionEnd() + 1; | ||
498 | - int prepEnd = NGGroup2.getSentencePositionStart() - 1; | ||
499 | - String complexPrep = sentence.getTextInsideSpan(prepStart, prepEnd); | ||
500 | - String comprepnp = String.format("comprepnp(%s)", complexPrep); | ||
501 | - ArrayList<String> schemata = walentyMapping.get(NGHeadBase); | 229 | + ArrayList<String> group2realizations = group2.getWalentyRealizations(); |
230 | + ArrayList<String> group3realizations = group3.getWalentyRealizations(); | ||
231 | + ArrayList<String> group4realizations = group4.getWalentyRealizations(); | ||
232 | + | ||
233 | + ArrayList<String> schemata = walentyMapping.get(group1HeadBase); | ||
502 | for (String schema : schemata) { | 234 | for (String schema : schemata) { |
503 | - if (schemaContains(schema, comprepnp)) { | 235 | + if (isTripleProperSchema(schema, group2realizations, group3realizations, |
236 | + group4realizations)) { | ||
504 | return true; | 237 | return true; |
505 | } | 238 | } |
506 | } | 239 | } |
@@ -508,67 +241,119 @@ public class Detector { | @@ -508,67 +241,119 @@ public class Detector { | ||
508 | return false; | 241 | return false; |
509 | } | 242 | } |
510 | 243 | ||
511 | - private static boolean schemaContains(String schema, String phraseType) { | ||
512 | - for (String position : schema.split("\\s\\+\\s")) { | ||
513 | - position = position.trim(); | ||
514 | - position = position.substring(1, position.length()-1); | ||
515 | - for (String phrT : position.split(";")) { | ||
516 | - if (phrT.equals(phraseType)) { | ||
517 | - return true; | 244 | + private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types, |
245 | + ArrayList<String> group2Types, ArrayList<String> group3Types) { | ||
246 | + for (String group1Type : group1Types) { | ||
247 | + if (schemaContains(schema, group1Type)) { | ||
248 | + for (String group2Type : group2Types) { | ||
249 | + if (schemaContains(schema, group2Type)) { | ||
250 | + for (String group3Type : group3Types) { | ||
251 | + if (schemaContains(schema, group3Type)) { | ||
252 | + return true; | ||
253 | + } | ||
254 | + } | ||
255 | + } | ||
518 | } | 256 | } |
519 | } | 257 | } |
520 | } | 258 | } |
521 | return false; | 259 | return false; |
522 | } | 260 | } |
523 | 261 | ||
524 | - private static boolean schemaContainsType(String schema, String type) { | ||
525 | - // to lepiej dziala dla rzeczownikow | ||
526 | - for (String position : schema.split("\\s\\+\\s")) { | ||
527 | - position = position.trim(); | ||
528 | - position = position.substring(1, position.length()-1); | ||
529 | - for (String phrT : position.split(";")) { | ||
530 | - | ||
531 | - if (phrT.startsWith(type+"(")) { | ||
532 | - return true; | 262 | + /*private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types, |
263 | + ArrayList<String> group2Types, ArrayList<String> group3Types) { | ||
264 | + | ||
265 | + ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types); | ||
266 | + ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types); | ||
267 | + ArrayList<String> group3MPositions = getMatchingPositions(schema, group3Types); | ||
268 | + | ||
269 | + | ||
270 | + | ||
271 | + ArrayList<String> group1MPositionsCopy = new ArrayList<String>(); | ||
272 | + ArrayList<String> group2MPositionsCopy = getMatchingPositions(schema, group2Types); | ||
273 | + ArrayList<String> group3MPositionsCopy = getMatchingPositions(schema, group3Types); | ||
274 | + | ||
275 | + | ||
276 | + if (group1MPositions.isEmpty() || group2MPositions.isEmpty() || group3MPositions.isEmpty()) { | ||
277 | + return false; | ||
278 | + } | ||
279 | + | ||
280 | + boolean group1ok = false; | ||
281 | + boolean group2ok = false; | ||
282 | + boolean group3ok = false; | ||
283 | + | ||
284 | + for (String pos : group1MPositions) { | ||
285 | + | ||
286 | + } | ||
287 | + | ||
288 | + ArrayList<String> | ||
289 | + | ||
290 | + if (union(group1MPositions, group2MPositions).size() > group1MPositions.size() && | ||
291 | + ) | ||
292 | + | ||
293 | + | ||
294 | + for (String group1Type : group1Types) { | ||
295 | + if (schemaContains(schema, group1Type)) { | ||
296 | + for (String group2Type : group2Types) { | ||
297 | + if (schemaContains(schema, group2Type)) { | ||
298 | + for (String group3Type : group3Types) { | ||
299 | + if (schemaContains(schema, group3Type)) { | ||
300 | + return true; | ||
301 | + } | ||
302 | + } | ||
303 | + } | ||
533 | } | 304 | } |
534 | } | 305 | } |
535 | } | 306 | } |
536 | return false; | 307 | return false; |
308 | + }*/ | ||
309 | + | ||
310 | + public static List<String> union(List<String> list1, List<String> list2) { | ||
311 | + HashSet<String> set = new HashSet<String>(); | ||
312 | + | ||
313 | + set.addAll(list1); | ||
314 | + set.addAll(list2); | ||
315 | + | ||
316 | + return new ArrayList<String>(set); | ||
537 | } | 317 | } |
538 | 318 | ||
319 | + public static List<String> tripleUnion(List<String> list1, List<String> list2, | ||
320 | + List<String> list3) { | ||
321 | + HashSet<String> set = new HashSet<String>(); | ||
322 | + | ||
323 | + set.addAll(list1); | ||
324 | + set.addAll(list2); | ||
325 | + set.addAll(list3); | ||
326 | + | ||
327 | + return new ArrayList<String>(set); | ||
328 | + } | ||
539 | 329 | ||
540 | - // compar ?? | ||
541 | - private static ArrayList<String> getPrepnps(String prepBase, String prepCase) { | ||
542 | - ArrayList<String> prepnps = new ArrayList<String>(); | ||
543 | - prepnps.add(String.format("prepnp(%s,%s)", prepBase, prepCase)); | ||
544 | - if (prepCase.equals("nom") || prepCase.equals("gen") || prepCase.equals("acc")) { | ||
545 | - prepnps.add(String.format("prepnp(%s,str)", prepBase)); | ||
546 | - } | ||
547 | - if (prepCase.equals("gen") || prepCase.equals("acc")) { | ||
548 | - prepnps.add(String.format("prepnp(%s,part)", prepBase)); | 330 | + private static ArrayList<String> getMatchingPositions(String schema, ArrayList<String> phraseRealizations) { |
331 | + ArrayList<String> positions = new ArrayList<String>(); | ||
332 | + for (String position : schema.split("\\s\\+\\s")) { | ||
333 | + position = position.trim(); | ||
334 | + position = position.substring(1, position.length()-1); | ||
335 | + for (String phrT : position.split(";")) { | ||
336 | + if (phraseRealizations.contains(phrT.trim())) { | ||
337 | + positions.add(position); | ||
338 | + break; | ||
339 | + } | ||
340 | + } | ||
549 | } | 341 | } |
550 | - return prepnps; | 342 | + return positions; |
551 | } | 343 | } |
552 | 344 | ||
553 | - // eliminuje "od wsi do wsi" | ||
554 | - private static boolean sameSemanticHeads(SyntacticGroup group1, | ||
555 | - SyntacticGroup group2) { | ||
556 | - | ||
557 | - List<Token> group1HeadTokens = group1.getSemanticHeadTokens(); | ||
558 | - List<Token> group2HeadTokens = group2.getSemanticHeadTokens(); | ||
559 | - if (group1HeadTokens.size() != group2HeadTokens.size()) { | ||
560 | - return false; | ||
561 | - } | ||
562 | - | ||
563 | - for (int i=0; i < group1HeadTokens.size(); i++) { | ||
564 | - if (!group1HeadTokens.get(i).getBase().equals(group2HeadTokens.get(i).getBase())) { | ||
565 | - return false; | 345 | + private static boolean schemaContains(String schema, String phraseType) { |
346 | + for (String position : schema.split("\\s\\+\\s")) { | ||
347 | + position = position.trim(); | ||
348 | + position = position.substring(1, position.length()-1); | ||
349 | + for (String phrT : position.split(";")) { | ||
350 | + if (phrT.equals(phraseType)) { | ||
351 | + return true; | ||
352 | + } | ||
566 | } | 353 | } |
567 | } | 354 | } |
568 | - | ||
569 | - return true; | 355 | + return false; |
570 | } | 356 | } |
571 | - | ||
572 | 357 | ||
573 | /** | 358 | /** |
574 | * Wyszukuję i oznaczam wszystkie NER | 359 | * Wyszukuję i oznaczam wszystkie NER |
src/main/java/pl/waw/ipipan/zil/core/md/entities/Mention.java
1 | package pl.waw.ipipan.zil.core.md.entities; | 1 | package pl.waw.ipipan.zil.core.md.entities; |
2 | 2 | ||
3 | import java.util.ArrayList; | 3 | import java.util.ArrayList; |
4 | -import java.util.Arrays; | ||
5 | import java.util.List; | 4 | import java.util.List; |
6 | 5 | ||
6 | +import pl.waw.ipipan.zil.core.md.detection.Constants; | ||
7 | + | ||
7 | /** | 8 | /** |
8 | * @author Mateusz Kopec | 9 | * @author Mateusz Kopec |
10 | + * Modified 2017 by Bartlomiej Niton | ||
9 | * | 11 | * |
10 | */ | 12 | */ |
11 | public class Mention implements Comparable<Mention> { | 13 | public class Mention implements Comparable<Mention> { |
@@ -205,77 +207,22 @@ public class Mention implements Comparable<Mention> { | @@ -205,77 +207,22 @@ public class Mention implements Comparable<Mention> { | ||
205 | return isZeroSubject; | 207 | return isZeroSubject; |
206 | } | 208 | } |
207 | 209 | ||
208 | - public int getSentencePositionStart() { | 210 | + public int getSentenceStartPosition() { |
209 | Token startToken = this.getFirstSegment(); | 211 | Token startToken = this.getFirstSegment(); |
210 | return startToken.getSentencePosition(); | 212 | return startToken.getSentencePosition(); |
211 | } | 213 | } |
212 | 214 | ||
213 | - public int getSentencePositionEnd() { | 215 | + public int getSentenceEndPosition() { |
214 | Token endToken = this.getLastSegment(); | 216 | Token endToken = this.getLastSegment(); |
215 | return endToken.getSentencePosition(); | 217 | return endToken.getSentencePosition(); |
216 | } | 218 | } |
217 | - | ||
218 | - public boolean isPartOfQub() { | ||
219 | - if (this.segments.size() == 1) { | ||
220 | - Sentence sentence = this.segments.get(0).getSentence(); | ||
221 | - for (SyntacticWord word : sentence.getSyntacticWords()) { | ||
222 | - if (word.getTokens().contains(this.segments.get(0)) && | ||
223 | - word.getCtag().equals("Qub")) { | ||
224 | - return true; | ||
225 | - } | ||
226 | - } | ||
227 | - } | ||
228 | - return false; | ||
229 | - } | ||
230 | - | ||
231 | - public boolean isPartOfPrep() { | ||
232 | - if (this.segments.size() == 1) { | ||
233 | - Sentence sentence = this.segments.get(0).getSentence(); | ||
234 | - for (SyntacticWord word : sentence.getSyntacticWords()) { | ||
235 | - if (word.getTokens().contains(this.segments.get(0)) && | ||
236 | - word.getCtag().equals("Prep")) { | ||
237 | - return true; | ||
238 | - } | ||
239 | - } | ||
240 | - } | ||
241 | - return false; | ||
242 | - } | ||
243 | - | ||
244 | - private final List<String> FRAZEOS = Arrays.asList("Prep", "Qub", "Adv", "Interj", | ||
245 | - "Adj", "Conj", "Comp"); | ||
246 | 219 | ||
247 | public boolean isPartOfFrazeo() { | 220 | public boolean isPartOfFrazeo() { |
248 | if (this.segments.size() == 1) { | 221 | if (this.segments.size() == 1) { |
249 | Sentence sentence = this.segments.get(0).getSentence(); | 222 | Sentence sentence = this.segments.get(0).getSentence(); |
250 | for (SyntacticWord word : sentence.getSyntacticWords()) { | 223 | for (SyntacticWord word : sentence.getSyntacticWords()) { |
251 | if (word.getTokens().contains(this.segments.get(0)) && | 224 | if (word.getTokens().contains(this.segments.get(0)) && |
252 | - FRAZEOS.contains(word.getCtag())) { | ||
253 | - return true; | ||
254 | - } | ||
255 | - } | ||
256 | - } | ||
257 | - return false; | ||
258 | - } | ||
259 | - | ||
260 | - public boolean isPartOfComplexPrep(ArrayList<String> complexPreps) { | ||
261 | - if (this.segments.size() == 1) { | ||
262 | - Sentence sentence = this.segments.get(0).getSentence(); | ||
263 | - if (this.getSentencePositionStart() - 1 >= 0) { | ||
264 | - String prep = sentence.get(this.getSentencePositionStart() - 1).getOrth(); | ||
265 | - String noun = sentence.get(this.getSentencePositionStart()).getOrth(); | ||
266 | - String possiblePrep = String.format("%s %s", prep, noun); | ||
267 | - if (complexPreps.contains(possiblePrep)) { | ||
268 | - return true; | ||
269 | - } | ||
270 | - } | ||
271 | - | ||
272 | - if (this.getSentencePositionStart() - 1 >= 0 && | ||
273 | - this.getSentencePositionStart() + 1 < sentence.size()) { | ||
274 | - String prep1 = sentence.get(this.getSentencePositionStart() - 1).getOrth(); | ||
275 | - String noun = sentence.get(this.getSentencePositionStart()).getOrth(); | ||
276 | - String prep2 = sentence.get(this.getSentencePositionStart() + 1).getOrth(); | ||
277 | - String possiblePrep = String.format("%s %s %s", prep1, noun, prep2); | ||
278 | - if (complexPreps.contains(possiblePrep)) { | 225 | + Constants.FRAZEO_CTAGS.contains(word.getCtag())) { |
279 | return true; | 226 | return true; |
280 | } | 227 | } |
281 | } | 228 | } |
src/main/java/pl/waw/ipipan/zil/core/md/entities/Sentence.java
@@ -110,35 +110,6 @@ public class Sentence extends ArrayList<Token> { | @@ -110,35 +110,6 @@ public class Sentence extends ArrayList<Token> { | ||
110 | namedEntities.add(namedEntity); | 110 | namedEntities.add(namedEntity); |
111 | } | 111 | } |
112 | 112 | ||
113 | - public ArrayList<SyntacticGroup> getGroupsInsideSpan(int start, int end) { | ||
114 | - ArrayList<SyntacticGroup> groupsAtSpan = new ArrayList<SyntacticGroup>(); | ||
115 | - for (SyntacticGroup group : this.syntacticGroups) { | ||
116 | - if (group.getSentencePositionStart() >= start && | ||
117 | - group.getSentencePositionEnd() <= end) { | ||
118 | - if (!(group.getSentencePositionStart() == start && | ||
119 | - group.getSentencePositionEnd() == end)) { | ||
120 | - groupsAtSpan.add(group); | ||
121 | - } | ||
122 | - } | ||
123 | - } | ||
124 | - return groupsAtSpan; | ||
125 | - } | ||
126 | - | ||
127 | - public ArrayList<SyntacticGroup> getLargestNotIntersectingGroupsInsideSpan(int start, int end) { | ||
128 | - ArrayList<SyntacticGroup> groupsAtSpan = new ArrayList<SyntacticGroup>(); | ||
129 | - for (SyntacticGroup group : this.syntacticGroups) { | ||
130 | - | ||
131 | - if (group.getSentencePositionStart() >= start && | ||
132 | - group.getSentencePositionEnd() <= end) { | ||
133 | - if (!(group.getSentencePositionStart() == start && | ||
134 | - group.getSentencePositionEnd() == end)) { | ||
135 | - groupsAtSpan.add(group); | ||
136 | - } | ||
137 | - } | ||
138 | - } | ||
139 | - return groupsAtSpan; | ||
140 | - } | ||
141 | - | ||
142 | public SyntacticGroup getFirstGroup(int start, int end) { | 113 | public SyntacticGroup getFirstGroup(int start, int end) { |
143 | SyntacticGroup largestGroup = null; | 114 | SyntacticGroup largestGroup = null; |
144 | int step = start; | 115 | int step = start; |
@@ -152,8 +123,8 @@ public class Sentence extends ArrayList<Token> { | @@ -152,8 +123,8 @@ public class Sentence extends ArrayList<Token> { | ||
152 | private SyntacticGroup getLargestGroupOnStartPoint(int start, int end) { | 123 | private SyntacticGroup getLargestGroupOnStartPoint(int start, int end) { |
153 | SyntacticGroup largestGroup = null; | 124 | SyntacticGroup largestGroup = null; |
154 | for (SyntacticGroup group : this.getGroups()) { | 125 | for (SyntacticGroup group : this.getGroups()) { |
155 | - int groupStart = group.getSentencePositionStart(); | ||
156 | - int groupEnd = group.getSentencePositionEnd(); | 126 | + int groupStart = group.getSentenceStartPosition(); |
127 | + int groupEnd = group.getSentenceEndPosition(); | ||
157 | if (groupStart == start && groupEnd <= end && | 128 | if (groupStart == start && groupEnd <= end && |
158 | !(groupStart == start && groupEnd == end) && | 129 | !(groupStart == start && groupEnd == end) && |
159 | (largestGroup == null || | 130 | (largestGroup == null || |
@@ -177,8 +148,8 @@ public class Sentence extends ArrayList<Token> { | @@ -177,8 +148,8 @@ public class Sentence extends ArrayList<Token> { | ||
177 | private SyntacticGroup getLargestGroupOnEndPoint(int start, int end) { | 148 | private SyntacticGroup getLargestGroupOnEndPoint(int start, int end) { |
178 | SyntacticGroup largestGroup = null; | 149 | SyntacticGroup largestGroup = null; |
179 | for (SyntacticGroup group : this.getGroups()) { | 150 | for (SyntacticGroup group : this.getGroups()) { |
180 | - int groupStart = group.getSentencePositionStart(); | ||
181 | - int groupEnd = group.getSentencePositionEnd(); | 151 | + int groupStart = group.getSentenceStartPosition(); |
152 | + int groupEnd = group.getSentenceEndPosition(); | ||
182 | if (groupEnd == end && groupStart >= start && | 153 | if (groupEnd == end && groupStart >= start && |
183 | !(groupStart == start && groupEnd == end) && | 154 | !(groupStart == start && groupEnd == end) && |
184 | (largestGroup == null || | 155 | (largestGroup == null || |
@@ -189,38 +160,4 @@ public class Sentence extends ArrayList<Token> { | @@ -189,38 +160,4 @@ public class Sentence extends ArrayList<Token> { | ||
189 | return largestGroup; | 160 | return largestGroup; |
190 | } | 161 | } |
191 | 162 | ||
192 | - public ArrayList<Mention> getMentionsInsideSpan(int start, int end) { | ||
193 | - ArrayList<Mention> mentionsAtSpan = new ArrayList<Mention>(); | ||
194 | - for (Mention mention : this.mentions) { | ||
195 | - if (mention.getSentencePositionStart() >= start && | ||
196 | - mention.getSentencePositionEnd() <= end) { | ||
197 | - mentionsAtSpan.add(mention); | ||
198 | - } | ||
199 | - } | ||
200 | - return mentionsAtSpan; | ||
201 | - } | ||
202 | - | ||
203 | - public String getTextInsideSpan(int start, int end) { | ||
204 | - String text = ""; | ||
205 | - int step = start; | ||
206 | - while (step <= end) { | ||
207 | - if (step != start) { | ||
208 | - text += " "; | ||
209 | - } | ||
210 | - text += this.get(step).getOrth(); | ||
211 | - step++; | ||
212 | - } | ||
213 | - return text; | ||
214 | - } | ||
215 | - | ||
216 | - public ArrayList<Token> getSegmentsInsideSpan(int start, int end) { | ||
217 | - ArrayList<Token> tokensAtSpan = new ArrayList<Token>(); | ||
218 | - int step = start; | ||
219 | - while (step <= end) { | ||
220 | - tokensAtSpan.add(this.get(step)); | ||
221 | - step++; | ||
222 | - } | ||
223 | - return tokensAtSpan; | ||
224 | - } | ||
225 | - | ||
226 | } | 163 | } |
src/main/java/pl/waw/ipipan/zil/core/md/entities/SyntacticGroup.java
@@ -55,34 +55,19 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | @@ -55,34 +55,19 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | ||
55 | return getType().compareTo(o.getType()); | 55 | return getType().compareTo(o.getType()); |
56 | } | 56 | } |
57 | 57 | ||
58 | - public int getSentencePositionStart() { | 58 | + public int getSentenceStartPosition() { |
59 | Token startToken = tokens.get(0); | 59 | Token startToken = tokens.get(0); |
60 | return startToken.getSentencePosition(); | 60 | return startToken.getSentencePosition(); |
61 | } | 61 | } |
62 | 62 | ||
63 | - public int getSentencePositionEnd() { | 63 | + public int getSentenceEndPosition() { |
64 | Token endToken = tokens.get(tokens.size()-1); | 64 | Token endToken = tokens.get(tokens.size()-1); |
65 | return endToken.getSentencePosition(); | 65 | return endToken.getSentencePosition(); |
66 | } | 66 | } |
67 | 67 | ||
68 | - | ||
69 | - public SyntacticWord getFirstWord() { | ||
70 | - SyntacticWord firstWord = null; | ||
71 | - Token startToken = tokens.get(0); | ||
72 | - Sentence sentence = startToken.getSentence(); | ||
73 | - for (SyntacticWord word : sentence.getSyntacticWords()) { | ||
74 | - if(startToken.compareTo(word.getTokens().get(0)) == 0 && | ||
75 | - (firstWord == null || firstWord.getTokens().size() < word.getTokens().size())) { | ||
76 | - firstWord = word; | ||
77 | - } | ||
78 | - } | ||
79 | - return firstWord; | ||
80 | - } | ||
81 | - | ||
82 | - // NG and PrepNG only now | ||
83 | public ArrayList<String> getWalentyRealizations() { | 68 | public ArrayList<String> getWalentyRealizations() { |
84 | ArrayList<String> realizations = new ArrayList<String>(); | 69 | ArrayList<String> realizations = new ArrayList<String>(); |
85 | - if (this.type.startsWith("PrepNG")) { | 70 | + if (this.type.equals("PrepNG")) { |
86 | SyntacticWord prepWord = this.getFirstWord(); | 71 | SyntacticWord prepWord = this.getFirstWord(); |
87 | if (prepWord.getTokens().size() == 1) { | 72 | if (prepWord.getTokens().size() == 1) { |
88 | 73 | ||
@@ -105,7 +90,19 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | @@ -105,7 +90,19 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | ||
105 | return realizations; | 90 | return realizations; |
106 | } | 91 | } |
107 | 92 | ||
108 | - // compar ?? | 93 | + public SyntacticWord getFirstWord() { |
94 | + SyntacticWord firstWord = null; | ||
95 | + Token startToken = tokens.get(0); | ||
96 | + Sentence sentence = startToken.getSentence(); | ||
97 | + for (SyntacticWord word : sentence.getSyntacticWords()) { | ||
98 | + if(startToken.compareTo(word.getTokens().get(0)) == 0 && | ||
99 | + (firstWord == null || firstWord.getTokens().size() < word.getTokens().size())) { | ||
100 | + firstWord = word; | ||
101 | + } | ||
102 | + } | ||
103 | + return firstWord; | ||
104 | + } | ||
105 | + | ||
109 | private ArrayList<String> getPrepnps(String prepBase, String prepCase) { | 106 | private ArrayList<String> getPrepnps(String prepBase, String prepCase) { |
110 | ArrayList<String> prepnps = new ArrayList<String>(); | 107 | ArrayList<String> prepnps = new ArrayList<String>(); |
111 | prepnps.add(String.format("prepnp(%s,%s)", prepBase, prepCase)); | 108 | prepnps.add(String.format("prepnp(%s,%s)", prepBase, prepCase)); |
@@ -130,44 +127,13 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | @@ -130,44 +127,13 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | ||
130 | return nps; | 127 | return nps; |
131 | } | 128 | } |
132 | 129 | ||
133 | - public boolean precedingWordIsVerb() { | ||
134 | - Sentence sentence = this.tokens.get(0).getSentence(); | ||
135 | - int precedingTokenPosition = this.getSentencePositionStart() - 1; | ||
136 | - for (SyntacticWord word : sentence.getSyntacticWords()) { | ||
137 | - int lastWordPosition = word.getSentencePositionEnd(); | ||
138 | - if (precedingTokenPosition == lastWordPosition && | ||
139 | - (word.getCtag().equals("Verbfin") || word.getCtag().equals("Inf"))) { | ||
140 | - return true; | ||
141 | - } | ||
142 | - } | ||
143 | - return false; | ||
144 | - } | ||
145 | - | ||
146 | - public SyntacticGroup getNextNG() { | ||
147 | - Sentence sentence = this.tokens.get(0).getSentence(); | ||
148 | - int thisGroupEnd = this.getSentencePositionEnd(); | ||
149 | - int sentenceLength = sentence.size(); | ||
150 | - | ||
151 | - SyntacticGroup nextNG = null; | ||
152 | - for (int step = thisGroupEnd; step < sentenceLength; step++) { | ||
153 | - nextNG = sentence.getFirstGroup(step, sentenceLength); | ||
154 | - if (nextNG != null && nextNG.type.startsWith("NG") && | ||
155 | - this.getSentencePositionEnd() < nextNG.getSentencePositionStart()) { | ||
156 | - break; | ||
157 | - } else { | ||
158 | - nextNG = null; | ||
159 | - } | ||
160 | - } | ||
161 | - return nextNG; | ||
162 | - } | ||
163 | - | ||
164 | public SyntacticGroup getFollowingGroup() { | 130 | public SyntacticGroup getFollowingGroup() { |
165 | SyntacticGroup largestGroup = null; | 131 | SyntacticGroup largestGroup = null; |
166 | Sentence sentence = this.tokens.get(0).getSentence(); | 132 | Sentence sentence = this.tokens.get(0).getSentence(); |
167 | - int nextTokenPosition = this.getSentencePositionEnd() + 1; | 133 | + int nextTokenPosition = this.getSentenceEndPosition() + 1; |
168 | for (SyntacticGroup group : sentence.getGroups()) { | 134 | for (SyntacticGroup group : sentence.getGroups()) { |
169 | - if ((group.getType().startsWith("PrepNG") || group.getType().startsWith("NG")) && | ||
170 | - group.getSentencePositionStart() == nextTokenPosition) { | 135 | + if ((group.getType().equals("PrepNG") || group.getType().startsWith("NG")) && |
136 | + group.getSentenceStartPosition() == nextTokenPosition) { | ||
171 | if (largestGroup == null || | 137 | if (largestGroup == null || |
172 | largestGroup.getTokens().size() < group.getTokens().size()) { | 138 | largestGroup.getTokens().size() < group.getTokens().size()) { |
173 | largestGroup = group; | 139 | largestGroup = group; |
@@ -178,14 +144,14 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | @@ -178,14 +144,14 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | ||
178 | } | 144 | } |
179 | 145 | ||
180 | public SyntacticWord getPrecedingVerb() { | 146 | public SyntacticWord getPrecedingVerb() { |
181 | - int precedingTokenPosition = this.getSentencePositionStart() - 1; | 147 | + int precedingTokenPosition = this.getSentenceStartPosition() - 1; |
182 | Sentence sentence = this.tokens.get(0).getSentence(); | 148 | Sentence sentence = this.tokens.get(0).getSentence(); |
183 | if(this.isPartOfPrepNG()) { | 149 | if(this.isPartOfPrepNG()) { |
184 | SyntacticGroup parentNGGroup = this.getParentPrepNG(); | 150 | SyntacticGroup parentNGGroup = this.getParentPrepNG(); |
185 | - precedingTokenPosition = parentNGGroup.getSentencePositionStart() - 1; | 151 | + precedingTokenPosition = parentNGGroup.getSentenceStartPosition() - 1; |
186 | } | 152 | } |
187 | for (SyntacticWord word : sentence.getSyntacticWords()) { | 153 | for (SyntacticWord word : sentence.getSyntacticWords()) { |
188 | - int lastWordPosition = word.getSentencePositionEnd(); | 154 | + int lastWordPosition = word.getSentenceEndPosition(); |
189 | if (precedingTokenPosition == lastWordPosition && | 155 | if (precedingTokenPosition == lastWordPosition && |
190 | (word.getCtag().equals("Verbfin") || word.getCtag().equals("Inf"))) { | 156 | (word.getCtag().equals("Verbfin") || word.getCtag().equals("Inf"))) { |
191 | return word; | 157 | return word; |
@@ -195,13 +161,13 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | @@ -195,13 +161,13 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | ||
195 | } | 161 | } |
196 | 162 | ||
197 | private boolean isPartOfPrepNG() { | 163 | private boolean isPartOfPrepNG() { |
198 | - int NGGroupStart = this.getSentencePositionStart(); | ||
199 | - int NGGroupEnd = this.getSentencePositionEnd(); | 164 | + int NGGroupStart = this.getSentenceStartPosition(); |
165 | + int NGGroupEnd = this.getSentenceEndPosition(); | ||
200 | Sentence sentence = this.tokens.get(0).getSentence(); | 166 | Sentence sentence = this.tokens.get(0).getSentence(); |
201 | for (SyntacticGroup group : sentence.getGroups()) { | 167 | for (SyntacticGroup group : sentence.getGroups()) { |
202 | - if (group.getType().startsWith("PrepNG") && | ||
203 | - group.getSentencePositionStart() <= NGGroupStart && | ||
204 | - group.getSentencePositionEnd() >= NGGroupEnd) { | 168 | + if (group.getType().equals("PrepNG") && |
169 | + group.getSentenceStartPosition() <= NGGroupStart && | ||
170 | + group.getSentenceEndPosition() >= NGGroupEnd) { | ||
205 | return true; | 171 | return true; |
206 | } | 172 | } |
207 | } | 173 | } |
@@ -210,13 +176,13 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | @@ -210,13 +176,13 @@ public class SyntacticGroup implements Comparable<SyntacticGroup> { | ||
210 | 176 | ||
211 | private SyntacticGroup getParentPrepNG() { | 177 | private SyntacticGroup getParentPrepNG() { |
212 | SyntacticGroup parentPrepNG = null; | 178 | SyntacticGroup parentPrepNG = null; |
213 | - int NGGroupStart = this.getSentencePositionStart(); | ||
214 | - int NGGroupEnd = this.getSentencePositionEnd(); | 179 | + int NGGroupStart = this.getSentenceStartPosition(); |
180 | + int NGGroupEnd = this.getSentenceEndPosition(); | ||
215 | Sentence sentence = this.tokens.get(0).getSentence(); | 181 | Sentence sentence = this.tokens.get(0).getSentence(); |
216 | for (SyntacticGroup group : sentence.getGroups()) { | 182 | for (SyntacticGroup group : sentence.getGroups()) { |
217 | - if (group.getType().startsWith("PrepNG") && | ||
218 | - group.getSentencePositionStart() <= NGGroupStart && | ||
219 | - group.getSentencePositionEnd() >= NGGroupEnd) { | 183 | + if (group.getType().equals("PrepNG") && |
184 | + group.getSentenceStartPosition() <= NGGroupStart && | ||
185 | + group.getSentenceEndPosition() >= NGGroupEnd) { | ||
220 | if (parentPrepNG == null || group.getTokens().size() > parentPrepNG.getTokens().size()) { | 186 | if (parentPrepNG == null || group.getTokens().size() > parentPrepNG.getTokens().size()) { |
221 | parentPrepNG = group; | 187 | parentPrepNG = group; |
222 | } | 188 | } |
src/main/java/pl/waw/ipipan/zil/core/md/entities/SyntacticWord.java
@@ -4,6 +4,8 @@ import java.util.ArrayList; | @@ -4,6 +4,8 @@ import java.util.ArrayList; | ||
4 | import java.util.Iterator; | 4 | import java.util.Iterator; |
5 | import java.util.List; | 5 | import java.util.List; |
6 | 6 | ||
7 | +import pl.waw.ipipan.zil.core.md.detection.Constants; | ||
8 | + | ||
7 | public class SyntacticWord implements Comparable<SyntacticWord> { | 9 | public class SyntacticWord implements Comparable<SyntacticWord> { |
8 | 10 | ||
9 | private String base; | 11 | private String base; |
@@ -22,6 +24,14 @@ public class SyntacticWord implements Comparable<SyntacticWord> { | @@ -22,6 +24,14 @@ public class SyntacticWord implements Comparable<SyntacticWord> { | ||
22 | public String getCtag() { | 24 | public String getCtag() { |
23 | return ctag; | 25 | return ctag; |
24 | } | 26 | } |
27 | + | ||
28 | + public String getBase() { | ||
29 | + return base; | ||
30 | + } | ||
31 | + | ||
32 | + public String getOrth() { | ||
33 | + return orth; | ||
34 | + } | ||
25 | 35 | ||
26 | public List<Token> getTokens() { | 36 | public List<Token> getTokens() { |
27 | return tokens; | 37 | return tokens; |
@@ -45,33 +55,18 @@ public class SyntacticWord implements Comparable<SyntacticWord> { | @@ -45,33 +55,18 @@ public class SyntacticWord implements Comparable<SyntacticWord> { | ||
45 | return getCtag().compareTo(o.getCtag()); | 55 | return getCtag().compareTo(o.getCtag()); |
46 | } | 56 | } |
47 | 57 | ||
48 | - public int getSentencePositionStart() { | 58 | + public int getSentenceStartPosition() { |
49 | Token startToken = tokens.get(0); | 59 | Token startToken = tokens.get(0); |
50 | return startToken.getSentencePosition(); | 60 | return startToken.getSentencePosition(); |
51 | } | 61 | } |
52 | 62 | ||
53 | - public int getSentencePositionEnd() { | 63 | + public int getSentenceEndPosition() { |
54 | Token endToken = tokens.get(tokens.size()-1); | 64 | Token endToken = tokens.get(tokens.size()-1); |
55 | return endToken.getSentencePosition(); | 65 | return endToken.getSentencePosition(); |
56 | } | 66 | } |
57 | 67 | ||
58 | - public String getBase() { | ||
59 | - return this.base; | ||
60 | - } | ||
61 | - | ||
62 | - public String getOrth() { | ||
63 | - return this.orth; | ||
64 | - } | ||
65 | - | ||
66 | public boolean isVerb() { | 68 | public boolean isVerb() { |
67 | - if (this.ctag.equals("Verbfin") || this.ctag.equals("Inf")) { | ||
68 | - return true; | ||
69 | - } | ||
70 | - return false; | ||
71 | - } | ||
72 | - | ||
73 | - public boolean isInterp() { | ||
74 | - if (this.ctag.equals("Interp")) { | 69 | + if (Constants.VERB_CTAGS.contains(this.ctag)) { |
75 | return true; | 70 | return true; |
76 | } | 71 | } |
77 | return false; | 72 | return false; |
src/main/java/pl/waw/ipipan/zil/core/md/io/tei/TeiLoader.java
@@ -70,7 +70,6 @@ public class TeiLoader { | @@ -70,7 +70,6 @@ public class TeiLoader { | ||
70 | for (TEIMorph mo : m.getHeadMorphs()) | 70 | for (TEIMorph mo : m.getHeadMorphs()) |
71 | headTokens.add(teiMorph2Segment.get(mo)); | 71 | headTokens.add(teiMorph2Segment.get(mo)); |
72 | s.addMention(new Mention(tokens, headTokens, m.isZeroSubject())); | 72 | s.addMention(new Mention(tokens, headTokens, m.isZeroSubject())); |
73 | - System.out.println(tokens.toString()); | ||
74 | } | 73 | } |
75 | 74 | ||
76 | private static void loadSyntacticGroup(Sentence s, TEIGroup g, | 75 | private static void loadSyntacticGroup(Sentence s, TEIGroup g, |