Commit 9bd8588210b09b1b9c30d085300d32c6b0b91091
1 parent 184af486
AddMentionsByGroups comment update.
Showing 1 changed file with 3 additions and 211 deletions
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
@@ -98,25 +98,15 @@ public class Detector { | @@ -98,25 +98,15 @@ public class Detector { | ||
98 | } | 98 | } |
99 | 99 | ||
100 | /** | 100 | /** |
101 | - * Wyszukuję i oznaczam wszystkie NG* | 101 | + * Wyszukuję i oznaczam wszystkie NG lub ciagi grup NG i PrepNG |
102 | + * pasujace do schematow rzeczownikowych z Walentego | ||
102 | * | 103 | * |
103 | * @param sentence | 104 | * @param sentence |
104 | */ | 105 | */ |
105 | private static void addMentionsByGroups(Sentence sentence, | 106 | private static void addMentionsByGroups(Sentence sentence, |
106 | Map<ValenceDicts,Map<String,ArrayList<String>>> valence) { | 107 | Map<ValenceDicts,Map<String,ArrayList<String>>> valence) { |
107 | 108 | ||
108 | - for (SyntacticGroup group : sentence.getGroups()) { | ||
109 | - | ||
110 | - /*SyntacticGroup nextGroup = group.getFollowingGroup(); | ||
111 | - SyntacticGroup nextnextGroup = null; | ||
112 | - SyntacticGroup nextnextnextGroup = null; | ||
113 | - if (nextGroup != null) { | ||
114 | - nextnextGroup = nextGroup.getFollowingGroup(); | ||
115 | - if (nextnextGroup != null) { | ||
116 | - nextnextnextGroup = nextnextGroup.getFollowingGroup(); | ||
117 | - } | ||
118 | - }*/ | ||
119 | - | 109 | + for (SyntacticGroup group : sentence.getGroups()) { |
120 | if (group.getType().startsWith("NG")) { | 110 | if (group.getType().startsWith("NG")) { |
121 | ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>(); | 111 | ArrayList<SyntacticGroup> nestedGroups = new ArrayList<SyntacticGroup>(); |
122 | nestedGroups.add(group); | 112 | nestedGroups.add(group); |
@@ -130,194 +120,9 @@ public class Detector { | @@ -130,194 +120,9 @@ public class Detector { | ||
130 | Mention mention = createMention(nestedGroups, valence.get(ValenceDicts.NounsValence)); | 120 | Mention mention = createMention(nestedGroups, valence.get(ValenceDicts.NounsValence)); |
131 | sentence.addMention(mention); | 121 | sentence.addMention(mention); |
132 | } | 122 | } |
133 | - | ||
134 | - /*if (group.getType().startsWith("NG") && nextGroup != null && | ||
135 | - nextnextGroup != null && nextnextnextGroup != null && | ||
136 | - quatroCompatibility(group, nextGroup, nextnextGroup, | ||
137 | - nextnextnextGroup, valence.get(ValenceDicts.NounsValence))) { | ||
138 | - List<Token> heads = group.getSemanticHeadTokens(); | ||
139 | - List<Token> segments = new ArrayList<Token>(); | ||
140 | - segments.addAll(group.getTokens()); | ||
141 | - segments.addAll(nextGroup.getTokens()); | ||
142 | - segments.addAll(nextnextGroup.getTokens()); | ||
143 | - segments.addAll(nextnextnextGroup.getTokens()); | ||
144 | - | ||
145 | - sentence.addMention(new Mention(segments, heads)); | ||
146 | - } else if (group.getType().startsWith("NG") && nextGroup != null && | ||
147 | - nextnextGroup != null && tripleCompatibility(group, nextGroup, nextnextGroup, valence.get(ValenceDicts.NounsValence))) { | ||
148 | - List<Token> heads = group.getSemanticHeadTokens(); | ||
149 | - List<Token> segments = new ArrayList<Token>(); | ||
150 | - segments.addAll(group.getTokens()); | ||
151 | - segments.addAll(nextGroup.getTokens()); | ||
152 | - segments.addAll(nextnextGroup.getTokens()); | ||
153 | - | ||
154 | - sentence.addMention(new Mention(segments, heads)); | ||
155 | - } else if (group.getType().startsWith("NG") && nextGroup != null && | ||
156 | - groupsValenceCompatibility(group, nextGroup, sentence, valence.get(ValenceDicts.NounsValence)) | ||
157 | - ) { | ||
158 | - List<Token> heads = group.getSemanticHeadTokens(); | ||
159 | - List<Token> segments = new ArrayList<Token>(); | ||
160 | - segments.addAll(group.getTokens()); | ||
161 | - segments.addAll(nextGroup.getTokens()); | ||
162 | - | ||
163 | - sentence.addMention(new Mention(segments, heads)); | ||
164 | - } else if (group.getType().startsWith("NG")) { | ||
165 | - List<Token> segments = group.getTokens(); | ||
166 | - List<Token> heads = group.getSemanticHeadTokens(); | ||
167 | - | ||
168 | - sentence.addMention(new Mention(segments, heads)); | ||
169 | - }*/ | ||
170 | - } | ||
171 | - } | ||
172 | - | ||
173 | - /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | ||
174 | - ArrayList<String> group2Types) { | ||
175 | - for (String group1Type : group1Types) { | ||
176 | - if (schemaContains(schema, group1Type)) { | ||
177 | - for (String group2Type : group2Types) { | ||
178 | - if (schemaContains(schema, group2Type)) { | ||
179 | - return true; | ||
180 | - } | ||
181 | - } | ||
182 | - } | ||
183 | - } | ||
184 | - return false; | ||
185 | - }*/ | ||
186 | - | ||
187 | - /*private static boolean isProperSchema(String schema, ArrayList<String> group1Types, | ||
188 | - ArrayList<String> group2Types) { | ||
189 | - ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types); | ||
190 | - ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types); | ||
191 | - | ||
192 | - ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); | ||
193 | - matchingPositions.add(group1MPositions); | ||
194 | - matchingPositions.add(group2MPositions); | ||
195 | - | ||
196 | - if (matchingPositionsExists(matchingPositions)) { | ||
197 | - ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | ||
198 | - for (ArrayList<String> combination : product) { | ||
199 | - Set<String> combinationSet = new HashSet<String>(combination); | ||
200 | - if (combinationSet.size() == matchingPositions.size()) { | ||
201 | - return true; | ||
202 | - } | ||
203 | - } | ||
204 | } | 123 | } |
205 | - return false; | ||
206 | - }*/ | ||
207 | - | ||
208 | - /*private static boolean groupsValenceCompatibility(SyntacticGroup NG1, | ||
209 | - SyntacticGroup NG2, Sentence sentence, | ||
210 | - Map<String,ArrayList<String>> walentyMapping) { | ||
211 | - Token NG1Head = NG1.getSemanticHeadTokens().get(0); | ||
212 | - | ||
213 | - String NGHeadBase = NG1Head.getBase(); | ||
214 | - | ||
215 | - if (!walentyMapping.containsKey(NGHeadBase)) { | ||
216 | - return false; | ||
217 | - } else { | ||
218 | - ArrayList<String> NG2realizations = NG2.getWalentyRealizations(); | ||
219 | - | ||
220 | - ArrayList<String> schemata = walentyMapping.get(NGHeadBase); | ||
221 | - for (String real : NG2realizations) { | ||
222 | - for (String schema : schemata) { | ||
223 | - if (schemaContains(schema, real)) { | ||
224 | - return true; | ||
225 | - } | ||
226 | - } | ||
227 | - } | ||
228 | - } | ||
229 | - return false; | ||
230 | } | 124 | } |
231 | 125 | ||
232 | - private static boolean tripleCompatibility(SyntacticGroup group1, | ||
233 | - SyntacticGroup group2, SyntacticGroup group3, | ||
234 | - Map<String,ArrayList<String>> walentyMapping) { | ||
235 | - Token group1Head = group1.getSemanticHeadTokens().get(0); | ||
236 | - | ||
237 | - String group1HeadBase = group1Head.getBase(); | ||
238 | - | ||
239 | - if (!walentyMapping.containsKey(group1HeadBase)) { | ||
240 | - return false; | ||
241 | - } else { | ||
242 | - ArrayList<String> group2realizations = group2.getWalentyRealizations(); | ||
243 | - ArrayList<String> group3realizations = group3.getWalentyRealizations(); | ||
244 | - | ||
245 | - ArrayList<String> schemata = walentyMapping.get(group1HeadBase); | ||
246 | - for (String schema : schemata) { | ||
247 | - if (isProperSchema(schema, group2realizations, group3realizations)) { | ||
248 | - return true; | ||
249 | - } | ||
250 | - } | ||
251 | - } | ||
252 | - return false; | ||
253 | - } | ||
254 | - | ||
255 | - private static boolean quatroCompatibility(SyntacticGroup group1, | ||
256 | - SyntacticGroup group2, SyntacticGroup group3, SyntacticGroup group4, | ||
257 | - Map<String,ArrayList<String>> walentyMapping) { | ||
258 | - Token group1Head = group1.getSemanticHeadTokens().get(0); | ||
259 | - | ||
260 | - String group1HeadBase = group1Head.getBase(); | ||
261 | - | ||
262 | - if (!walentyMapping.containsKey(group1HeadBase)) { | ||
263 | - return false; | ||
264 | - } else { | ||
265 | - ArrayList<String> group2realizations = group2.getWalentyRealizations(); | ||
266 | - ArrayList<String> group3realizations = group3.getWalentyRealizations(); | ||
267 | - ArrayList<String> group4realizations = group4.getWalentyRealizations(); | ||
268 | - | ||
269 | - ArrayList<String> schemata = walentyMapping.get(group1HeadBase); | ||
270 | - for (String schema : schemata) { | ||
271 | - if (isTripleProperSchema(schema, group2realizations, group3realizations, | ||
272 | - group4realizations)) { | ||
273 | - return true; | ||
274 | - } | ||
275 | - } | ||
276 | - } | ||
277 | - return false; | ||
278 | - } | ||
279 | - | ||
280 | - private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types, | ||
281 | - ArrayList<String> group2Types, ArrayList<String> group3Types) { | ||
282 | - for (String group1Type : group1Types) { | ||
283 | - if (schemaContains(schema, group1Type)) { | ||
284 | - for (String group2Type : group2Types) { | ||
285 | - if (schemaContains(schema, group2Type)) { | ||
286 | - for (String group3Type : group3Types) { | ||
287 | - if (schemaContains(schema, group3Type)) { | ||
288 | - return true; | ||
289 | - } | ||
290 | - } | ||
291 | - } | ||
292 | - } | ||
293 | - } | ||
294 | - } | ||
295 | - return false; | ||
296 | - } | ||
297 | - | ||
298 | - private static boolean isTripleProperSchema(String schema, ArrayList<String> group1Types, | ||
299 | - ArrayList<String> group2Types, ArrayList<String> group3Types) { | ||
300 | - | ||
301 | - ArrayList<String> group1MPositions = getMatchingPositions(schema, group1Types); | ||
302 | - ArrayList<String> group2MPositions = getMatchingPositions(schema, group2Types); | ||
303 | - ArrayList<String> group3MPositions = getMatchingPositions(schema, group3Types); | ||
304 | - | ||
305 | - ArrayList<ArrayList<String>> matchingPositions = new ArrayList<ArrayList<String>>(); | ||
306 | - matchingPositions.add(group1MPositions); | ||
307 | - matchingPositions.add(group2MPositions); | ||
308 | - matchingPositions.add(group3MPositions); | ||
309 | - | ||
310 | - if (matchingPositionsExists(matchingPositions)) { | ||
311 | - ArrayList<ArrayList<String>> product = cartesianProduct(matchingPositions); | ||
312 | - for (ArrayList<String> combination : product) { | ||
313 | - Set<String> combinationSet = new HashSet<String>(combination); | ||
314 | - if (combinationSet.size() == matchingPositions.size()) { | ||
315 | - return true; | ||
316 | - } | ||
317 | - } | ||
318 | - } | ||
319 | - return false; | ||
320 | - }*/ | ||
321 | 126 | ||
322 | private static Mention createMention(ArrayList<SyntacticGroup> nestedGroups, | 127 | private static Mention createMention(ArrayList<SyntacticGroup> nestedGroups, |
323 | Map<String,ArrayList<String>> walentyNouns) { | 128 | Map<String,ArrayList<String>> walentyNouns) { |
@@ -430,19 +235,6 @@ public class Detector { | @@ -430,19 +235,6 @@ public class Detector { | ||
430 | } | 235 | } |
431 | return positions; | 236 | return positions; |
432 | } | 237 | } |
433 | - | ||
434 | - private static boolean schemaContains(String schema, String phraseType) { | ||
435 | - for (String position : schema.split("\\s\\+\\s")) { | ||
436 | - position = position.trim(); | ||
437 | - position = position.substring(1, position.length()-1); | ||
438 | - for (String phrT : position.split(";")) { | ||
439 | - if (phrT.equals(phraseType)) { | ||
440 | - return true; | ||
441 | - } | ||
442 | - } | ||
443 | - } | ||
444 | - return false; | ||
445 | - } | ||
446 | 238 | ||
447 | /** | 239 | /** |
448 | * Wyszukuję i oznaczam wszystkie NER | 240 | * Wyszukuję i oznaczam wszystkie NER |