Commit 1ed8f98cfc84530725600672e69a2762c69c97f3

Authored by Bartłomiej Nitoń
1 parent 3d23a642

Removed unused depparse layer.

src/main/java/pl/waw/ipipan/zil/core/md/Main.java
@@ -21,11 +21,9 @@ import pl.waw.ipipan.zil.nkjp.teiapi.api.io.IOUtils; @@ -21,11 +21,9 @@ import pl.waw.ipipan.zil.nkjp.teiapi.api.io.IOUtils;
21 import java.io.BufferedReader; 21 import java.io.BufferedReader;
22 import java.io.File; 22 import java.io.File;
23 import java.io.FileInputStream; 23 import java.io.FileInputStream;
24 -import java.io.FileNotFoundException;  
25 import java.io.IOException; 24 import java.io.IOException;
26 import java.io.InputStream; 25 import java.io.InputStream;
27 import java.io.InputStreamReader; 26 import java.io.InputStreamReader;
28 -import java.io.PrintWriter;  
29 import java.util.ArrayList; 27 import java.util.ArrayList;
30 import java.util.EnumMap; 28 import java.util.EnumMap;
31 import java.util.HashMap; 29 import java.util.HashMap;
@@ -152,14 +150,6 @@ public class Main { @@ -152,14 +150,6 @@ public class Main {
152 150
153 File inputDir = new File(args[0]); 151 File inputDir = new File(args[0]);
154 File outputDir = new File(args[1]); 152 File outputDir = new File(args[1]);
155 - File defsOutputFile = new File(args[1], "definitions.csv");  
156 - PrintWriter defsWriter = null;  
157 - try {  
158 - defsWriter = new PrintWriter(defsOutputFile);  
159 - } catch (FileNotFoundException e1) {  
160 - // TODO Auto-generated catch block  
161 - e1.printStackTrace();  
162 - }  
163 153
164 if (!inputDir.isDirectory()) { 154 if (!inputDir.isDirectory()) {
165 logger.error(inputDir + " is not a directory!"); 155 logger.error(inputDir + " is not a directory!");
@@ -188,15 +178,13 @@ public class Main { @@ -188,15 +178,13 @@ public class Main {
188 try { 178 try {
189 File targetDir = createTargetTextDir(inputDir, outputDir, teiDir); 179 File targetDir = createTargetTextDir(inputDir, outputDir, teiDir);
190 TEICorpusText teiText = TeiLoader.readTeiText(teiDir); 180 TEICorpusText teiText = TeiLoader.readTeiText(teiDir);
191 - annotateTeiText(teiText, teiDir, defsWriter); 181 + annotateTeiText(teiText, teiDir);
192 TeiSaver.saveTeiText(teiText, targetDir, GZIP_OUTPUT); 182 TeiSaver.saveTeiText(teiText, targetDir, GZIP_OUTPUT);
193 } catch (IOException e) { 183 } catch (IOException e) {
194 logger.error("Error processing text in dir:" + teiDir + " Error details: " + e.getLocalizedMessage(), e); 184 logger.error("Error processing text in dir:" + teiDir + " Error details: " + e.getLocalizedMessage(), e);
195 errors++; 185 errors++;
196 } 186 }
197 } 187 }
198 -  
199 - defsWriter.close();  
200 188
201 logger.info(all + " texts processed succesfully."); 189 logger.info(all + " texts processed succesfully.");
202 if (errors > 0) 190 if (errors > 0)
@@ -231,9 +219,9 @@ public class Main { @@ -231,9 +219,9 @@ public class Main {
231 * @param thriftText text to annotate with mentions 219 * @param thriftText text to annotate with mentions
232 * @throws MultiserviceException when an error occurs 220 * @throws MultiserviceException when an error occurs
233 */ 221 */
234 - public static void annotateThriftText(TText thriftText, PrintWriter defsWriter) throws MultiserviceException { 222 + public static void annotateThriftText(TText thriftText) throws MultiserviceException {
235 Text responseText = ThriftLoader.loadTextFromThrift(thriftText); 223 Text responseText = ThriftLoader.loadTextFromThrift(thriftText);
236 - Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence, defsWriter); 224 + Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence);
237 ThriftSaver.updateThriftText(responseText, thriftText); 225 ThriftSaver.updateThriftText(responseText, thriftText);
238 } 226 }
239 227
@@ -244,9 +232,9 @@ public class Main { @@ -244,9 +232,9 @@ public class Main {
244 * @param teiText text to annotate with mentions 232 * @param teiText text to annotate with mentions
245 * @throws TEIException when an error occurs 233 * @throws TEIException when an error occurs
246 */ 234 */
247 - public static void annotateTeiText(TEICorpusText teiText, File textDir, PrintWriter defsWriter) throws TEIException { 235 + public static void annotateTeiText(TEICorpusText teiText, File textDir) throws TEIException {
248 Text responseText = TeiLoader.loadTextFromTei(teiText, textDir); 236 Text responseText = TeiLoader.loadTextFromTei(teiText, textDir);
249 - Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence, defsWriter); 237 + Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence);
250 TeiSaver.updateTeiText(responseText, teiText); 238 TeiSaver.updateTeiText(responseText, teiText);
251 } 239 }
252 240
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
@@ -29,44 +29,24 @@ public class Detector { @@ -29,44 +29,24 @@ public class Detector {
29 HeadDetector headModel, 29 HeadDetector headModel,
30 ZeroSubjectDetector zeroSubjectModel, 30 ZeroSubjectDetector zeroSubjectModel,
31 NominalMentionDetector nominalMentionModel, 31 NominalMentionDetector nominalMentionModel,
32 - Map<ValenceDicts,Map<String,ArrayList<String>>> valence,  
33 - PrintWriter defsWriter) { 32 + Map<ValenceDicts,Map<String,ArrayList<String>>> valence) {
34 text.clearMentions(); 33 text.clearMentions();
35 logger.debug("Detecting mentions in text " + text.getId()); 34 logger.debug("Detecting mentions in text " + text.getId());
36 for (Paragraph p : text) 35 for (Paragraph p : text)
37 for (Sentence s : p) 36 for (Sentence s : p)
38 - detectMentionsInSentence(s, headModel, zeroSubjectModel, nominalMentionModel, valence, defsWriter); 37 + detectMentionsInSentence(s, headModel, zeroSubjectModel, nominalMentionModel, valence);
39 } 38 }
40 39
41 private static void detectMentionsInSentence(Sentence sentence, 40 private static void detectMentionsInSentence(Sentence sentence,
42 HeadDetector headModel, 41 HeadDetector headModel,
43 ZeroSubjectDetector zeroSubjectModel, 42 ZeroSubjectDetector zeroSubjectModel,
44 NominalMentionDetector nominalMentionModel, 43 NominalMentionDetector nominalMentionModel,
45 - Map<ValenceDicts,Map<String,ArrayList<String>>> valence,  
46 - PrintWriter defsWriter) {  
47 - // adding mentions  
48 -// addMentionsByTokenCtag(sentence);  
49 -// addMentionsBySyntacticWordsCtag(sentence);  
50 -// addMentionsByNamedEntities(sentence);  
51 -// addMentionsByGroups(sentence, valence);  
52 -// //addMentionsByDeppParse(sentence);  
53 -// addSpeakerMentionsInSpoken(sentence);  
54 - 44 + Map<ValenceDicts,Map<String,ArrayList<String>>> valence) {
55 // zero subject detection 45 // zero subject detection
56 zeroSubjectModel.addZeroSubjectMentions(sentence); 46 zeroSubjectModel.addZeroSubjectMentions(sentence);
57 47
58 List<Token> heads = headModel.detectHeads(sentence); 48 List<Token> heads = headModel.detectHeads(sentence);
59 nominalMentionModel.addNominalMentions(sentence, valence, heads); 49 nominalMentionModel.addNominalMentions(sentence, valence, heads);
60 -  
61 - // removing mentions  
62 - // removeTo(sentence); to nic nie daje, jeszcze ponizsze spradzic  
63 -// Cleaner.cleanWalentyFramedMentions(sentence, valence.get(ValenceDicts.VerbsValence));  
64 -// Cleaner.cleanUnnecessarySentenceMentions(sentence);  
65 -// Cleaner.cleanFrazeos(sentence);  
66 -  
67 -  
68 - // updating mention heads  
69 - // updateMentionHeads(sentence);  
70 } 50 }
71 51
72 /** 52 /**
src/main/java/pl/waw/ipipan/zil/core/md/io/tei/TeiLoader.java
@@ -8,18 +8,11 @@ import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException; @@ -8,18 +8,11 @@ import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException;
8 import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO; 8 import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO;
9 9
10 import java.io.File; 10 import java.io.File;
11 -import java.io.IOException;  
12 -import java.nio.charset.StandardCharsets;  
13 -import java.nio.file.Files;  
14 -import java.nio.file.Paths;  
15 import java.util.ArrayList; 11 import java.util.ArrayList;
16 import java.util.HashMap; 12 import java.util.HashMap;
17 import java.util.List; 13 import java.util.List;
18 import java.util.Map; 14 import java.util.Map;
19 15
20 -import org.json.JSONArray;  
21 -import org.json.JSONObject;  
22 -  
23 16
24 public class TeiLoader { 17 public class TeiLoader {
25 18
@@ -36,68 +29,33 @@ public class TeiLoader { @@ -36,68 +29,33 @@ public class TeiLoader {
36 public static Text loadTextFromTei(TEICorpusText teiText, File textDir) { 29 public static Text loadTextFromTei(TEICorpusText teiText, File textDir) {
37 Text text = new Text(teiText.getCorpusHeader().getId()); 30 Text text = new Text(teiText.getCorpusHeader().getId());
38 31
39 - String textId = textDir.getName();  
40 -  
41 - System.out.println(textId);  
42 -  
43 - byte[] encoded;  
44 - JSONArray jsonParagraphs = null;  
45 - try {  
46 - //encoded = Files.readAllBytes(Paths.get("/home/bniton/Projekty/koreferencja/COTHEC/clients/python/data/train-JSON-Concraft-old_grammar/"+textId+".json"));  
47 - encoded = Files.readAllBytes(Paths.get("/home/bartek/Projekty/COTHEC/clients_data/"+textId+".json"));  
48 - String jsonContent = new String(encoded, StandardCharsets.UTF_8);  
49 - JSONObject jsonObject = new JSONObject(jsonContent);  
50 -  
51 - jsonParagraphs = jsonObject.getJSONArray("paragraphs");  
52 - } catch (IOException e) {  
53 - // TODO Auto-generated catch block  
54 - //e.printStackTrace();  
55 - logger.debug("No depparse layer.");  
56 - }  
57 -  
58 logger.debug("Loading tei text " + text.getId() + "..."); 32 logger.debug("Loading tei text " + text.getId() + "...");
59 33
60 List<TEIParagraph> teiParagraphs = teiText.getParagraphs(); 34 List<TEIParagraph> teiParagraphs = teiText.getParagraphs();
61 35
62 for (int i=0; i < teiParagraphs.size(); i++) { 36 for (int i=0; i < teiParagraphs.size(); i++) {
63 TEIParagraph teiP = teiParagraphs.get(i); 37 TEIParagraph teiP = teiParagraphs.get(i);
64 - JSONObject jsonP = null;  
65 - if (jsonParagraphs != null) {  
66 - jsonP = new JSONObject(jsonParagraphs.get(i).toString());  
67 - }  
68 - loadParagraph(text, teiP, jsonP); 38 + loadParagraph(text, teiP);
69 } 39 }
70 logger.debug("Tei text loaded."); 40 logger.debug("Tei text loaded.");
71 41
72 return text; 42 return text;
73 } 43 }
74 44
75 - private static void loadParagraph(Text text, TEIParagraph teiP, JSONObject jsonP) { 45 + private static void loadParagraph(Text text, TEIParagraph teiP) {
76 Paragraph p = new Paragraph(); 46 Paragraph p = new Paragraph();
77 text.add(p); 47 text.add(p);
78 48
79 List<TEISentence> teiSentences = teiP.getSentences(); 49 List<TEISentence> teiSentences = teiP.getSentences();
80 50
81 - JSONArray jsonSentences = null;  
82 - if (jsonP != null) {  
83 - jsonSentences = jsonP.getJSONArray("sentences");  
84 - }  
85 -  
86 for (int i=0; i < teiSentences.size(); i++) { 51 for (int i=0; i < teiSentences.size(); i++) {
87 TEISentence teiS = teiSentences.get(i); 52 TEISentence teiS = teiSentences.get(i);
88 53
89 - JSONObject jsonS = null;  
90 - if (jsonP != null) {  
91 - if (i < jsonSentences.length()) {  
92 - jsonS = new JSONObject(jsonSentences.get(i).toString());  
93 - }  
94 - }  
95 -  
96 - loadSentence(p, teiS, jsonS); 54 + loadSentence(p, teiS);
97 } 55 }
98 } 56 }
99 57
100 - private static void loadSentence(Paragraph p, TEISentence teiS, JSONObject jsonS) { 58 + private static void loadSentence(Paragraph p, TEISentence teiS) {
101 Sentence s = new Sentence(); 59 Sentence s = new Sentence();
102 p.add(s); 60 p.add(s);
103 61
@@ -114,33 +72,6 @@ public class TeiLoader { @@ -114,33 +72,6 @@ public class TeiLoader {
114 loadSyntacticGroup(s, g, teiMorph2Segment); 72 loadSyntacticGroup(s, g, teiMorph2Segment);
115 for (TEIMention m : teiS.getAllMentions()) 73 for (TEIMention m : teiS.getAllMentions())
116 loadMentions(s, m, teiMorph2Segment); 74 loadMentions(s, m, teiMorph2Segment);
117 -  
118 - if (jsonS != null && s.size() == jsonS.getJSONArray("tokens").length()) {  
119 - JSONArray relations = jsonS.getJSONArray("dependencyParse");  
120 - for (int i=0; i<relations.length(); i++) {  
121 - loadRelation(s, new JSONObject(relations.get(i).toString()));  
122 - }  
123 - } else {  
124 - //System.out.println(s.toStringWithoutMentions());  
125 - }  
126 - }  
127 -  
128 - private static void loadRelation(Sentence s, JSONObject jsonRelation) {  
129 - String label = jsonRelation.getString("label");  
130 - if (!label.equals("root") && !jsonRelation.getString("startTokenId").isEmpty() &&  
131 - jsonRelation.get("startTokenId").getClass() == String.class) {  
132 - String[] sourceIdParts = jsonRelation.getString("startTokenId").split("\\.");  
133 - String[] targetIdParts = jsonRelation.getString("endTokenId").split("\\.");  
134 -  
135 - int sourceId = Integer.parseInt(sourceIdParts[sourceIdParts.length-1]);  
136 - int targetId = Integer.parseInt(targetIdParts[sourceIdParts.length-1]);  
137 -  
138 - Token source = s.get(sourceId);  
139 - Token target = s.get(targetId);  
140 -  
141 - source.addRelation(new Relation(label, target));  
142 - target.setReturnRelation(new Relation(label, source));  
143 - }  
144 } 75 }
145 76
146 private static void loadMentions(Sentence s, TEIMention m, 77 private static void loadMentions(Sentence s, TEIMention m,