Commit 1ed8f98cfc84530725600672e69a2762c69c97f3

Authored by Bartłomiej Nitoń
1 parent 3d23a642

Removed unused depparse layer.

src/main/java/pl/waw/ipipan/zil/core/md/Main.java
... ... @@ -21,11 +21,9 @@ import pl.waw.ipipan.zil.nkjp.teiapi.api.io.IOUtils;
21 21 import java.io.BufferedReader;
22 22 import java.io.File;
23 23 import java.io.FileInputStream;
24   -import java.io.FileNotFoundException;
25 24 import java.io.IOException;
26 25 import java.io.InputStream;
27 26 import java.io.InputStreamReader;
28   -import java.io.PrintWriter;
29 27 import java.util.ArrayList;
30 28 import java.util.EnumMap;
31 29 import java.util.HashMap;
... ... @@ -152,14 +150,6 @@ public class Main {
152 150  
153 151 File inputDir = new File(args[0]);
154 152 File outputDir = new File(args[1]);
155   - File defsOutputFile = new File(args[1], "definitions.csv");
156   - PrintWriter defsWriter = null;
157   - try {
158   - defsWriter = new PrintWriter(defsOutputFile);
159   - } catch (FileNotFoundException e1) {
160   - // TODO Auto-generated catch block
161   - e1.printStackTrace();
162   - }
163 153  
164 154 if (!inputDir.isDirectory()) {
165 155 logger.error(inputDir + " is not a directory!");
... ... @@ -188,15 +178,13 @@ public class Main {
188 178 try {
189 179 File targetDir = createTargetTextDir(inputDir, outputDir, teiDir);
190 180 TEICorpusText teiText = TeiLoader.readTeiText(teiDir);
191   - annotateTeiText(teiText, teiDir, defsWriter);
  181 + annotateTeiText(teiText, teiDir);
192 182 TeiSaver.saveTeiText(teiText, targetDir, GZIP_OUTPUT);
193 183 } catch (IOException e) {
194 184 logger.error("Error processing text in dir:" + teiDir + " Error details: " + e.getLocalizedMessage(), e);
195 185 errors++;
196 186 }
197 187 }
198   -
199   - defsWriter.close();
200 188  
201 189 logger.info(all + " texts processed succesfully.");
202 190 if (errors > 0)
... ... @@ -231,9 +219,9 @@ public class Main {
231 219 * @param thriftText text to annotate with mentions
232 220 * @throws MultiserviceException when an error occures
233 221 */
234   - public static void annotateThriftText(TText thriftText, PrintWriter defsWriter) throws MultiserviceException {
  222 + public static void annotateThriftText(TText thriftText) throws MultiserviceException {
235 223 Text responseText = ThriftLoader.loadTextFromThrift(thriftText);
236   - Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence, defsWriter);
  224 + Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence);
237 225 ThriftSaver.updateThriftText(responseText, thriftText);
238 226 }
239 227  
... ... @@ -244,9 +232,9 @@ public class Main {
244 232 * @param teiText text to annotate with mentions
245 233 * @throws TEIException when an error occurs
246 234 */
247   - public static void annotateTeiText(TEICorpusText teiText, File textDir, PrintWriter defsWriter) throws TEIException {
  235 + public static void annotateTeiText(TEICorpusText teiText, File textDir) throws TEIException {
248 236 Text responseText = TeiLoader.loadTextFromTei(teiText, textDir);
249   - Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence, defsWriter);
  237 + Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence);
250 238 TeiSaver.updateTeiText(responseText, teiText);
251 239 }
252 240  
... ...
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
... ... @@ -29,44 +29,24 @@ public class Detector {
29 29 HeadDetector headModel,
30 30 ZeroSubjectDetector zeroSubjectModel,
31 31 NominalMentionDetector nominalMentionModel,
32   - Map<ValenceDicts,Map<String,ArrayList<String>>> valence,
33   - PrintWriter defsWriter) {
  32 + Map<ValenceDicts,Map<String,ArrayList<String>>> valence) {
34 33 text.clearMentions();
35 34 logger.debug("Detecting mentions in text " + text.getId());
36 35 for (Paragraph p : text)
37 36 for (Sentence s : p)
38   - detectMentionsInSentence(s, headModel, zeroSubjectModel, nominalMentionModel, valence, defsWriter);
  37 + detectMentionsInSentence(s, headModel, zeroSubjectModel, nominalMentionModel, valence);
39 38 }
40 39  
41 40 private static void detectMentionsInSentence(Sentence sentence,
42 41 HeadDetector headModel,
43 42 ZeroSubjectDetector zeroSubjectModel,
44 43 NominalMentionDetector nominalMentionModel,
45   - Map<ValenceDicts,Map<String,ArrayList<String>>> valence,
46   - PrintWriter defsWriter) {
47   - // adding mentions
48   -// addMentionsByTokenCtag(sentence);
49   -// addMentionsBySyntacticWordsCtag(sentence);
50   -// addMentionsByNamedEntities(sentence);
51   -// addMentionsByGroups(sentence, valence);
52   -// //addMentionsByDeppParse(sentence);
53   -// addSpeakerMentionsInSpoken(sentence);
54   -
  44 + Map<ValenceDicts,Map<String,ArrayList<String>>> valence) {
55 45 // zero subject detection
56 46 zeroSubjectModel.addZeroSubjectMentions(sentence);
57 47  
58 48 List<Token> heads = headModel.detectHeads(sentence);
59 49 nominalMentionModel.addNominalMentions(sentence, valence, heads);
60   -
61   - // removing mentions
62   - // removeTo(sentence); to nic nie daje, jeszcze ponizsze spradzic
63   -// Cleaner.cleanWalentyFramedMentions(sentence, valence.get(ValenceDicts.VerbsValence));
64   -// Cleaner.cleanUnnecessarySentenceMentions(sentence);
65   -// Cleaner.cleanFrazeos(sentence);
66   -
67   -
68   - // updating mention heads
69   - // updateMentionHeads(sentence);
70 50 }
71 51  
72 52 /**
... ...
src/main/java/pl/waw/ipipan/zil/core/md/io/tei/TeiLoader.java
... ... @@ -8,18 +8,11 @@ import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException;
8 8 import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO;
9 9  
10 10 import java.io.File;
11   -import java.io.IOException;
12   -import java.nio.charset.StandardCharsets;
13   -import java.nio.file.Files;
14   -import java.nio.file.Paths;
15 11 import java.util.ArrayList;
16 12 import java.util.HashMap;
17 13 import java.util.List;
18 14 import java.util.Map;
19 15  
20   -import org.json.JSONArray;
21   -import org.json.JSONObject;
22   -
23 16  
24 17 public class TeiLoader {
25 18  
... ... @@ -36,68 +29,33 @@ public class TeiLoader {
36 29 public static Text loadTextFromTei(TEICorpusText teiText, File textDir) {
37 30 Text text = new Text(teiText.getCorpusHeader().getId());
38 31  
39   - String textId = textDir.getName();
40   -
41   - System.out.println(textId);
42   -
43   - byte[] encoded;
44   - JSONArray jsonParagraphs = null;
45   - try {
46   - //encoded = Files.readAllBytes(Paths.get("/home/bniton/Projekty/koreferencja/COTHEC/clients/python/data/train-JSON-Concraft-old_grammar/"+textId+".json"));
47   - encoded = Files.readAllBytes(Paths.get("/home/bartek/Projekty/COTHEC/clients_data/"+textId+".json"));
48   - String jsonContent = new String(encoded, StandardCharsets.UTF_8);
49   - JSONObject jsonObject = new JSONObject(jsonContent);
50   -
51   - jsonParagraphs = jsonObject.getJSONArray("paragraphs");
52   - } catch (IOException e) {
53   - // TODO Auto-generated catch block
54   - //e.printStackTrace();
55   - logger.debug("No depparse layer.");
56   - }
57   -
58 32 logger.debug("Loading tei text " + text.getId() + "...");
59 33  
60 34 List<TEIParagraph> teiParagraphs = teiText.getParagraphs();
61 35  
62 36 for (int i=0; i < teiParagraphs.size(); i++) {
63 37 TEIParagraph teiP = teiParagraphs.get(i);
64   - JSONObject jsonP = null;
65   - if (jsonParagraphs != null) {
66   - jsonP = new JSONObject(jsonParagraphs.get(i).toString());
67   - }
68   - loadParagraph(text, teiP, jsonP);
  38 + loadParagraph(text, teiP);
69 39 }
70 40 logger.debug("Tei text loaded.");
71 41  
72 42 return text;
73 43 }
74 44  
75   - private static void loadParagraph(Text text, TEIParagraph teiP, JSONObject jsonP) {
  45 + private static void loadParagraph(Text text, TEIParagraph teiP) {
76 46 Paragraph p = new Paragraph();
77 47 text.add(p);
78 48  
79 49 List<TEISentence> teiSentences = teiP.getSentences();
80 50  
81   - JSONArray jsonSentences = null;
82   - if (jsonP != null) {
83   - jsonSentences = jsonP.getJSONArray("sentences");
84   - }
85   -
86 51 for (int i=0; i < teiSentences.size(); i++) {
87 52 TEISentence teiS = teiSentences.get(i);
88 53  
89   - JSONObject jsonS = null;
90   - if (jsonP != null) {
91   - if (i < jsonSentences.length()) {
92   - jsonS = new JSONObject(jsonSentences.get(i).toString());
93   - }
94   - }
95   -
96   - loadSentence(p, teiS, jsonS);
  54 + loadSentence(p, teiS);
97 55 }
98 56 }
99 57  
100   - private static void loadSentence(Paragraph p, TEISentence teiS, JSONObject jsonS) {
  58 + private static void loadSentence(Paragraph p, TEISentence teiS) {
101 59 Sentence s = new Sentence();
102 60 p.add(s);
103 61  
... ... @@ -114,33 +72,6 @@ public class TeiLoader {
114 72 loadSyntacticGroup(s, g, teiMorph2Segment);
115 73 for (TEIMention m : teiS.getAllMentions())
116 74 loadMentions(s, m, teiMorph2Segment);
117   -
118   - if (jsonS != null && s.size() == jsonS.getJSONArray("tokens").length()) {
119   - JSONArray relations = jsonS.getJSONArray("dependencyParse");
120   - for (int i=0; i<relations.length(); i++) {
121   - loadRelation(s, new JSONObject(relations.get(i).toString()));
122   - }
123   - } else {
124   - //System.out.println(s.toStringWithoutMentions());
125   - }
126   - }
127   -
128   - private static void loadRelation(Sentence s, JSONObject jsonRelation) {
129   - String label = jsonRelation.getString("label");
130   - if (!label.equals("root") && !jsonRelation.getString("startTokenId").isEmpty() &&
131   - jsonRelation.get("startTokenId").getClass() == String.class) {
132   - String[] sourceIdParts = jsonRelation.getString("startTokenId").split("\\.");
133   - String[] targetIdParts = jsonRelation.getString("endTokenId").split("\\.");
134   -
135   - int sourceId = Integer.parseInt(sourceIdParts[sourceIdParts.length-1]);
136   - int targetId = Integer.parseInt(targetIdParts[sourceIdParts.length-1]);
137   -
138   - Token source = s.get(sourceId);
139   - Token target = s.get(targetId);
140   -
141   - source.addRelation(new Relation(label, target));
142   - target.setReturnRelation(new Relation(label, source));
143   - }
144 75 }
145 76  
146 77 private static void loadMentions(Sentence s, TEIMention m,
... ...