Removed unused depparse layer.

Bartłomiej Nitoń
1 parent 3d23a642
Showing 3 changed files with 12 additions and 113 deletions
src/main/java/pl/waw/ipipan/zil/core/md/Main.java
src/main/java/pl/waw/ipipan/zil/core/md/detection/Detector.java
src/main/java/pl/waw/ipipan/zil/core/md/io/tei/TeiLoader.java
@@ -21,11 +21,9 @@ import pl.waw.ipipan.zil.nkjp.teiapi.api.io.IOUtils;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.EnumMap;
 import java.util.HashMap;
@@ -152,14 +150,6 @@ public class Main {
  
         File inputDir = new File(args[0]);
         File outputDir = new File(args[1]);
-        File defsOutputFile = new File(args[1], "definitions.csv");
-        PrintWriter defsWriter = null;
-		try {
-			defsWriter = new PrintWriter(defsOutputFile);
-		} catch (FileNotFoundException e1) {
-			// TODO Auto-generated catch block
-			e1.printStackTrace();
-		}
  
         if (!inputDir.isDirectory()) {
             logger.error(inputDir + " is not a directory!");
@@ -188,15 +178,13 @@ public class Main {
             try {
                 File targetDir = createTargetTextDir(inputDir, outputDir, teiDir);
                 TEICorpusText teiText = TeiLoader.readTeiText(teiDir);
-                annotateTeiText(teiText, teiDir, defsWriter);
+                annotateTeiText(teiText, teiDir);
                 TeiSaver.saveTeiText(teiText, targetDir, GZIP_OUTPUT);
             } catch (IOException e) {
                 logger.error("Error processing text in dir:" + teiDir + " Error details: " + e.getLocalizedMessage(), e);
                 errors++;
             }
         }
-        
-        defsWriter.close();
  
         logger.info(all + " texts processed succesfully.");
         if (errors > 0)
@@ -231,9 +219,9 @@ public class Main {
      * @param thriftText text to annotate with mentions
     * @throws MultiserviceException when an error occurs
      */
-    public static void annotateThriftText(TText thriftText, PrintWriter defsWriter) throws MultiserviceException {
+    public static void annotateThriftText(TText thriftText) throws MultiserviceException {
         Text responseText = ThriftLoader.loadTextFromThrift(thriftText);
-        Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence, defsWriter);
+        Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence);
         ThriftSaver.updateThriftText(responseText, thriftText);
     }
  
@@ -244,9 +232,9 @@ public class Main {
      * @param teiText text to annotate with mentions
      * @throws TEIException when an error occurs
      */
-    public static void annotateTeiText(TEICorpusText teiText, File textDir, PrintWriter defsWriter) throws TEIException {
+    public static void annotateTeiText(TEICorpusText teiText, File textDir) throws TEIException {
         Text responseText = TeiLoader.loadTextFromTei(teiText, textDir);
-        Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence, defsWriter);
+        Detector.findMentionsInText(responseText, headModel, zeroSubjectModel, nominalMentionModel, valence);
         TeiSaver.updateTeiText(responseText, teiText);
     }
  
@@ -29,44 +29,24 @@ public class Detector {
     									  HeadDetector headModel,
                                           ZeroSubjectDetector zeroSubjectModel,
                                           NominalMentionDetector nominalMentionModel,
-                                          Map<ValenceDicts,Map<String,ArrayList<String>>> valence,
-                                          PrintWriter defsWriter) {
+                                          Map<ValenceDicts,Map<String,ArrayList<String>>> valence) {
         text.clearMentions();
         logger.debug("Detecting mentions in text " + text.getId());
         for (Paragraph p : text)
             for (Sentence s : p)
-                detectMentionsInSentence(s, headModel, zeroSubjectModel, nominalMentionModel, valence, defsWriter);
+                detectMentionsInSentence(s, headModel, zeroSubjectModel, nominalMentionModel, valence);
     }
  
     private static void detectMentionsInSentence(Sentence sentence,
     											 HeadDetector headModel,
                                                  ZeroSubjectDetector zeroSubjectModel,
                                                  NominalMentionDetector nominalMentionModel,
-                                                 Map<ValenceDicts,Map<String,ArrayList<String>>> valence,
-                                                 PrintWriter defsWriter) {
-        // adding mentions
-//        addMentionsByTokenCtag(sentence);
-//        addMentionsBySyntacticWordsCtag(sentence);
-//        addMentionsByNamedEntities(sentence);
-//        addMentionsByGroups(sentence, valence);
-//        //addMentionsByDeppParse(sentence);
-//        addSpeakerMentionsInSpoken(sentence);
-
+                                                 Map<ValenceDicts,Map<String,ArrayList<String>>> valence) {
         // zero subject detection
         zeroSubjectModel.addZeroSubjectMentions(sentence);
  
         List<Token> heads = headModel.detectHeads(sentence);
         nominalMentionModel.addNominalMentions(sentence, valence, heads);
-
-        // removing mentions
-        // removeTo(sentence); to nic nie daje, jeszcze ponizsze spradzic
-//        Cleaner.cleanWalentyFramedMentions(sentence, valence.get(ValenceDicts.VerbsValence));
-//        Cleaner.cleanUnnecessarySentenceMentions(sentence);
-//        Cleaner.cleanFrazeos(sentence);
-        
-
-        // updating mention heads
-   //     updateMentionHeads(sentence);
     }
  
     /**
@@ -8,18 +8,11 @@ import pl.waw.ipipan.zil.nkjp.teiapi.api.exceptions.TEIException;
 import pl.waw.ipipan.zil.nkjp.teiapi.api.io.TEI_IO;
  
 import java.io.File;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
  
-import org.json.JSONArray;
-import org.json.JSONObject;
-
  
 public class TeiLoader {
  
@@ -36,68 +29,33 @@ public class TeiLoader {
     public static Text loadTextFromTei(TEICorpusText teiText, File textDir) {
         Text text = new Text(teiText.getCorpusHeader().getId());
  
-        String textId = textDir.getName();
-        
-        System.out.println(textId);
-        
-        byte[] encoded;
-        JSONArray jsonParagraphs = null;
-		try {
-			//encoded = Files.readAllBytes(Paths.get("/home/bniton/Projekty/koreferencja/COTHEC/clients/python/data/train-JSON-Concraft-old_grammar/"+textId+".json"));
-			encoded = Files.readAllBytes(Paths.get("/home/bartek/Projekty/COTHEC/clients_data/"+textId+".json"));
-			String jsonContent = new String(encoded, StandardCharsets.UTF_8);
-			JSONObject jsonObject = new JSONObject(jsonContent);
-	        
-	        jsonParagraphs = jsonObject.getJSONArray("paragraphs");
-		} catch (IOException e) {
-			// TODO Auto-generated catch block
-			//e.printStackTrace();
-			logger.debug("No depparse layer.");
-		}
-
         logger.debug("Loading tei text " + text.getId() + "...");
  
         List<TEIParagraph> teiParagraphs = teiText.getParagraphs();
  
         for (int i=0; i < teiParagraphs.size(); i++) {
         	TEIParagraph teiP = teiParagraphs.get(i);
-        	JSONObject jsonP = null;
-        	if (jsonParagraphs != null) {
-        		jsonP = new JSONObject(jsonParagraphs.get(i).toString());
-        	}
-        	loadParagraph(text, teiP, jsonP);
+        	loadParagraph(text, teiP);
         }
         logger.debug("Tei text loaded.");
  
         return text;
     }
  
-    private static void loadParagraph(Text text, TEIParagraph teiP, JSONObject jsonP) {
+    private static void loadParagraph(Text text, TEIParagraph teiP) {
         Paragraph p = new Paragraph();
         text.add(p);
  
         List<TEISentence> teiSentences = teiP.getSentences();
  
-        JSONArray jsonSentences = null;
-        if (jsonP != null) {
-        	jsonSentences = jsonP.getJSONArray("sentences");
-        }
-        
         for (int i=0; i < teiSentences.size(); i++) {
         	TEISentence teiS = teiSentences.get(i);
  
-        	JSONObject jsonS = null;
-        	if (jsonP != null) {
-            	if (i < jsonSentences.length()) {
-            		jsonS = new JSONObject(jsonSentences.get(i).toString());
-            	}
-        	}
-        	
-        	loadSentence(p, teiS, jsonS);
+        	loadSentence(p, teiS);
         }
     }
  
-    private static void loadSentence(Paragraph p, TEISentence teiS, JSONObject jsonS) {
+    private static void loadSentence(Paragraph p, TEISentence teiS) {
         Sentence s = new Sentence();
         p.add(s);
  
@@ -114,33 +72,6 @@ public class TeiLoader {
             loadSyntacticGroup(s, g, teiMorph2Segment);
         for (TEIMention m : teiS.getAllMentions())
             loadMentions(s, m, teiMorph2Segment);
-        
-        if (jsonS != null && s.size() == jsonS.getJSONArray("tokens").length()) {
-        	JSONArray relations = jsonS.getJSONArray("dependencyParse");
-            for (int i=0; i<relations.length(); i++) {
-            	loadRelation(s, new JSONObject(relations.get(i).toString()));
-            }
-        } else {
-        	//System.out.println(s.toStringWithoutMentions());
-        }
-    }
-    
-    private static void loadRelation(Sentence s, JSONObject jsonRelation) {
-    	String label = jsonRelation.getString("label");
-    	if (!label.equals("root") && !jsonRelation.getString("startTokenId").isEmpty() &&
-    			jsonRelation.get("startTokenId").getClass() == String.class) {
-    		String[] sourceIdParts = jsonRelation.getString("startTokenId").split("\\.");
-    		String[] targetIdParts = jsonRelation.getString("endTokenId").split("\\.");
-    		
-        	int sourceId = Integer.parseInt(sourceIdParts[sourceIdParts.length-1]);
-        	int targetId = Integer.parseInt(targetIdParts[sourceIdParts.length-1]);
-        	
-        	Token source = s.get(sourceId);
-        	Token target = s.get(targetId);
-        	
-        	source.addRelation(new Relation(label, target));
-        	target.setReturnRelation(new Relation(label, source));
-    	}
     }
  
     private static void loadMentions(Sentence s, TEIMention m,