From 78e20b4b74f1f5a06ee32a42436f1fcff34afc05 Mon Sep 17 00:00:00 2001
From: Wojciech Jaworski <wjaworski@mimuw.edu.pl>
Date: Sat, 22 Oct 2016 12:32:42 +0200
Subject: [PATCH] generowanie wejścia dla Swigry i POLFIE

---
 corpora/CONLL.ml        | 22 +++++++++++-----------
 parser/exec.ml          | 56 +++++++++++++++++++++++++++++++++++---------------------
 parser/execTypes.ml     | 12 ++++++------
 parser/pipe.ml          |  9 +++++----
 parser/visualization.ml | 70 ++++++++++++++++++++++++++++++++++++----------------------------------
 pre/preProcessing.ml    |  5 +++--
 pre/preSentences.ml     | 23 ++++++++++++-----------
 pre/preTypes.ml         |  8 ++++----
 8 files changed, 112 insertions(+), 93 deletions(-)

diff --git a/corpora/CONLL.ml b/corpora/CONLL.ml
index c4661f0..953ec6e 100644
--- a/corpora/CONLL.ml
+++ b/corpora/CONLL.ml
@@ -33,8 +33,8 @@ let string_of_paths mode tokens paths =
 
 let rec string_of_sentence mode tokens = function
       RawSentence s -> if mode = Raw then s else ""
-    | StructSentence (_,tokens, _) -> failwith ("string_of_sentence: StructSentence") (*String.concat "\n" @@ Xlist.map tokens (fun x -> string_of_token mode x)*)
-    | DepSentence (_, paths) -> string_of_paths mode tokens paths
+    | StructSentence (tokens, _) -> failwith ("string_of_sentence: StructSentence") (*String.concat "\n" @@ Xlist.map tokens (fun x -> string_of_token mode x)*)
+    | DepSentence (paths) -> string_of_paths mode tokens paths
     | QuotedSentences _ -> failwith ("string_of_sentence: QuotedSentences")
     | AltSentence alts -> alternative_string (string_of_sentence mode tokens) mode alts
 
@@ -111,8 +111,8 @@ let info_map =
 let match_sentence (p_record,tokens) =
   let rec info_token s = match s with
       RawSentence text -> failwith ("match_sentence: " ^ text)
-    | StructSentence (_, tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*)
-    | DepSentence (_, paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths
+    | StructSentence (tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*)
+    | DepSentence (paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths
     | QuotedSentences _ -> failwith ("match_sentence: QuotedSentences")
     | AltSentence alts -> failwith ("match_sentence: AltSentence")
         (*if List.exists (fun (mode, s) -> mode = CONLL) alts
@@ -122,8 +122,8 @@ let match_sentence (p_record,tokens) =
   try
     let id, text = StringMap.find info_map info_token in
     let beg, len = establish_lengths text paths tokens (* -1, -1, p_record.psentence *) in
-    AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len;
-     psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence("", paths)]}]],tokens)]
+    AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len; pfile_prefix="";
+     psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence paths]}]],tokens)]
 (*  {s_id = id; s_text = text; s_tokens = sentence.s_tokens} *)
   with _ -> AltText[CONLL,StructText([StructParagraph[p_record]],tokens)]
 
@@ -188,8 +188,8 @@ let info_map =
 let match_sentence (p_record,tokens) =
   let rec info_token s = match s with
       RawSentence text -> failwith ("match_sentence: " ^ text)
-    | StructSentence (_, tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*)
-    | DepSentence (_, paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths
+    | StructSentence (tokens, n) -> failwith ("match_sentence: StructSentence") (*String.concat " " @@ List.map (fun x -> x.orth) tokens*)
+    | DepSentence (paths) -> String.concat " " @@ List.map (fun (id,_,_) -> (ExtArray.get tokens id).orth) (List.tl (Array.to_list paths)), paths
     | QuotedSentences _ -> failwith ("match_sentence: QuotedSentences")
     | AltSentence alts -> failwith ("match_sentence: AltSentence")
         (*if List.exists (fun (mode, s) -> mode = CONLL) alts
@@ -199,8 +199,8 @@ let match_sentence (p_record,tokens) =
   try
     let id, text = StringMap.find info_map info_token in
     let beg, len = establish_lengths text paths tokens (* -1, -1, p_record.psentence *) in
-    AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len;
-     psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence("", paths)]}]],tokens)]
+    AltText[Raw,RawText text;CONLL,StructText([StructParagraph[{pid = id; pbeg = beg; plen = len; pnext = beg+len; pfile_prefix="";
+     psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence paths]}]],tokens)]
 (*  {s_id = id; s_text = text; s_tokens = sentence.s_tokens} *)
   with _ -> AltText[CONLL,StructText([StructParagraph[p_record]],tokens)]
 
@@ -274,7 +274,7 @@ let load_sentence in_channel =
           then raise End_of_file
           else rev_paths, id in
   let rev_paths, id = pom [] "" in
-  {pid = id; pbeg = -1; plen = -1; pnext = -1; psentence = DepSentence("",Array.of_list ((0,-1,"") :: List.rev rev_paths))}, tokens
+  {pid = id; pbeg = -1; plen = -1; pnext = -1; pfile_prefix = ""; psentence = DepSentence(Array.of_list ((0,-1,"") :: List.rev rev_paths))}, tokens
 (*  {s_id = id; s_text = ""; s_paths = (List.rev rev_paths)} *)
 
 let load_corpus in_channel =
diff --git a/parser/exec.ml b/parser/exec.ml
index 0370366..1645c49 100644
--- a/parser/exec.ml
+++ b/parser/exec.ml
@@ -43,7 +43,7 @@ let empty_result = {
   (*structs=SemTypes.Atom "",SemTypes.Label "",SemTypes.Label "",[],""*)}
 
 let empty_eniam_parse_result = {
-  id="";
+  file_prefix="";
   status=Idle;
   msg="";
   lex_time=0.;
@@ -58,7 +58,7 @@ let empty_eniam_parse_result = {
   }
 
 let empty_conll_parse_result = {
-  id="";
+  file_prefix="";
   status=Idle;
   msg="";
   lex_time=0.;
@@ -102,14 +102,16 @@ let translate_mode = function
   | PreTypes.CONLL -> CONLL
   | PreTypes.ENIAM -> ENIAM
   | PreTypes.Mate -> Mate
+  | PreTypes.Swigra -> Swigra
+  | PreTypes.POLFIE -> POLFIE
 
 let rec translate_sentence = function
     PreTypes.RawSentence s -> RawSentence s
-  | PreTypes.StructSentence(id,paths,last) -> StructSentence(id,paths,last)
-  | PreTypes.DepSentence(id,paths) -> DepSentence(id,paths)
+  | PreTypes.StructSentence(paths,last) -> StructSentence(paths,last) (* payload is passed through unchanged *)
+  | PreTypes.DepSentence(paths) -> DepSentence(paths)
   | PreTypes.QuotedSentences sentences ->
       QuotedSentences(Xlist.map sentences (fun p ->
-        {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext;
+        {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix; (* record is rebuilt field by field from the PreTypes record; the nested sentence is translated recursively on the next line *)
          psentence=translate_sentence p.PreTypes.psentence}))
   | PreTypes.AltSentence l -> AltSentence(Xlist.map l (fun (mode,sentence) ->
       translate_mode mode, translate_sentence sentence))
@@ -118,7 +120,7 @@ let rec translate_paragraph = function
     PreTypes.RawParagraph s -> RawParagraph s
   | PreTypes.StructParagraph sentences ->
       StructParagraph(Xlist.map sentences (fun p ->
-        {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext;
+        {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix;
          psentence=translate_sentence p.PreTypes.psentence}))
   | PreTypes.AltParagraph l -> AltParagraph(Xlist.map l (fun (mode,paragraph) ->
       translate_mode mode, translate_paragraph paragraph))
@@ -130,8 +132,8 @@ let rec translate_text = function
   | PreTypes.AltText l -> AltText(Xlist.map l (fun (mode,text) ->
       translate_mode mode, translate_text text))
 
-let eniam_parse_sentence timeout test_only_flag id paths last tokens =
-  let result = {empty_eniam_parse_result with id=id} in
+let eniam_parse_sentence timeout test_only_flag paths last tokens =
+  let result = empty_eniam_parse_result in
   let time2 = time_fun () in
   try
     let chart = LCGlexicon.create (paths,last) tokens in
@@ -187,8 +189,8 @@ let eniam_parse_sentence timeout test_only_flag id paths last tokens =
     let time3 = time_fun () in
     {result with status=LexiconError; msg=Printexc.to_string e; lex_time=time3 -. time2}
 
-let conll_parse_sentence timeout test_only_flag id paths tokens =
-  let result = {empty_conll_parse_result with id=id} in
+let conll_parse_sentence timeout test_only_flag paths tokens =
+  let result = empty_conll_parse_result in
   let time2 = time_fun () in
   try
     let dep_chart = LCGlexicon.dep_create paths tokens in
@@ -253,22 +255,33 @@ let conll_parse_sentence timeout test_only_flag id paths tokens =
 
 let mate_in, mate_out = Unix.open_process "java -jar ../dependencyParser/basic/mate-tools/dist/anna-3.5.jar -model ../dependencyParser/basic/mate-tools/examples/160622_Polish_MateParser.mdl -test"
 
+let file_prefix_of_mode = function (* short tag for a parsing mode; parse_sentence puts it in front of the per-sentence file_prefix stored in the parse result *)
+    Raw -> "R"
+  | Struct -> "St" (* two letters: "S" alone denotes Swigra below *)
+  | CONLL -> "C"
+  | ENIAM -> "E"
+  | Mate -> "M"
+  | Swigra -> "S"
+  | POLFIE -> "P"
+
 let get_paths = function
-    {PreTypes.psentence=PreTypes.DepSentence(_,paths)},_ -> paths
+    {PreTypes.psentence=PreTypes.DepSentence(paths)},_ -> paths (* takes a pair: only a record whose psentence is a bare DepSentence is accepted, the second component is ignored; anything else fails below *)
   | _ -> failwith "get_paths"
 
-let rec parse_sentence timeout test_only_flag mode tokens = function
+let rec parse_sentence timeout test_only_flag mode file_prefix tokens = function
     RawSentence s -> RawSentence s
-  | StructSentence(id,paths,last) ->
+  | StructSentence(paths,last) ->
       (match mode with
         ENIAM ->
-          let result = eniam_parse_sentence timeout test_only_flag id paths last tokens in
+          let result = eniam_parse_sentence timeout test_only_flag paths last tokens in
+          let result = {result with file_prefix = file_prefix_of_mode mode ^ file_prefix} in
           ENIAMSentence result
       | _ -> failwith "parse_sentence")
-  | DepSentence(id,paths) ->
+  | DepSentence(paths) ->
       (match mode with
         CONLL ->
-          let result = conll_parse_sentence timeout test_only_flag id paths tokens in
+          let result = conll_parse_sentence timeout test_only_flag paths tokens in
+          let result = {result with file_prefix = file_prefix_of_mode mode ^ file_prefix} in
           CONLLSentence result
           (* let xml = DepTree.conll_to_xml paths in
           let graph = XmlPrinter.graph_of_xml xml in (* FIXME: do poprawy *)
@@ -279,22 +292,23 @@ let rec parse_sentence timeout test_only_flag mode tokens = function
           print_endline "parse_sentence 1";
           let conll = CONLL.string_of_paths PreTypes.Mate tokens paths in
           print_endline "parse_sentence 2";
-          printf "|%s|\n" conll;
+          (* printf "|%s|\n" conll; *)
           Printf.fprintf mate_out "%s\n\n%!" conll;
           print_endline "parse_sentence 3";
           let new_paths = get_paths (CONLL.load_sentence mate_in) in
           print_endline "parse_sentence 4";
-          let result = conll_parse_sentence timeout test_only_flag id new_paths tokens in
+          let result = conll_parse_sentence timeout test_only_flag new_paths tokens in
+          let result = {result with file_prefix = file_prefix_of_mode mode ^ file_prefix} in
           CONLLSentence result
       | _ -> failwith "parse_sentence")
   | QuotedSentences sentences ->
       let sentences = Xlist.rev_map sentences (fun p ->
-        let sentence = parse_sentence timeout test_only_flag mode tokens p.psentence in
+        let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens p.psentence in
         {p with psentence=sentence}) in
       QuotedSentences(List.rev sentences)
   | AltSentence l ->
       let l = Xlist.rev_map l (fun (mode,sentence) ->
-        mode, parse_sentence timeout test_only_flag mode tokens sentence) in
+        mode, parse_sentence timeout test_only_flag mode file_prefix tokens sentence) in
       AltSentence(List.rev l)
  | _ -> failwith "parse_sentence"
 
@@ -302,7 +316,7 @@ let rec parse_paragraph timeout test_only_flag mode tokens = function
     RawParagraph s -> RawParagraph s
   | StructParagraph sentences ->
       let sentences = Xlist.rev_map sentences (fun p ->
-        let sentence = parse_sentence timeout test_only_flag mode tokens p.psentence in
+        let sentence = parse_sentence timeout test_only_flag mode p.pfile_prefix tokens p.psentence in
         {p with psentence=sentence}) in
       StructParagraph(List.rev sentences)
   | AltParagraph l ->
diff --git a/parser/execTypes.ml b/parser/execTypes.ml
index f6841fc..ab8c868 100644
--- a/parser/execTypes.ml
+++ b/parser/execTypes.ml
@@ -20,7 +20,7 @@
 type status = Idle | PreprocessingError | LexiconError | ParseError | ParseTimeout | Parsed | TooManyNodes | NotParsed | NotReduced | ReductionError | SemError | NotTranslated
 
 type eniam_parse_result = {
-  id: string;
+  file_prefix: string;
   status: status;
   msg: string;
   lex_time: float;
@@ -35,7 +35,7 @@ type eniam_parse_result = {
   }
 
 type conll_parse_result = {
-  id: string;
+  file_prefix: string;
   status: status;
   msg: string;
   lex_time: float;
@@ -54,13 +54,13 @@ type conll_parse_result = {
   }
 
 type mode =
-    Raw | Struct | CONLL | ENIAM | Mate
+    Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE (* same constructor names as PreTypes.mode; translate_mode in parser/exec.ml converts from that type to this one *)
 
 type sentence =
     RawSentence of string
   (* | CONLL of conll list *)
-  | StructSentence of string * (int * int * int) list * int (* file_prefix * (id * lnode * rnode) list * last *)
-  | DepSentence of string * (int * int * string) array (* file_prefix * (id * super * label) conll_id *)
+  | StructSentence of (int * int * int) list * int (* (id * lnode * rnode) list * last *)
+  | DepSentence of (int * int * string) array (* (id * super * label) conll_id *)
   | QuotedSentences of paragraph_record list
   (* | NKJP1M of nkjp1m list *)
   (* | Skladnica of skladnica_tree *)
@@ -68,7 +68,7 @@ type sentence =
   | ENIAMSentence of eniam_parse_result
   | CONLLSentence of conll_parse_result
 
-and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence} (* beg i len liczone po znakach unicode ( * 100 ???) *)
+and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence; pfile_prefix: string} (* beg i len liczone po znakach unicode ( * 100 ???) *)
 
 and paragraph =
     RawParagraph of string
diff --git a/parser/pipe.ml b/parser/pipe.ml
index 3df2897..8cb246c 100644
--- a/parser/pipe.ml
+++ b/parser/pipe.ml
@@ -118,9 +118,9 @@ let lcg_process query =
   let _ = Unix.shutdown_connection ic in
   ()
 
-(* let _ =
+let _ =
   if Array.length Sys.argv < 2 then print_endline "missing argument" else
-  lcg_process Sys.argv.(1) *)
+  lcg_process Sys.argv.(1)
 
 
 (* FIXME: parser dziwnie się zachowuje dla 'ścieżki anomalia.' 'ścieżki anomalia. GG' itp. - nie parsuje '.' a jak sparsuje to nie chce redukować *)
@@ -210,7 +210,7 @@ let process_conll_corpus filename =
 let _ =
   (* process_conll_corpus "../../NLP resources/Skladnica-zaleznosciowa-mod_130121.conll"; *)
   (* process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; *)
-  process_conll_corpus "../testy/skladnica-test1.conll";
+  (* process_conll_corpus "../testy/skladnica-test1.conll"; *)
   ()
 
   (* TO DO:
@@ -227,7 +227,8 @@ let _ =
   - assign_not_parsed
   - sprawdzenie zerowania globalnych referencji przy parsowaniu korpusu
   - mateParser
-  2016.10.19
+  2016.10.22
+  - rework AltSentence so that the file name prefix becomes one of its elements, or put the number from the prefix into paragraph_record
   *)
 
 
diff --git a/parser/visualization.ml b/parser/visualization.ml
index 6d8c1be..9a21b45 100644
--- a/parser/visualization.ml
+++ b/parser/visualization.ml
@@ -640,6 +640,8 @@ let string_of_mode = function
   | CONLL -> "CONLL"
   | ENIAM -> "ENIAM"
   | Mate -> "Mate"
+  | Swigra -> "Swigra"
+  | POLFIE -> "POLFIE"
 
 (*let rec string_of_sentence = function
     RawSentence s -> sprintf "RawSentence(%s)" s
@@ -742,30 +744,30 @@ let html_of_eniam_sentence path tokens (result : eniam_parse_result) =
   (* | PreprocessingError -> "error_pre: %s\n" result.msg *)
   | LexiconError -> sprintf "error_lex: %s\n" result.msg
   | ParseError ->
-      create_latex_chart path (result.id ^ "_chart") result.chart;
+      create_latex_chart path (result.file_prefix ^ "_chart") result.chart;
       sprintf "error_parse: %s\n" result.msg ^
-      sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.id
+      sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.file_prefix
   | ParseTimeout ->
-      create_latex_chart path (result.id ^ "_chart") result.chart;
+      create_latex_chart path (result.file_prefix ^ "_chart") result.chart;
       sprintf "timeout: %s\n" result.msg ^
-      sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.id
+      sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.file_prefix
   | NotParsed ->
-      create_latex_chart path (result.id ^ "_chart") result.chart;
+      create_latex_chart path (result.file_prefix ^ "_chart") result.chart;
       sprintf "not_parsed: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^
-      sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.id
+      sprintf "<BR><A HREF=\"%s_chart.pdf\">Chart</A>\n" result.file_prefix
   | ReductionError -> sprintf "error_reduction: %s\n" result.msg
   | TooManyNodes -> sprintf "to_many_nodes: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size
   | NotReduced -> sprintf "not_reduced: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size
   | SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size
   (* | NotTranslated -> "not_translated: \n"  *)
   | Parsed ->
-      print_simplified_dependency_tree path (result.id ^ "_simplified_dependency_tree") tokens result.dependency_tree;
-      print_dependency_tree path (result.id ^ "_dependency_tree") result.dependency_tree;
-      LCGlatexOf.print_dependency_tree path (result.id ^ "_dependency_tree_references") result.dependency_tree;
+      print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree;
+      print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree;
+      LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree;
       sprintf "parsed: paths_size=%d chart_size=%d dependency_tree_size=%d\n" result.paths_size result.chart_size result.dependency_tree_size ^
-      sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.id ^
-      sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.id ^
-      sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.id
+      sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^
+      sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^
+      sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix
   | _ -> failwith "html_of_eniam_sentence"
 
 let html_of_conll_sentence path tokens (result : conll_parse_result) =
@@ -774,46 +776,46 @@ let html_of_conll_sentence path tokens (result : conll_parse_result) =
   (* | PreprocessingError -> "error_pre: %s\n" result.msg *)
   | LexiconError -> sprintf "error_lex: %s\n" result.msg
   | ParseError ->
-      create_latex_dep_chart path (result.id ^ "_dep_chart") result.dep_chart;
-      create_latex_parsed_dep_chart path (result.id ^ "_parsed_dep_chart") result.parsed_dep_chart;
+      create_latex_dep_chart path (result.file_prefix ^ "_dep_chart") result.dep_chart;
+      create_latex_parsed_dep_chart path (result.file_prefix ^ "_parsed_dep_chart") result.parsed_dep_chart;
       sprintf "error_parse: %s\n" result.msg ^
-      sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.id ^
-      sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.id
+      sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.file_prefix ^
+      sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.file_prefix
   | ParseTimeout ->
-      create_latex_dep_chart path (result.id ^ "_dep_chart") result.dep_chart;
-      create_latex_parsed_dep_chart path (result.id ^ "_parsed_dep_chart") result.parsed_dep_chart;
+      create_latex_dep_chart path (result.file_prefix ^ "_dep_chart") result.dep_chart;
+      create_latex_parsed_dep_chart path (result.file_prefix ^ "_parsed_dep_chart") result.parsed_dep_chart;
       sprintf "timeout: %s\n" result.msg ^
-      sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.id ^
-      sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.id
+      sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.file_prefix ^
+      sprintf "<BR><A HREF=\"%s_parsed_dep_chart.pdf\">Parsed Chart</A>\n" result.file_prefix
   | NotParsed ->
-      create_latex_dep_chart path (result.id ^ "_dep_chart") result.dep_chart;
-      create_latex_not_parsed_dep_chart path (result.id ^ "_not_parsed_dep_chart") result.not_parsed_dep_chart;
+      create_latex_dep_chart path (result.file_prefix ^ "_dep_chart") result.dep_chart;
+      create_latex_not_parsed_dep_chart path (result.file_prefix ^ "_not_parsed_dep_chart") result.not_parsed_dep_chart;
       sprintf "not_parsed\n" ^
-      sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.id ^
-      sprintf "<BR><A HREF=\"%s_not_parsed_dep_chart.pdf\">Not Parsed Chart</A>\n" result.id
+      sprintf "<BR><A HREF=\"%s_dep_chart.pdf\">Chart</A>\n" result.file_prefix ^
+      sprintf "<BR><A HREF=\"%s_not_parsed_dep_chart.pdf\">Not Parsed Chart</A>\n" result.file_prefix
   | ReductionError -> sprintf "error_reduction: %s\n" result.msg
   | TooManyNodes -> sprintf "to_many_nodes: paths_size=%d\n" result.paths_size
   | NotReduced ->
-      LCGlatexOf.print_dependency_tree path (result.id ^ "_dependency_tree_references") result.dependency_tree;
+      LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree;
       sprintf "not_reduced: paths_size=%d\n" result.paths_size ^
-      sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.id
+      sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix
   | SemError -> sprintf "error_sem: %s dependency_tree_size=%d\n" result.msg result.dependency_tree_size
   (* | NotTranslated -> "not_translated: \n"  *)
   | Parsed ->
-      print_simplified_dependency_tree path (result.id ^ "_simplified_dependency_tree") tokens result.dependency_tree;
-      print_dependency_tree path (result.id ^ "_dependency_tree") result.dependency_tree;
-      LCGlatexOf.print_dependency_tree path (result.id ^ "_dependency_tree_references") result.dependency_tree;
+      print_simplified_dependency_tree path (result.file_prefix ^ "_simplified_dependency_tree") tokens result.dependency_tree;
+      print_dependency_tree path (result.file_prefix ^ "_dependency_tree") result.dependency_tree;
+      LCGlatexOf.print_dependency_tree path (result.file_prefix ^ "_dependency_tree_references") result.dependency_tree;
       sprintf "parsed: paths_size=%d dependency_tree_size=%d\n" result.paths_size result.dependency_tree_size ^
-      sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.id ^
-      sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.id ^
-      sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.id
+      sprintf "<BR><A HREF=\"%s_simplified_dependency_tree.png\">Simplified Dependency Tree</A>\n" result.file_prefix ^
+      sprintf "<BR><A HREF=\"%s_dependency_tree.png\">Dependency Tree</A>\n" result.file_prefix ^
+      sprintf "<BR><A HREF=\"%s_dependency_tree_references.pdf\">Dependency Tree References</A>\n" result.file_prefix
   | _ -> failwith "html_of_conll_sentence"
 
 
 let rec html_of_sentence path tokens = function
     RawSentence s -> s
-  | StructSentence(_,paths,last) -> html_of_struct_sentence tokens paths last
-  | DepSentence(_,paths) -> html_of_dep_sentence tokens paths
+  | StructSentence(paths,last) -> html_of_struct_sentence tokens paths last
+  | DepSentence(paths) -> html_of_dep_sentence tokens paths
   | ENIAMSentence result -> html_of_eniam_sentence path tokens result
   | CONLLSentence result -> html_of_conll_sentence path tokens result
   | QuotedSentences sentences ->
diff --git a/pre/preProcessing.ml b/pre/preProcessing.ml
index 3e97784..db2f025 100644
--- a/pre/preProcessing.ml
+++ b/pre/preProcessing.ml
@@ -614,12 +614,13 @@ let parse_text = function
         AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) in
       AltText[Raw,RawText query; Struct,StructText(List.rev paragraphs, tokens)]
   | AltText[Raw,RawText query;CONLL,StructText([
-            StructParagraph[{psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence(_,dep_paths)]} as p]],tokens)] ->
+            StructParagraph[{psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence dep_paths]} as p]],tokens)] ->
         parse_conll tokens dep_paths;
         let paths = parse query in
         let sentences = PreSentences.split_into_sentences query tokens paths in
+        let m_dep_paths = Array.map (fun (id,_,_) -> id,-1,"") dep_paths in
         let conll = StructParagraph[{p with psentence = AltSentence[Raw, RawSentence text;
-          Mate, DepSentence("M",dep_paths); CONLL, DepSentence("C",dep_paths)]}] in
+          Mate, DepSentence m_dep_paths; CONLL, DepSentence dep_paths]}] in
         AltText[Raw,RawText query; Struct, StructText([
           AltParagraph[Raw,RawParagraph query; ENIAM, StructParagraph sentences; CONLL, conll]],tokens)]
   | _ -> failwith "parse_text: not implemented"
diff --git a/pre/preSentences.ml b/pre/preSentences.ml
index 2745985..abe5073 100644
--- a/pre/preSentences.ml
+++ b/pre/preSentences.ml
@@ -147,17 +147,15 @@ let find_tokens_in_chart tokens chart lnode rnode cat =
 
 let rec add_struct_sentence_ids_rec n sentences =
   Xlist.fold sentences ([],n) (fun (l,n) -> function
-      {psentence=AltSentence[Raw,s;ENIAM,StructSentence(_,paths,last)]} as p ->
-         {p with psentence=AltSentence[Raw,s;ENIAM,StructSentence("E" ^ string_of_int n,paths,last)]} :: l, n+1
-    | {psentence=AltSentence[Raw,s;ENIAM,QuotedSentences sentences]} as p ->
+      {psentence=AltSentence[Raw,s;Struct,QuotedSentences sentences]} as p -> (* quoted group: number the nested records, the wrapper keeps its own pfile_prefix *)
          let sentences, n = add_struct_sentence_ids_rec n sentences in
-         {p with psentence=AltSentence[Raw,s;ENIAM,QuotedSentences (List.rev sentences)]} :: l, n+1
-    | _ -> failwith "add_struct_sentence_ids")
+         {p with psentence=AltSentence[Raw,s;Struct,QuotedSentences (List.rev sentences)]} :: l, n (* nested records were consed onto the accumulator, hence List.rev; n is the counter returned by the nested call and is not advanced for the wrapper *)
+    | p -> {p with pfile_prefix=string_of_int n} :: l, n+1) (* every other record takes the current counter as its pfile_prefix and advances it; the result pairs the accumulated list with the next free counter *)
 
 let add_struct_sentence_ids sentences =
   match sentences with
-    [{psentence=AltSentence[Raw,s;ENIAM,StructSentence(_,paths,last)]} as p] ->
-        [{p with psentence=AltSentence[Raw,s;ENIAM,StructSentence("E",paths,last)]}]
+    [{psentence=AltSentence[Raw,_;Struct,QuotedSentences _]}] -> List.rev (fst (add_struct_sentence_ids_rec 1 sentences)) (* a lone quoted group is still numbered, counting from 1; it must be matched before the single-element case below *)
+  | [p] -> [p] (* any other single record is returned unchanged, so its pfile_prefix is not set here *)
   | _ -> List.rev (fst (add_struct_sentence_ids_rec 1 sentences))
 
 let prepare_indexes paths =
@@ -181,13 +179,16 @@ let rec extract_sentences_rec tokens id =
   match t.token with
     Tokens("sentence",ids) ->
       let paths,last = make_paths tokens ids in
-      [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next;
+      [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next; pfile_prefix="";
         psentence=AltSentence[Raw,RawSentence t.orth;
-                              ENIAM,StructSentence("",paths,last)]}]
+                              ENIAM,StructSentence(paths,last);
+                              Mate,RawSentence t.orth;
+                              Swigra,RawSentence t.orth;
+                              POLFIE,RawSentence t.orth]}]
   | Tokens("quoted_sentences",ids) ->
-      [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next;
+      [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next; pfile_prefix="";
         psentence=AltSentence[Raw,RawSentence t.orth;
-          ENIAM,QuotedSentences(List.sort par_compare (List.flatten (Xlist.rev_map ids (extract_sentences_rec tokens))))]}]
+          Struct,QuotedSentences(List.sort par_compare (List.flatten (Xlist.rev_map ids (extract_sentences_rec tokens))))]}]
   | _ -> []
 
 let extract_sentences tokens chart last =
diff --git a/pre/preTypes.ml b/pre/preTypes.ml
index e3d120a..f63ae2a 100644
--- a/pre/preTypes.ml
+++ b/pre/preTypes.ml
@@ -117,7 +117,7 @@ let empty_token = {
   lroles="",""; semantics=Normal}
 
 type mode =
-    Raw | Struct | CONLL | ENIAM | Mate
+    Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE (* the mode type in parser/execTypes.ml lists the same constructors; translate_mode in parser/exec.ml converts from this type to that one *)
 
 (* warstwy nkjp1m do analizy:
 header
@@ -133,14 +133,14 @@ ann_named
 type sentence =
     RawSentence of string
   (* | CONLL of conll list *)
-  | StructSentence of string * (int * int * int) list * int (* file_prefix * (id * lnode * rnode) list * last *)
-  | DepSentence of string * (int * int * string) array (* file_prefix * (id * super * label) conll_id *)
+  | StructSentence of (int * int * int) list * int (* (id * lnode * rnode) list * last *)
+  | DepSentence of (int * int * string) array (* (id * super * label) conll_id *)
   | QuotedSentences of paragraph_record list
   (* | NKJP1M of nkjp1m list *)
   (* | Skladnica of skladnica_tree *)
   | AltSentence of (mode * sentence) list  (* string = etykieta np raw, nkjp, krzaki *)
 
-and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence} (* beg i len liczone po znakach unicode ( * 100 ???) *)
+and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence; pfile_prefix: string} (* beg i len liczone po znakach unicode ( * 100 ???) *)
 
 and paragraph =
     RawParagraph of string
--
libgit2 0.22.2