Commit c420af232462c36c07b7ddbfae7daab85645724d
1 parent 9fe1accb
fix test_conll - print diagnostic data
Showing 12 changed files with 154 additions and 412 deletions
LCGparser/ENIAM_LCGrenderer.ml
... | ... | @@ -164,7 +164,7 @@ let rec make_raised_term_imp inner_node outer_node arg_symbol = function |
164 | 164 | | Tensor l -> |
165 | 165 | if outer_node.lemma="" then inner_node else |
166 | 166 | Node (add_args outer_node [Cut(SetAttr("ARG_SYMBOL",arg_symbol,inner_node))]) |
167 | - | _ -> failwith "make_raised_term_imp" | |
167 | + | c -> (print_endline (ENIAM_LCGstringOf.grammar_symbol 0 c); failwith "make_raised_term_imp") | |
168 | 168 | |
169 | 169 | let is_raised = function |
170 | 170 | [_,Imp(_,_,_)] -> true |
... | ... |
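The change above swaps the opaque catch-all failwith for a branch that first prints the unmatched grammar symbol, so the diagnostic output names the value that broke the match. Below is a minimal standalone sketch of the same pattern; the symbol type and string_of_symbol printer are hypothetical stand-ins for the grammar symbol type and ENIAM_LCGstringOf.grammar_symbol.

(* Sketch: print a readable rendering of the unexpected value before failing,
   so the log identifies the actual input that was not handled. *)
type symbol = Tensor of string list | Imp of string | Other of string

let string_of_symbol = function
  | Tensor l -> "Tensor(" ^ String.concat "*" l ^ ")"
  | Imp s -> "Imp(" ^ s ^ ")"
  | Other s -> s

let handle = function
  | Tensor l -> String.concat " " l
  | c ->
      (* print the offending constructor, then fail as before *)
      print_endline (string_of_symbol c);
      failwith "handle"

let () = print_endline (handle (Tensor ["np"; "nom"]))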
corpora/README
0 → 100644
1 | +ENIAMcorpora Version 1.0: | |
2 | +----------------------- | |
3 | + | |
4 | +ENIAMcorpora is a library that | |
5 | +- parses corpora into the CONLL format; | |
6 | +- converts dependency structures; | |
7 | +- tests the results of conversion. | |
8 | + | |
9 | +Install | |
10 | +------- | |
11 | + | |
12 | +ENIAMcorpora requires the OCaml compiler version 4.02.3 | |
13 | +together with Xlib library version 3.2 or later, | |
14 | +ENIAMtokenizer library version 1.1, ENIAMmorphology library version 1.1, | |
15 | +ENIAMsubsyntax library version 1.1, ENIAMintegration library version 1.0, | |
16 | +ENIAM_LCGparser library version 2.0, ENIAM_LCGlexicon library version 1.0, | |
17 | +ENIAMsemValence library version 1.0. | |
18 | + | |
19 | +In order to install, type: | |
20 | + | |
21 | +make install | |
22 | + | |
23 | +By default, ENIAMcorpora is installed in the 'ocamlc -where'/eniam directory. | |
24 | +You can change this by editing the Makefile. | |
25 | + | |
26 | +In order to test the library, type (Graphviz must be installed): | |
27 | +make test | |
28 | +./test | |
29 | + | |
30 | +By default, ENIAMcorpora looks for resources in the /usr/share/eniam directory. | |
31 | +However, this behaviour may be changed by setting and exporting the ENIAM_RESOURCE_PATH | |
32 | +environment variable. | |
33 | + | |
34 | +Credits | |
35 | +------- | |
36 | +Copyright © 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
37 | +Copyright © 2016 Daniel Oklesinski <oklesinski dot daniel atSPAMfree gmail dot com> | |
38 | +Copyright © 2016 Institute of Computer Science, Polish Academy of Sciences | |
39 | + | |
40 | +The library uses the following licensed resources: | |
41 | + | |
42 | +NKJP1M: the manually annotated 1-million word subcorpus sampled | |
43 | +from texts of a subset of the National Corpus of Polish. | |
44 | +version 1.2 | |
45 | + | |
46 | +SGJP: Grammatical Dictionary of Polish, version 20151020 | |
47 | +Copyright © 2007–2015 Zygmunt Saloni, Włodzimierz Gruszczyński, Marcin | |
48 | +Woliński, Robert Wołosz, Danuta Skowrońska | |
49 | + | |
50 | +Licence | |
51 | +------- | |
52 | + | |
53 | +This library is free software: you can redistribute it and/or modify | |
54 | +it under the terms of the GNU Lesser General Public License as published by | |
55 | +the Free Software Foundation, either version 3 of the License, or | |
56 | +(at your option) any later version. | |
57 | + | |
58 | +This library is distributed in the hope that it will be useful, | |
59 | +but WITHOUT ANY WARRANTY; without even the implied warranty of | |
60 | +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
61 | +GNU Lesser General Public License for more details. | |
62 | + | |
63 | +You should have received a copy of the GNU Lesser General Public License | |
64 | +along with this program. If not, see <http://www.gnu.org/licenses/>. | |
... | ... |
corpora/XmlPrinter.ml deleted
1 | -open Xstd | |
2 | -open WalTypes | |
3 | -open LCGtypes | |
4 | - | |
5 | -let gf_of_string = function | |
6 | - "subj" -> SUBJ | |
7 | - | "obj" -> OBJ | |
8 | - | "arg"(*""*) -> ARG | |
9 | - | "core" -> CORE | |
10 | - | "nosem" -> NOSEM | |
11 | - | "nogf" -> NOGF | |
12 | - | "adjunct" -> ADJUNCT | |
13 | - | "raised" -> RAISED | |
14 | - | "clause" -> CLAUSE | |
15 | - | "sentence" -> SENTENCE | |
16 | - | s -> prerr_endline s; SUBJ | |
17 | -(* | s -> failwith ("gf_of_string:" ^ s) *) | |
18 | - | |
19 | -(*let morf_of_string s = | |
20 | - let s = Str.split (Str.regexp "[()]") s in | |
21 | - WalParser.parse_morf_single (List.hd s, List.tl s)*) | |
22 | - | |
23 | -let rec lt_of_xml = function | |
24 | - Xml.Element("node",["pred",pred;"cat",cat;"weight",weight;"id",id],[ | |
25 | - Xml.Element("gs",[],[gs]); | |
26 | - Xml.Element("agf",[],[Xml.PCData agf]); | |
27 | - Xml.Element("amorf",[],[amorf]); | |
28 | - Xml.Element("attrs",[],attrs); | |
29 | - Xml.Element("args",[],[args])]) -> | |
30 | - Node{pred=pred; cat=cat; weight=float_of_string weight; id=int_of_string id; | |
31 | - gs = lt_of_xml gs; | |
32 | - agf = gf_of_string agf; (* FIXME *) | |
33 | - amorf = WalTypes.Phrase(WalTypes.Null); (* FIXME *) | |
34 | - arole = ""; (* FIXME *) | |
35 | - arole_attr = ""; (* FIXME *) | |
36 | - meaning = ""; (* FIXME *) | |
37 | - hipero = StringSet.empty; (* FIXME *) | |
38 | - meaning_weight = -1.; (* FIXME *) | |
39 | - position = WalTypes.{gf = WalTypes.SUBJ; role = ""; role_attr = ""; sel_prefs = []; | |
40 | - cr = []; ce = []; dir = WalTypes.Both; morfs = []}; (* FIXME *) | |
41 | - attrs=List.map (function Xml.Element("attr",["label",e],[t]) -> e,lt_of_xml t | _ -> failwith "lt_of_xml") attrs; | |
42 | - args=lt_of_xml args;} | |
43 | - | Xml.Element("tuple",[],l) -> Tuple(List.map lt_of_xml l) | |
44 | - | Xml.Element("val",[],[Xml.PCData s]) -> Val s | |
45 | - | Xml.Element("variants",["label",e],l) -> Variant(e,List.map (function Xml.Element("variant",["id",i],[t]) -> i, lt_of_xml t | _ -> failwith "lt_of_xml") l) | |
46 | - | Xml.Element("dot",[],[]) -> Dot | |
47 | - | Xml.Element("ref",["id",i],[]) -> Ref(int_of_string i) | |
48 | - | xml -> print_endline (Xml.to_string_fmt xml); failwith "lt_of_xml" | |
49 | - | |
50 | -let graph_of_xml xml = | |
51 | - let establish_indexs graph = | |
52 | - let max = Xlist.fold graph 0 (fun acc (n, _) -> if n > acc then n else acc) in | |
53 | - let table = Array.make (max+1) Dot in | |
54 | - Xlist.iter graph (fun (n,x) -> table.(n) <- x); table in | |
55 | - match xml with | |
56 | - Xml.Element("graph",[],l) -> | |
57 | - establish_indexs @@ List.map (function Xml.Element("graph_node",["id",i],[xml]) -> int_of_string i, lt_of_xml xml | _ -> failwith "graph_of_xml") l | |
58 | - | _ -> failwith "graph_of_xml" | |
59 | - | |
60 | -let print_xml path name xml = | |
61 | - let graph = graph_of_xml xml in | |
62 | - Visualization.print_dependency_tree path name graph | |
63 | - | |
64 | -let load_and_print_xml path name filename = | |
65 | - print_xml path name @@ Xml.parse_file filename | |
66 | - | |
67 | -(*let _ = | |
68 | - load_and_print_xml "xml_test/" "test1.0" "xml_test/sentence1.0.xml"*) |
corpora/conllParser.ml
... | ... | @@ -22,7 +22,7 @@ open Types |
22 | 22 | |
23 | 23 | let skladnica_zaleznosciowa_filename = "../../NLP resources/skladnica_zaleznosciowa.conll" |
24 | 24 | |
25 | -let oc = open_out "../corpora/info_sentences.txt" | |
25 | +let oc = open_out @@ resource_path ^ "/info_sentences.txt" | |
26 | 26 | |
27 | 27 | let empty_token = { c_id = 0; c_orth = ""; c_lemma = ""; c_cat = ""; |
28 | 28 | c_interp = []; c_super = 0; c_label = ""; c_beg = 0; c_len = 0} |
... | ... |
corpora/depTree.ml deleted
1 | -open Xstd | |
2 | -open PreTypes | |
3 | - | |
4 | -let tuple_it taglist = | |
5 | - match List.length taglist with | |
6 | - 0 -> Xml.Element("dot",[],[]) | |
7 | - | 1 -> List.hd taglist | |
8 | - | _ -> Xml.Element("tuple",[],taglist) | |
9 | - | |
10 | -let get_amorf_basic token_r = "empty" (* FIXME *) | |
11 | - | |
12 | -let get_amorf token_r = "empty" (* FIXME *) | |
13 | - | |
14 | -let get_vals token_r cat interp = get_amorf_basic token_r :: | |
15 | - match cat with | |
16 | - "subst" -> List.rev ("ter" :: (List.rev interp)) | |
17 | - | _ -> interp (* FIXME *) | |
18 | - | |
19 | -let get_basic_attrs token_r = ["A","a";"B","b"] (* FIXME *) | |
20 | - | |
21 | -let get_attrs token_r = | |
22 | - let attrs = get_basic_attrs token_r in | |
23 | - List.map (fun (label, value) -> | |
24 | - Xml.Element("attr",["label",label],[ | |
25 | - Xml.Element("val",[],[Xml.PCData value])])) attrs | |
26 | - | |
27 | -let xml_of_gs token_r cat interp = | |
28 | - let vals = get_vals token_r cat interp in (** **) | |
29 | - let vals = List.map (fun x -> Xml.Element("val",[],[Xml.PCData x])) vals in | |
30 | - Xml.Element("gs",[],[tuple_it vals]) | |
31 | - | |
32 | -let xml_of_agf token_r = Xml.Element("agf",[],[Xml.PCData token_r.conll_label]) | |
33 | - | |
34 | -let xml_of_amorf token_r = Xml.Element("amorf",[],[Xml.PCData (get_amorf token_r)]) | |
35 | - | |
36 | -let xml_of_attrs token_r = Xml.Element("attrs",[],get_attrs token_r) (* FIXME *) | |
37 | - | |
38 | -let xml_of_args token_rs token_r = | |
39 | - let children = List.filter (fun pom -> pom.conll_super = token_r.conll_id) token_rs in | |
40 | - let children_to_graph = List.map (fun pom -> | |
41 | - Xml.Element("ref",["id", pom.conll_id],[])) children in | |
42 | - Xml.Element("args",[],[tuple_it children_to_graph]) | |
43 | - | |
44 | -let xml_of_token_r token_rs token_r = | |
45 | - let pred, cat, interp = match token_r.token with | |
46 | - | Lemma(a,b,c) -> a, b, Xlist.map (List.hd c) (fun x -> List.hd x) | |
47 | - | _ -> failwith ("xml_of_token_r: not Lemma") in | |
48 | - Xml.Element("graph_node",["id", token_r.conll_id],[ | |
49 | - Xml.Element("node",["pred",pred;"cat",cat;"weight","0";"id", token_r.conll_id], | |
50 | - (xml_of_gs token_r cat interp) :: (** **) | |
51 | - (xml_of_agf token_r) :: | |
52 | - (xml_of_amorf token_r) :: (** **) | |
53 | - (xml_of_attrs token_r) :: (** **) | |
54 | - [xml_of_args token_rs token_r] | |
55 | - ) ]) | |
56 | - | |
57 | -let conll_to_xml token_rs = | |
58 | - Xml.Element("graph",[],List.map (xml_of_token_r token_rs) token_rs) | |
59 | - | |
60 | - | |
61 | -(***************************************************************************************************) | |
62 | - | |
63 | -let get_info i = function | |
64 | - AltText[Raw,RawText text1;CONLL,StructText([StructParagraph[ | |
65 | - {pid = id; pbeg = beg; plen = len; psentence = | |
66 | - AltSentence[Raw, RawSentence text2; CONLL, StructSentence(_,token_rs,-1)]}]],-1)] -> token_rs, id | |
67 | - | StructText([StructParagraph[{pid = id; pbeg = -1; plen = -1; psentence = | |
68 | - StructSentence(_,token_rs,-1)}]],-1) -> token_rs, "id_not_found" ^ (string_of_int i) | |
69 | - | _ -> failwith "get_info" | |
70 | - | |
71 | -let print_corpus filename = | |
72 | - let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in | |
73 | - List.mapi (fun i x -> | |
74 | - let token_rs, id = get_info i x in | |
75 | - let xml = conll_to_xml token_rs in | |
76 | - let id = Str.global_replace (Str.regexp "/") "_" id in | |
77 | - let oc = open_out ("xml_test/"^id^".xml") in | |
78 | - output_string oc (Xml.to_string_fmt xml); | |
79 | - flush oc; | |
80 | - XmlPrinter.print_xml "xml_test/" id xml) corpus | |
81 | - | |
82 | -(*let _ = | |
83 | - print_corpus "xml_test/sentence1.conll"*) |
corpora/generate.ml
corpora/makefile
... | ... | @@ -4,17 +4,37 @@ OCAMLDEP=ocamldep |
4 | 4 | INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam |
5 | 5 | OCAMLFLAGS=$(INCLUDES) -g |
6 | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-integration.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa |
7 | +INSTALLDIR=`ocamlc -where`/eniam | |
7 | 8 | |
8 | -MODS= ../pre/walTypes.ml ../pre/preTypes.ml types.ml CONLL.ml | |
9 | +SOURCES= types.ml CONLL.ml CONLL_adapter.ml resources.ml conllParser.ml interpsInCorpus.ml generate.ml | |
9 | 10 | |
10 | -all: | |
11 | - $(OCAMLOPT) -o generate $(OCAMLOPTFLAGS) $(MODS) resources.ml conllParser.ml interpsInCorpus.ml generate.ml | |
11 | +all: eniam-corpora.cma eniam-corpora.cmxa freq_test | |
12 | + $(OCAMLOPT) -o generate $(OCAMLOPTFLAGS) $(SOURCES) | |
12 | 13 | |
13 | -lib: | |
14 | - $(OCAMLOPT) -linkall -a -o corpora.cmxa $(INCLUDES) $(MODS) | |
14 | +install: all | |
15 | + mkdir -p $(INSTALLDIR) | |
16 | + cp eniam-corpora.cmxa eniam-corpora.a eniam-corpora.cma $(INSTALLDIR) | |
17 | + cp types.cmi CONLL.cmi CONLL_adapter.cmi resources.cmi conllParser.cmi interpsInCorpus.cmi generate.cmi $(INSTALLDIR) | |
18 | + cp types.cmx CONLL.cmx CONLL_adapter.cmx resources.cmx conllParser.cmx interpsInCorpus.cmx generate.cmx $(INSTALLDIR) | |
19 | + mkdir -p /usr/share/eniam/corpora | |
20 | + cp info_sentences* /usr/share/eniam/corpora | |
21 | + | |
22 | +install-local: all | |
23 | + mkdir -p $(INSTALLDIR) | |
24 | + cp eniam-corpora.cmxa eniam-corpora.a eniam-corpora.cma $(INSTALLDIR) | |
25 | + cp types.cmi CONLL.cmi CONLL_adapter.cmi resources.cmi conllParser.cmi interpsInCorpus.cmi generate.cmi $(INSTALLDIR) | |
26 | + cp types.cmx CONLL.cmx CONLL_adapter.cmx resources.cmx conllParser.cmx interpsInCorpus.cmx generate.cmx $(INSTALLDIR) | |
27 | + mkdir -p /usr/local/share/eniam/corpora | |
28 | + cp info_sentences* /usr/local/share/eniam/corpora | |
29 | + | |
30 | +eniam-corpora.cma: $(SOURCES) | |
31 | + ocamlc -linkall -a -o eniam-corpora.cma $(OCAMLFLAGS) $^ | |
32 | + | |
33 | +eniam-corpora.cmxa: $(SOURCES) | |
34 | + $(OCAMLOPT) -linkall -a -o eniam-corpora.cmxa $(INCLUDES) $(SOURCES) | |
15 | 35 | |
16 | 36 | freq_test: |
17 | - $(OCAMLOPT) -o freq_test $(OCAMLOPTFLAGS) $(MODS) freq_test.ml | |
37 | + $(OCAMLOPT) -o freq_test $(OCAMLOPTFLAGS) $(SOURCES) freq_test.ml | |
18 | 38 | |
19 | 39 | test: CONLL.ml CONLL_adapter.ml test_conll.ml |
20 | 40 | mkdir -p results |
... | ... |
corpora/resources.ml
... | ... | @@ -97,7 +97,7 @@ let conll_info () = Xlist.fold (data_conll ()) InfoMap.empty |
97 | 97 | (fun map sentence -> InfoMap.add (List.map (fun token -> token.c_orth) sentence.s_tokens) sentence map) |
98 | 98 | |
99 | 99 | let info_file () = |
100 | - let oc = open_out "../corpora/info_sentences2.txt" in | |
100 | + let oc = open_out @@ resource_path ^ "/info_sentences2.txt" in | |
101 | 101 | List.iter (fun (key, sentence) -> |
102 | 102 | output_string oc (sentence.s_id^"\n"^sentence.s_text^"\n"^(String.concat " " key)^"\n\n"); |
103 | 103 | flush oc) (InfoMap.bindings (conll_info())) |
... | ... |
corpora/test_conll.ml
... | ... | @@ -207,26 +207,34 @@ let process_conll_corpus filename = |
207 | 207 | let corpus = File.file_in filename (fun file -> CONLL.match_corpus (ENIAM_CONLL.load_corpus file)) in |
208 | 208 | print_endline "process_conll_corpus"; |
209 | 209 | (* let corpus = [List.hd corpus] in *) |
210 | - Xlist.iter corpus (fun query -> | |
211 | - let id = process_id (get_query_id query) in | |
212 | - let path = "results/" ^ id ^ "/" in | |
213 | - ignore (Sys.command ("mkdir -p " ^ path)); | |
214 | - match query with | |
215 | - | AltText[Raw,RawText query;CONLL,StructText[ | |
216 | - StructParagraph[{sentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence dep_paths]} as p]]],tokens -> | |
217 | - print_endline ("\n" ^ text ^ "\n"); | |
218 | - (* let m_dep_paths = Array.map (fun (id,_,_) -> id,-1,"") dep_paths in *) | |
219 | - let conll = StructParagraph[{p with sentence = AltSentence([Raw, RawSentence text; CONLL, DepSentence dep_paths] | |
220 | - (*@ if Paths.config.Paths.mate_parser_enabled then [Mate, DepSentence m_dep_paths] else []*))}] in | |
221 | - let text,tokens = ENIAMsubsyntax.parse_text_tokens tokens query in | |
222 | - let sentences = match text with | |
223 | - AltText[Raw,RawText _; Struct,StructText[AltParagraph[Raw,RawParagraph _; Struct,StructParagraph sentences]]] -> sentences | |
224 | - | _ -> failwith "process_conll_corpus 1" in | |
225 | - let text = AltText[Raw,RawText query; Struct, StructText([ | |
226 | - AltParagraph[Raw,RawParagraph query; ENIAM, StructParagraph sentences; CONLL, conll]])] in | |
227 | - let lex_sems = ENIAMlexSemantics.assign tokens text in | |
228 | - ignore(parse_text id 1 tokens lex_sems text) | |
229 | - | _ -> failwith "process_conll_corpus 2") | |
210 | + Xlist.iter corpus (fun query -> try | |
211 | + let id = process_id (get_query_id query) in | |
212 | + let path = "results/" ^ id ^ "/" in | |
213 | + ignore (Sys.command ("mkdir -p " ^ path)); | |
214 | + match query with | |
215 | + | AltText[Raw,RawText query;CONLL,StructText[ | |
216 | + StructParagraph[{sentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence dep_paths]} as p]]],tokens -> | |
217 | + print_endline ("\nPróba sparsowania zdania:\n" ^ text ^ "\n"); | |
218 | + (* let m_dep_paths = Array.map (fun (id,_,_) -> id,-1,"") dep_paths in *) | |
219 | + let conll = StructParagraph[{p with sentence = AltSentence([Raw, RawSentence text; CONLL, DepSentence dep_paths] | |
220 | + (*@ if Paths.config.Paths.mate_parser_enabled then [Mate, DepSentence m_dep_paths] else []*))}] in | |
221 | + let text,tokens = ENIAMsubsyntax.parse_text_tokens tokens query in | |
222 | + let sentences = match text with | |
223 | + AltText[Raw,RawText _; Struct,StructText[AltParagraph[Raw,RawParagraph _; Struct,StructParagraph sentences]]] -> sentences | |
224 | + | _ -> failwith "process_conll_corpus 1" in | |
225 | + let text = AltText[Raw,RawText query; Struct, StructText([ | |
226 | + AltParagraph[Raw,RawParagraph query; ENIAM, StructParagraph sentences; CONLL, conll]])] in | |
227 | + let lex_sems = ENIAMlexSemantics.assign tokens text in | |
228 | + ignore(parse_text id 1 tokens lex_sems text) | |
229 | + | _ -> failwith "process_conll_corpus 2" | |
230 | + with | |
231 | + Failure e -> print_endline ("Failure " ^ e) | |
232 | + | e -> print_endline (Printexc.get_backtrace () ^ "\n" ^ (Printexc.to_string e))) | |
230 | 233 | |
231 | 234 | let _ = |
232 | - process_conll_corpus "../testy/skladnica-test1-Failure.conll" | |
235 | + Printexc.record_backtrace true; | |
236 | + (* LCGfields.reset (); *) | |
237 | + process_conll_corpus "../../NLP resources/skladnica_zaleznosciowa.conll"; | |
238 | + (* process_conll_corpus "../testy/skladnica-test1.conll"; *) | |
239 | + (* process_conll_corpus "../testy/skladnica-test1-Failure.conll"; *) | |
240 | + (* LCGfields.print_results () *) | |
... | ... |
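The test_conll.ml change above wraps the per-sentence processing in try ... with, printing either the Failure message or the exception together with its backtrace (enabled via Printexc.record_backtrace) instead of aborting the whole corpus run. Below is a standalone sketch of that wrapper; process_item and items are hypothetical stand-ins for the corpus iteration.

(* Sketch: keep iterating over the corpus and print diagnostics for each
   item that raises, mirroring the Failure / backtrace handlers added above. *)
let items = ["ok"; "fails"]

let process_item s =
  if s = "fails" then failwith ("cannot parse: " ^ s)
  else print_endline ("parsed " ^ s)

let () =
  Printexc.record_backtrace true;
  List.iter
    (fun item ->
       try process_item item with
       | Failure e -> print_endline ("Failure " ^ e)
       | e -> print_endline (Printexc.get_backtrace () ^ "\n" ^ Printexc.to_string e))
    items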
corpora/test_conll2.ml deleted
1 | -(* | |
2 | - * ENIAMcorpora is a library that integrates ENIAM with corpora in CONLL format | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This library is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU Lesser General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This library is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU Lesser General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU Lesser General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | -open Xstd | |
21 | -open ENIAM_LCGlexiconTypes | |
22 | -open ENIAM_LCGtypes | |
23 | -open ENIAMsubsyntaxTypes | |
24 | - | |
25 | -let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.rules_filename | |
26 | -let dep_rules = ENIAM_LCGlexicon.make_rules true ENIAM_LCGlexiconTypes.rules_filename | |
27 | - | |
28 | -let examples = [ | |
29 | - (* "Szpak","Szpak śpiewa.";*) | |
30 | - (* "miał","Miałem miał."; *) | |
31 | - (* "Ala","Ala ma kota."; | |
32 | - "Ale","Ale mają kota:"; *) | |
33 | - (* "zima","Szpak frunie zimą.";*) | |
34 | - (* "październik","Kot miauczy w październiku."; *) | |
35 | - (* "Szpak-Kot","Szpak frunie. Kot miauczy."; | |
36 | - "powiedział","Szpak powiedział: „Frunę. Kiszę.”";*) | |
37 | - "teraz","Teraz frunie jakiś szpak."; | |
38 | - "chłopcy","Chłopcy mają ulicę kwiatami."; | |
39 | - (* "arabia","Arabia Saudyjska biegnie.";*) | |
40 | - (* "Tom","Tom idzie."; *) | |
41 | -] | |
42 | - | |
43 | -let clarify_categories senses token = | |
44 | - match token.ENIAMtokenizerTypes.token with | |
45 | - ENIAMtokenizerTypes.Lemma(lemma,pos,interp) -> List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories false senses (lemma,pos,interp))) | |
46 | - | ENIAMtokenizerTypes.Proper(lemma,pos,interp,_) -> List.flatten (Xlist.map interp (fun interp -> ENIAMcategoriesPL.clarify_categories true senses (lemma,pos,interp))) | |
47 | - | ENIAMtokenizerTypes.Interp lemma -> ENIAMcategoriesPL.clarify_categories false senses (lemma,"interp",[]) | |
48 | - | _ -> [] | |
49 | - | |
50 | -let create_chart tokens lex_sems paths last = | |
51 | - ENIAM_LCGrenderer.reset_variable_numbers (); | |
52 | - let chart = ENIAM_LCGchart.make last in | |
53 | - let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) -> | |
54 | - let t = ExtArray.get tokens id in | |
55 | - let s = ExtArray.get lex_sems id in | |
56 | - ENIAM_LCGrenderer.reset_variable_names (); | |
57 | - ENIAM_LCGrenderer.add_variable_numbers (); | |
58 | - let cats = clarify_categories ["X"] t in | |
59 | - let l = ENIAM_LCGlexicon.create_entries rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in | |
60 | - ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in | |
61 | - chart | |
62 | - | |
63 | -let rec split_sons left id right = function | |
64 | - [] -> List.rev (List.sort compare left), List.sort compare right | |
65 | - | x :: l -> if x < id then split_sons (x :: left) id right l else split_sons left id (x :: right) l | |
66 | - | |
67 | -let rec dep_create_rec nodes sons conll_id = | |
68 | - let node = IntMap.find nodes conll_id in | |
69 | - let l = try IntMap.find sons conll_id with Not_found -> [] in | |
70 | - let left,right = split_sons [] conll_id [] l in | |
71 | - (* Printf.printf "dep_create_rec [%s] %d [%s]\n" (String.concat ";" (Xlist.map left string_of_int)) conll_id (String.concat ";" (Xlist.map right string_of_int)); *) | |
72 | - DepNode(conll_id, Xlist.map left (dep_create_rec nodes sons), node, Xlist.map right (dep_create_rec nodes sons)) | |
73 | - | |
74 | -let create_dep_chart tokens lex_sems paths = | |
75 | - let sons = Int.fold 1 (Array.length paths - 1) IntMap.empty (fun sons i -> | |
76 | - let _,super,_ = paths.(i) in | |
77 | - IntMap.add_inc sons super [i] (fun l -> i :: l)) in | |
78 | - let nodes = Int.fold 0 (Array.length paths - 1) IntMap.empty (fun nodes i -> | |
79 | - let id,_,_ = paths.(i) in | |
80 | - let t = ExtArray.get tokens id in | |
81 | - let s = ExtArray.get lex_sems id in | |
82 | - ENIAM_LCGrenderer.reset_variable_names (); | |
83 | - ENIAM_LCGrenderer.add_variable_numbers (); | |
84 | - let cats = clarify_categories ["X"] t in | |
85 | - let l = ENIAM_LCGlexicon.create_entries dep_rules id t.ENIAMtokenizerTypes.orth cats s.ENIAMlexSemanticsTypes.schemata in | |
86 | - IntMap.add nodes i l) in | |
87 | - dep_create_rec nodes sons 0 | |
88 | - | |
89 | -let test_example path id tokens lex_sems paths last = | |
90 | - ENIAM_LCGreductions.reset_variant_label (); | |
91 | - let chart = create_chart tokens lex_sems paths last in | |
92 | - ENIAM_LCGlatexOf.print_chart path (id^"1_chart") "a1" chart; | |
93 | - let chart,references = ENIAM_LCGchart.lazify chart in | |
94 | - ENIAM_LCGlatexOf.print_chart path (id^"2_chart") "a4" chart; | |
95 | - ENIAM_LCGlatexOf.print_references path (id^"2_references") "a4" references; | |
96 | - let chart = ENIAM_LCGchart.parse chart references 30. Sys.time in (* uwaga: niejawna zmiana imperatywna w references *) | |
97 | - ENIAM_LCGlatexOf.print_chart path (id^"3_chart") "a4" chart; | |
98 | - ENIAM_LCGlatexOf.print_references path (id^"3_references") "a4" references; | |
99 | - if ENIAM_LCGchart.is_parsed chart then ( | |
100 | - let term = ENIAM_LCGchart.get_parsed_term chart in | |
101 | - Xlatex.latex_file_out path (id^"4_term") "a4" false (fun file -> | |
102 | - Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term)); | |
103 | - Xlatex.latex_compile_and_clean path (id^"4_term"); | |
104 | - let dependency_tree = ENIAM_LCGreductions.reduce term references in | |
105 | - ENIAM_LCGlatexOf.print_dependency_tree path (id^"4_dependency_tree") "a0" dependency_tree; | |
106 | - if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then ( | |
107 | - ENIAM_LCGreductions.assign_labels dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *) | |
108 | - ENIAM_LCGlatexOf.print_dependency_tree path (id^"5_dependency_tree") "a4" dependency_tree; | |
109 | - ENIAM_LCGreductions.remove_cuts dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *) | |
110 | - ENIAM_LCGlatexOf.print_dependency_tree path (id^"6_dependency_tree") "a4" dependency_tree; | |
111 | - ENIAM_LCGgraphOf.print_dependency_tree path (id^"6_dependency_tree") dependency_tree; | |
112 | - ENIAM_LCGgraphOf.print_simplified_dependency_tree path (id^"6_simple_dependency_tree") dependency_tree; | |
113 | - ()) | |
114 | - else print_endline "not reduced") | |
115 | - else print_endline "not parsed" | |
116 | - | |
117 | -let test_dep_example path id tokens lex_sems paths = | |
118 | - try | |
119 | - ENIAM_LCGreductions.reset_variant_label (); | |
120 | - let paths = CONLL_adapter.convert_dep_tree id (*first_try*) true paths tokens in | |
121 | - ENIAMsubsyntaxHTMLof.print_dep_sentence path (id^"1_paths") tokens paths; | |
122 | - let chart = create_dep_chart tokens lex_sems paths in | |
123 | - ENIAM_LCGlatexOf.print_dep_chart path (id^"1_chart") "a1" chart; | |
124 | - let chart,references = ENIAM_LCGchart.dep_lazify chart in | |
125 | - ENIAM_LCGlatexOf.print_dep_chart path (id^"2_chart") "a4" chart; | |
126 | - ENIAM_LCGlatexOf.print_references path (id^"2_references") "a4" references; | |
127 | - let chart = ENIAM_LCGchart.dep_parse chart references 30. Sys.time in (* uwaga: niejawna zmiana imperatywna w references *) | |
128 | - (* ENIAM_LCGlatexOf.print_chart path (id^"3_chart") "a4" chart; *) | |
129 | - ENIAM_LCGlatexOf.print_references path (id^"3_references") "a4" references; | |
130 | - if ENIAM_LCGchart.is_dep_parsed chart then ( | |
131 | - let term = ENIAM_LCGchart.get_dep_parsed_term chart in | |
132 | - Xlatex.latex_file_out path (id^"4_term") "a4" false (fun file -> | |
133 | - Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term)); | |
134 | - Xlatex.latex_compile_and_clean path (id^"4_term"); | |
135 | - let dependency_tree = ENIAM_LCGreductions.reduce term references in | |
136 | - ENIAM_LCGlatexOf.print_dependency_tree path (id^"4_dependency_tree") "a0" dependency_tree; | |
137 | - if ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree then ( | |
138 | - ENIAM_LCGreductions.assign_labels dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *) | |
139 | - ENIAM_LCGlatexOf.print_dependency_tree path (id^"5_dependency_tree") "a4" dependency_tree; | |
140 | - ENIAM_LCGreductions.remove_cuts dependency_tree; (* uwaga: niejawna zmiana imperatywna w dependency_tree *) | |
141 | - ENIAM_LCGlatexOf.print_dependency_tree path (id^"6_dependency_tree") "a4" dependency_tree; | |
142 | - ENIAM_LCGgraphOf.print_dependency_tree path (id^"6_dependency_tree") dependency_tree; | |
143 | - ENIAM_LCGgraphOf.print_simplified_dependency_tree path (id^"6_simple_dependency_tree") dependency_tree; | |
144 | - ()) | |
145 | - else print_endline "not reduced") | |
146 | - else print_endline "not parsed" | |
147 | - with NotDepParsed(id_ndp,left,l,right) -> ( | |
148 | - print_endline "not parsed 2"; | |
149 | - ENIAM_LCGlatexOf.print_not_parsed_dep_chart path (id^"3_not_parsed_chart") "a2" (id_ndp,left,l,right)) | |
150 | - | |
151 | -let rec parse_sentence name id tokens lex_sems = function | |
152 | - RawSentence s -> id | |
153 | - | StructSentence(paths,last) -> | |
154 | - (* test_example ("results/" ^ name^"/") (string_of_int id ^ "_") tokens lex_sems paths last; *) | |
155 | - id + 1 | |
156 | - | DepSentence(paths) -> | |
157 | - test_dep_example ("results/" ^ name ^ "/") (string_of_int id ^ "_") tokens lex_sems paths; | |
158 | - id + 1 | |
159 | - | QuotedSentences sentences -> | |
160 | - Xlist.fold sentences id (fun id p -> | |
161 | - parse_sentence name id tokens lex_sems p.sentence) | |
162 | - | AltSentence l -> | |
163 | - Xlist.fold l id (fun id (mode,sentence) -> | |
164 | - parse_sentence name id tokens lex_sems sentence) | |
165 | - | |
166 | -let rec parse_paragraph name id tokens lex_sems = function | |
167 | - RawParagraph s -> id | |
168 | - | StructParagraph sentences -> | |
169 | - Xlist.fold sentences id (fun id p -> | |
170 | - parse_sentence name id tokens lex_sems p.sentence) | |
171 | - | AltParagraph l -> | |
172 | - Xlist.fold l id (fun id (mode,paragraph) -> | |
173 | - parse_paragraph name id tokens lex_sems paragraph) | |
174 | - | |
175 | -let rec parse_text name id tokens lex_sems = function | |
176 | - RawText s -> id | |
177 | - | StructText paragraphs -> | |
178 | - Xlist.fold paragraphs id (fun id paragraph -> | |
179 | - parse_paragraph name id tokens lex_sems paragraph) | |
180 | - | AltText l -> | |
181 | - Xlist.fold l id (fun id (mode,text) -> | |
182 | - parse_text name id tokens lex_sems text) | |
183 | - | |
184 | -let id_counter = ref 0 | |
185 | - | |
186 | -let get_id () = | |
187 | - incr id_counter; | |
188 | - "ID_" ^ (string_of_int !id_counter) | |
189 | - | |
190 | -let get_query_id = function | |
191 | - AltText[_;CONLL,StructText[StructParagraph[p]]],_ -> if p.id = "" then get_id () else p.id | |
192 | - | AltText[CONLL,StructText[StructParagraph[p]]],_ -> if p.id = "" then get_id () else p.id | |
193 | - | _ -> failwith "get_query_id" | |
194 | - | |
195 | -let process_id s = | |
196 | - if Xstring.check_prefix "ID_" s then s else | |
197 | - let a,b,c = match Xstring.split_delim "/" s with | |
198 | - [a;b;c] -> a,b,c | |
199 | - | _ -> failwith ("process_id: " ^ s) in | |
200 | - if Xstring.check_prefix "NKJP_1M_" a && Xstring.check_prefix "morph_" b && Xstring.check_sufix "-p" b && | |
201 | - Xstring.check_prefix "morph_" c && Xstring.check_sufix "-s" c then | |
202 | - Xstring.cut_prefix "NKJP_1M_" a ^ "." ^ Xstring.cut_sufix "-s" (Xstring.cut_prefix "morph_" c) | |
203 | - else failwith ("process_id: " ^ s) | |
204 | - | |
205 | -let process_conll_corpus filename = | |
206 | - let corpus = File.file_in filename (fun file -> CONLL.match_corpus (ENIAM_CONLL.load_corpus file)) in | |
207 | - print_endline "process_conll_corpus"; | |
208 | - let corpus = [List.hd corpus] in | |
209 | - Xlist.iter corpus (fun query -> | |
210 | - let id = process_id (get_query_id query) in | |
211 | - let path = "results/" ^ id ^ "/" in | |
212 | - ignore (Sys.command ("mkdir -p " ^ path)); | |
213 | - match query with | |
214 | - | AltText[Raw,RawText query;CONLL,StructText[ | |
215 | - StructParagraph[{sentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence dep_paths]} as p]]],tokens -> | |
216 | - (* let m_dep_paths = Array.map (fun (id,_,_) -> id,-1,"") dep_paths in *) | |
217 | - let conll = StructParagraph[{p with sentence = AltSentence([Raw, RawSentence text; CONLL, DepSentence dep_paths] | |
218 | - (*@ if Paths.config.Paths.mate_parser_enabled then [Mate, DepSentence m_dep_paths] else []*))}] in | |
219 | - let text,tokens = ENIAMsubsyntax.parse_text_tokens tokens query in | |
220 | - let sentences = match text with | |
221 | - AltText[Raw,RawText _; Struct,StructText[AltParagraph[Raw,RawParagraph _; Struct,StructParagraph sentences]]] -> sentences | |
222 | - | _ -> failwith "process_conll_corpus 1" in | |
223 | - let text = AltText[Raw,RawText query; Struct, StructText([ | |
224 | - AltParagraph[Raw,RawParagraph query; ENIAM, StructParagraph sentences; CONLL, conll]])] in | |
225 | - let lex_sems = ENIAMlexSemantics.assign tokens text in | |
226 | - ignore(parse_text id 1 tokens lex_sems text) | |
227 | - | _ -> failwith "process_conll_corpus 2") | |
228 | - | |
229 | -let _ = | |
230 | - process_conll_corpus "../testy/skladnica-test1.conll" |
corpora/types.ml
... | ... | @@ -27,3 +27,10 @@ type conll_sentence = |
27 | 27 | |
28 | 28 | type info_sentence = |
29 | 29 | {i_id:string; i_text:string; i_tokens:string list} |
30 | + | |
31 | +let resource_path = | |
32 | + try Sys.getenv "ENIAM_RESOURCE_PATH" | |
33 | + with Not_found -> | |
34 | + if Sys.file_exists "/usr/share/eniam" then "/usr/share/eniam" else | |
35 | + if Sys.file_exists "/usr/local/share/eniam" then "/usr/local/share/eniam" else | |
36 | + failwith "resource directory does not exist" | |
... | ... |
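The resource_path value added to types.ml above resolves the resource directory from the ENIAM_RESOURCE_PATH environment variable, falling back to the shared system directories, and the conllParser.ml and resources.ml changes build their output paths from it instead of hard-coded relative paths. Below is a minimal sketch of that lookup together with a caller; write_info is a hypothetical helper mirroring how info_sentences.txt is opened.

(* Sketch: resolve the resource directory, then build file paths under it. *)
let resource_path =
  try Sys.getenv "ENIAM_RESOURCE_PATH"
  with Not_found ->
    if Sys.file_exists "/usr/share/eniam" then "/usr/share/eniam"
    else if Sys.file_exists "/usr/local/share/eniam" then "/usr/local/share/eniam"
    else failwith "resource directory does not exist"

let write_info lines =
  let oc = open_out (resource_path ^ "/info_sentences.txt") in
  List.iter (fun l -> output_string oc (l ^ "\n")) lines;
  close_out oc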
testy/skladnica-test1-Failure.conll
1 | +1 - - interp interp _ 3 punct _ _ | |
2 | +2 Panowie pan subst subst pl|nom|m1 3 subj _ _ | |
3 | +3 przyszli przyjść praet praet pl|m1|perf 0 pred _ _ | |
4 | +4 . . interp interp _ 3 punct _ _ | |
5 | + | |
6 | +1 O o prep prep loc 12 comp _ _ | |
7 | +2 klasztornym klasztorny adj adj sg|loc|n|pos 3 adjunct _ _ | |
8 | +3 piekle piekło subst subst sg|loc|n 1 comp _ _ | |
9 | +4 , , interp interp _ 3 punct _ _ | |
10 | +5 zgotowanym zgotować ppas ppas sg|loc|n|perf|aff 3 adjunct _ _ | |
11 | +6 przez przez prep prep acc|nwok 5 comp_ag _ _ | |
12 | +7 trzy trzy num num pl|acc|m2|congr 6 comp _ _ | |
13 | +8 potwory potwór subst subst pl|acc|m2 7 comp _ _ | |
14 | +9 w w prep prep loc|nwok 8 adjunct _ _ | |
15 | +10 habitach habit subst subst pl|loc|m3 9 comp _ _ | |
16 | +11 , , interp interp _ 3 punct _ _ | |
17 | +12 pisali pisać praet praet pl|m1|imperf 0 pred _ _ | |
18 | +13 śmy być aglt aglt pl|pri|imperf|nwok 12 aglt _ _ | |
19 | +14 w w prep prep loc|nwok 12 adjunct _ _ | |
20 | +15 kwietniu kwiecień subst subst sg|loc|m3 14 comp _ _ | |
21 | +16 br bieżący_rok brev brev pun 15 ne _ _ | |
22 | +17 . . interp interp _ 12 punct _ _ | |
23 | + | |
1 | 24 | 1 Następnie następnie adv adv _ 2 adjunct _ _ |
2 | 25 | 2 rozłożyła rozłożyć praet praet sg|f|perf 10 conjunct _ _ |
3 | 26 | 3 wysoki wysoki adj adj sg|acc|m3|pos 4 adjunct _ _ |
... | ... |