Merge branch 'integration' into dep_trees

Wojciech Jaworski
2 parents 66045a35 ec2ccc69
Showing 98 changed files with 512041 additions and 494159 deletions
LCGlexicon/ENIAM_LCGlexicon.ml
LCGlexicon/ENIAM_LCGlexiconTypes.ml
LCGlexicon/ENIAMcategoriesPL.ml
LCGlexicon/TODO
LCGlexicon/resources/lexicon-pl.dic
LCGlexicon/test.ml
LCGparser/ENIAM_LCGgraphOf.ml
LCGparser/ENIAM_LCGlatexOf.ml
LCGparser/test.ml
NKJP2/ENIAM_NKJP.ml
NKJP2/spelling.ml
exec/ENIAMexec.ml
exec/ENIAMexecTypes.ml
exec/ENIAMsemLexicon.ml
exec/ENIAMsemValence.ml
exec/ENIAMvisualization.ml
exec/TODO
exec/makefile
exec/parser.ml
exec/resources/lexicon-pl.dic
@@ -71,6 +71,30 @@ let assign_quantifiers (selectors,rule,weight) =
   let quant = merge_quant categories quant in
   selectors, (bracket,quant,syntax),(rule,weight)
  
+(* Walks an internal grammar symbol and checks that every attribute variable
+   occurring in it belongs to [quants], the set of selectors quantified by the rule.
+   The pair [(selectors,syntax)] identifies the rule and is used only to build
+   the error message. The variable "schema" is exempt from the check.
+   Raises Failure as soon as an unquantified variable is found. *)
+let rec check_quantifiers_int_rec (selectors,syntax) quants symbol =
+  match symbol with
+    Atom _ | Zero | Top -> ()
+  | AVar "schema" -> ()
+  | AVar v ->
+     if SelectorSet.mem quants (selector_of_string v) then () else
+     failwith ("Variable '" ^ v ^ "' is not quantified in rule " ^ string_of_selectors selectors ^ ": " ^ ENIAM_LCGstringOf.grammar_symbol 0 syntax)
+  | With alternatives -> Xlist.iter alternatives (check_quantifiers_int_rec (selectors,syntax) quants)
+
+(* Recursively traverses a grammar symbol and runs [check_quantifiers_int_rec]
+   on the components of every tensor it contains. [rule] and [quants] are
+   passed through unchanged. The direction stored in an implication is ignored.
+   Raises Failure "check_quantifiers_rec" on any constructor not listed below. *)
+let rec check_quantifiers_rec rule quants symbol =
+  let check = check_quantifiers_rec rule quants in
+  match symbol with
+    One -> ()
+  | Tensor l -> Xlist.iter l (check_quantifiers_int_rec rule quants)
+  | Plus l -> Xlist.iter l check
+  | Imp(s,_,t) -> check s; check t
+  | ImpSet(s,l) -> check s; Xlist.iter l (fun (_,t) -> check t)
+  | Star s | Maybe s -> check s
+  | _ -> failwith "check_quantifiers_rec"
+
+(* Entry point of the quantifier check for one lexicon rule: collects the
+   selectors bound in the rule's quantifier list into a set and verifies the
+   rule's syntax against it. The bracket flag and the trailing rule/weight
+   component are not inspected. *)
+let check_quantifiers (selectors,(_,quant,syntax),_) =
+  let bound = Xlist.fold quant SelectorSet.empty (fun acc (sel,_) -> SelectorSet.add acc sel) in
+  check_quantifiers_rec (selectors,syntax) bound syntax
+
 let assign_semantics (selectors,(bracket,quant,syntax),(rule,weight)) =
   let semantics = try
     let raised,rule = get_raised [] rule in
@@ -135,6 +159,7 @@ let dict_of_grammar grammar =
 let make_rules x_flag filename =
   let lexicon = ENIAM_LCGlexiconParser.load_lexicon filename in
   let lexicon = List.rev (Xlist.rev_map lexicon assign_quantifiers) in
+  Xlist.iter lexicon check_quantifiers;
   let lexicon = List.rev (Xlist.rev_map lexicon assign_semantics) in
   let lexicon = if x_flag then List.rev (Xlist.rev_map lexicon add_x_args) else lexicon in
   dict_of_grammar lexicon
@@ -36,6 +36,7 @@ module OrderedSelector = struct
 end
  
 module SelectorMap=Xmap.Make(OrderedSelector)
+module SelectorSet=Xset.Make(OrderedSelector)
  
 type rule =
     Bracket
@@ -36,7 +36,7 @@ let selector_values = Xlist.fold [
           "match-result";"url";"email";"obj-id";"adj";"adjc";"adjp";"adja";
           "adv";"ger";"pact";"ppas";"fin";"bedzie";"praet";"winien";"impt";
           "imps";"pred";"aglt";"inf";"pcon";"pant";"qub";"part";"comp";"conj";"interj";
-          "sinterj";"burk";"interp";"unk";"html-tag"];
+          "sinterj";"burk";"interp";"xxx";"unk";"html-tag"];
     Pos2, [];
     Cat, [];
     Number, all_numbers;
@@ -365,6 +365,8 @@ let clarify_categories proper cat = function
   | lemma,"interp",[] -> [{empty_cats with lemma=lemma; pos="interp"; pos2="interp"}]
   | lemma,"unk",[] ->
       [{empty_cats with lemma=lemma; pos="unk"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}]
+  | lemma,"xxx",[] ->
+      [{empty_cats with lemma=lemma; pos="xxx"; pos2="noun"; numbers=all_numbers; cases=all_cases; genders=all_genders; persons=["ter"]}]
   | lemma,"html-tag",[] -> [{empty_cats with lemma=lemma; pos="html-tag"; pos2="html-tag"}]
   | lemma,c,l -> failwith ("clarify_categories: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat "."))))
  
@@ -557,9 +559,9 @@ let pos_categories = Xlist.fold [
     "imps",[Lemma;(*NewLemma;*)Cat;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
     "pred",[Lemma;(*NewLemma;*)Cat;Number;Gender;Person;Aspect;Negation;Mood;Tense;];
     "aglt",[Lemma;Number;Person;Aspect;];
-    "inf",[Lemma;(*NewLemma;*)Cat;Aspect;];
-    "pcon",[Lemma;(*NewLemma;*)Cat;Aspect;];
-    "pant",[Lemma;(*NewLemma;*)Cat;Aspect;];
+    "inf",[Lemma;(*NewLemma;*)Cat;Aspect;Negation;];
+    "pcon",[Lemma;(*NewLemma;*)Cat;Aspect;Negation;];
+    "pant",[Lemma;(*NewLemma;*)Cat;Aspect;Negation;];
     "qub",[Lemma;];
     "part",[Lemma;];
     "comp",[Lemma;];(* ctype *)
@@ -569,5 +571,6 @@ let pos_categories = Xlist.fold [
     "burk",[Lemma;];
     "interp",[Lemma;];
     "unk",[Lemma;Number;Case;Gender;Person;];
+    "xxx",[Lemma;Number;Case;Gender;Person;];
     "html-tag",[Lemma;];
   ] StringMap.empty (fun map (k,l) -> StringMap.add map k l)
+- dodać prepncp
+
 - dodać podniesione comprepy
   Pod jakim tytułem brykasz?
   Niezależnie od kogo brykasz?
@@ -104,11 +104,14 @@ pos=subst,case=gen,nsem=measure:
   measure*sg*case*n2*person{\num*number*case*gender*person*rec}{schema}{\(1+qub),/(1+inclusion)}: measure_weight; # UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika
  
 # frazy przyimkowe
+#lemma!=temu,pos=prep:           prepnp*lemma*case{\(1+advp*T),/np*T*case*T*T}{\(1+qub),/(1+inclusion)};
+#lemma!=temu,pos=prep:           prepadjp*lemma*case{\(1+advp*T),/adjp*T*case*T}{\(1+qub),/(1+inclusion)};
 pos=prep:           prepnp*lemma*case{\(1+advp*T),/np*T*case*T*T}{\(1+qub),/(1+inclusion)};
 pos=prep:           prepadjp*lemma*case{\(1+advp*T),/adjp*T*case*T}{\(1+qub),/(1+inclusion)};
 lemma=po,pos=prep:  QUANT[case=postp] prepadjp*lemma*case{\(1+advp*T),/(adjp*sg*dat*m1+adjp*T*postp*T)}{\(1+qub),/(1+inclusion)}; # po polsku, po kreciemu
 lemma=z,pos=prep:   QUANT[case=postp] prepadjp*lemma*case{\(1+advp*T),/adjp*sg*nom*f}{\(1+qub),/(1+inclusion)}; # z bliska
 lemma=na,pos=prep:  QUANT[case=postp] prepadjp*lemma*case{\(1+advp*T),/advp*T}{\(1+qub),/(1+inclusion)}; # na lewo
+lemma=temu,pos=prep: prepnp*lemma*case\np*T*case*T*T; # chwilę temu
  
 # przimkowe określenia czasu
 lemma=z,pos=prep,case=gen:      prepnp*lemma*case{\(1+advp*T),/(day-month+day+year+date+hour+hour-minute)}{\(1+qub),/(1+inclusion)};
@@ -292,6 +295,7 @@ lemma=[,pos=interp:       (inclusion/rparen2)/(np*T*T*T*T+ip*T*T*T+adjp*T*T*T+pr
 lemma=),pos=interp:       rparen;
 lemma=],pos=interp:       rparen2;
 pos=unk:                  np*number*case*gender*person;
+pos=xxx:                  np*number*case*gender*person;
  
 lemma=<conll_root>,pos=interp:    <conll_root>/(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj);
  
@@ -302,7 +306,7 @@ lemma=&lt;sentence&gt;,pos=interp:      BRACKET &lt;root&gt;/s;
  
 lemma=:,pos=interp:               BRACKET or;
 lemma=:s,pos=interp:              BRACKET <colon>\<speaker>;
-lemma=:s,pos=interp:              BRACKET (<colon>\<speaker>)/<squery>;
+lemma=:s,pos=interp:              BRACKET (<colon>\<speaker>)/<squery>; #FIXME <squery> nie jest nigdzie generowane
 lemma=<or-sentence>,pos=interp:   BRACKET <root>/s;
 lemma=<or-sentence>,pos=interp:   BRACKET ((<root>/<speaker-end>)/(ip*T*T*T/or))/or2 SEM[λxλyλz.NODE(yx,z)];
 lemma=</or-sentence>,pos=interp:  BRACKET or2\?(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj);
@@ -19,6 +19,7 @@
  
 open ENIAM_LCGlexiconTypes
 open ENIAM_LCGtypes
+open Xstd
  
 let rules = ENIAM_LCGlexicon.make_rules false ENIAM_LCGlexiconTypes.rules_filename
 (* let rules = ENIAM_LCGlexicon.make_rules false "resources/lexicon-pl.dic" *)
@@ -97,19 +98,32 @@ let create_chart valence tokens last =
       ENIAM_LCGrenderer.reset_variable_names ();
       ENIAM_LCGrenderer.add_variable_numbers ();
       let cats = ENIAMcategoriesPL.clarify_categories proper ["X"] (lemma,pos,interp) in
-      let l = ENIAM_LCGlexicon.create_entries rules id orth cats valence in
+      let l = ENIAM_LCGlexicon.create_entries rules id orth cats valence [] in
       ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in
   chart
  
+(* Builds an array indexed by left node; entry [i] maps a right node [k] to the
+   text spanned between [i] and [k]. Only the node positions and the
+   orthographic form of each token are used. *)
+let create_text_fragments tokens last =
+  let fragments = Array.make last IntMap.empty in
+  (* seed the table with the single-token edges *)
+  Xlist.iter tokens (fun (_,lnode,rnode,orth,_,_,_,_) ->
+    fragments.(lnode) <- IntMap.add fragments.(lnode) rnode orth);
+  (* Int.iter_down presumably visits i from last-1 down to 0, so that
+     fragments.(j) is already complete for every j > i -- confirm in Xstd *)
+  Int.iter_down 0 (last - 1) (fun i ->
+    (* glue every fragment i..j to every fragment j..k, separated by a space;
+       j = last has no slot in the array and is left as is *)
+    let extend acc j prefix =
+      if j = last then acc else
+      IntMap.fold fragments.(j) acc (fun acc k suffix ->
+        IntMap.add acc k (prefix ^ " " ^ suffix)) in
+    fragments.(i) <- IntMap.fold fragments.(i) fragments.(i) extend);
+  fragments
+
 let test_example valence (name,tokens,last) =
   ENIAM_LCGreductions.reset_variant_label ();
   let chart = create_chart valence tokens last in
-  ENIAM_LCGlatexOf.print_chart "results/" (name^"1_chart") "a1" chart;
+  let text_fragments = create_text_fragments tokens last in
+  ENIAM_LCGlatexOf.print_chart "results/" (name^"1_chart") "a1" text_fragments chart;
   let chart,references = ENIAM_LCGchart.lazify chart in
-  ENIAM_LCGlatexOf.print_chart "results/" (name^"2_chart") "a4" chart;
+  ENIAM_LCGlatexOf.print_chart "results/" (name^"2_chart") "a4" text_fragments chart;
   ENIAM_LCGlatexOf.print_references "results/" (name^"2_references") "a4" references;
   let chart = ENIAM_LCGchart.parse chart references 30. Sys.time in (* uwaga: niejawna zmiana imperatywna w references *)
-  ENIAM_LCGlatexOf.print_chart "results/" (name^"3_chart") "a4" chart;
+  ENIAM_LCGlatexOf.print_chart "results/" (name^"3_chart") "a4" text_fragments chart;
   ENIAM_LCGlatexOf.print_references "results/" (name^"3_references") "a4" references;
   if ENIAM_LCGchart.is_parsed chart then (
     let term = ENIAM_LCGchart.get_parsed_term chart in
@@ -25,6 +25,7 @@ let escape_string s =
       match String.sub s i 1 with
         "<" -> t ^ "〈"
       | ">" -> t ^ "〉"
+      | "\"" -> t ^ "\\\""
       | c -> t ^ c)
  
 let string_of_node t =
@@ -58,6 +59,10 @@ let rec print_dependency_tree_rec file edge upper id = function
   | Val s ->
     fprintf file "  %s [shape=box,label=\"%s\"]\n" id s;
     print_edge file edge upper id
+  | SetAttr(a,s,t) ->
+    fprintf file "  %s [shape=box,label=\"SetAttr(%s,%s)\"]\n" id a (ENIAM_LCGstringOf.linear_term 0 s);
+    print_edge file edge upper id;
+    print_dependency_tree_rec2 file "" id t
   | Dot -> ()
   (*          fprintf file "  %s [shape=box,label=\"Dot\"]\n" id;
               print_edge file edge upper id*)
@@ -87,17 +92,26 @@ let rec print_simplified_dependency_tree_rec2 file edge upper = function
     Xlist.iter l (fun (i,t) -> print_simplified_dependency_tree_rec2 file i e t)
   | Dot -> ()
   | Ref i -> print_edge file edge upper ("x" ^ string_of_int i)
-  | t -> failwith ("print_simplified_dependency_tree_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t)
+  | SetAttr(a,s,t) -> ()
+    (* fprintf file "  %s [shape=box,label=\"SetAttr(%s,%s)\"]\n" id a (ENIAM_LCGstringOf.linear_term 0 s);
+    print_edge file edge upper id; *)
+    (* print_simplified_dependency_tree_rec2 file "" id t *)
+  | Node t -> ()
+  | t -> failwith ("print_simplified_dependency_tree_rec2: " ^ ENIAM_LCGstringOf.linear_term 0 t)
  
 let rec print_simplified_dependency_tree_rec file edge upper id = function
     Node t ->
-    fprintf file "  %s [label=\"%s\\n%s:%s\\n%s\\n%f\"]\n" id t.orth t.lemma t.pos (ENIAM_LCGstringOf.linear_term 0 t.symbol) t.weight;
+    fprintf file "  %s [label=\"%s\\n%s:%s\\n%s\\n%f\"]\n" id (escape_string t.orth) (escape_string t.lemma) t.pos (escape_string (ENIAM_LCGstringOf.linear_term 0 t.symbol)) t.weight;
     print_edge file edge upper id;
     print_simplified_dependency_tree_rec2 file "" id t.args
   | Variant(e,l) ->
     fprintf file "  %s [shape=diamond,label=\"%s\"]\n" id e;
     print_edge file edge upper id;
     Xlist.iter l (fun (i,t) -> print_simplified_dependency_tree_rec file i id (id ^ "y" ^ i) t)
+  (* | SetAttr(a,s,t) ->
+    fprintf file "  %s [shape=box,label=\"SetAttr(%s,%s)\"]\n" id a (ENIAM_LCGstringOf.linear_term 0 s);
+    print_edge file edge upper id; *)
+    (* print_simplified_dependency_tree_rec2 file "" id t *)
   | Dot -> ()
   | t -> failwith ("print_simplified_dependency_tree_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t)
  
@@ -199,7 +199,7 @@ let rec grammar_symbol c = function
   | BracketSet d -> "{\\bf BracketSet}(" ^ direction d ^ ")"
   | Maybe s -> "?" ^ grammar_symbol 2 s
  
-let chart page g =
+let chart page text_fragments g =
   let layers = ENIAM_LCGchart.fold g IntMap.empty (fun layers (symbol,node1,node2,sem,layer) ->
       let nodes = try IntMap.find layers layer with Not_found -> IntMap.empty in
       let content = node2, grammar_symbol 0 symbol, linear_term 0 sem in
@@ -207,16 +207,17 @@ let chart page g =
       let nodes = IntMap.add_inc nodes node1 [content] (fun l -> content :: l) in
       IntMap.add layers layer nodes) in
   let n = match page with "a4" -> "10" | "a1" -> "40" | _ -> "20" in
-  "\\begin{longtable}{|l|l|l|p{" ^ n ^ "cm}|}\n\\hline\n" ^
+  "\\begin{longtable}{|l|l|l|l|p{" ^ n ^ "cm}|}\n\\hline\n" ^
   String.concat "" (List.rev (IntMap.fold layers [] (fun l layer nodes ->
       IntMap.fold nodes l (fun l node1 contents ->
           Xlist.fold contents l (fun l (node2,symbol,sem) ->
-              (Printf.sprintf "%d & %d--%d & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 symbol sem) :: l))))) ^
+              let s = try IntMap.find text_fragments.(node1) node2 with Not_found -> failwith (Printf.sprintf "chart: text_fragment not found %d-%d" node1 node2) in
+              (Printf.sprintf "%d & %d--%d & %s & $\\begin{array}{l}%s\\end{array}$ & $%s$\\\\\n\\hline\n" layer node1 node2 s symbol sem) :: l))))) ^
   "\\end{longtable}"
  
-let print_chart path name page g =
+let print_chart path name page text_fragments g =
   Xlatex.latex_file_out path name page false (fun file ->
-      Printf.fprintf file "%s\n" (chart page g));
+      Printf.fprintf file "%s\n" (chart page text_fragments g));
   Xlatex.latex_compile_and_clean path name
  
  
@@ -18,6 +18,7 @@
  *)
  
 open ENIAM_LCGtypes
+open Xstd
  
 type entry =
     Basic of grammar_symbol
@@ -90,15 +91,28 @@ let create_chart tokens last =
       ENIAM_LCGchart.add chart lnode rnode (Bracket(lf,rf,syntax),semantics) 0) in
   chart
  
+(* For each left node [i] computes a map from right node [k] to the text
+   covered by the span [i]..[k], built from the orthographic forms of the
+   test tokens (lemma, pos and entry are ignored). *)
+let create_text_fragments tokens last =
+  let spans = Array.make last IntMap.empty in
+  (* one entry per token edge *)
+  Xlist.iter tokens (fun (lnode,rnode,orth,_,_,_) ->
+    spans.(lnode) <- IntMap.add spans.(lnode) rnode orth);
+  (* presumably iterates i downwards from last-1 to 0 -- confirm in Xstd *)
+  Int.iter_down 0 (last - 1) (fun i ->
+    let longer = IntMap.fold spans.(i) spans.(i) (fun acc j left ->
+      (* spans ending at [last] cannot be extended: there is no spans.(last) *)
+      if j = last then acc else
+      IntMap.fold spans.(j) acc (fun acc k right ->
+        IntMap.add acc k (left ^ " " ^ right))) in
+    spans.(i) <- longer);
+  spans
+
 let test_example (name,tokens,last) =
   ENIAM_LCGreductions.reset_variant_label ();
   let chart = create_chart tokens last in
-  ENIAM_LCGlatexOf.print_chart "results/" (name^"1_chart") "a3" chart;
+  let text_fragments = create_text_fragments tokens last in
+  ENIAM_LCGlatexOf.print_chart "results/" (name^"1_chart") "a3" text_fragments chart;
   let chart,references = ENIAM_LCGchart.lazify chart in
-  ENIAM_LCGlatexOf.print_chart "results/" (name^"2_chart") "a4" chart;
+  ENIAM_LCGlatexOf.print_chart "results/" (name^"2_chart") "a4" text_fragments chart;
   ENIAM_LCGlatexOf.print_references "results/" (name^"2_references") "a4" references;
   let chart = ENIAM_LCGchart.parse chart references 30. Sys.time in (* uwaga: niejawna zmiana imperatywna w references *)
-  ENIAM_LCGlatexOf.print_chart "results/" (name^"3_chart") "a4" chart;
+  ENIAM_LCGlatexOf.print_chart "results/" (name^"3_chart") "a4" text_fragments chart;
   ENIAM_LCGlatexOf.print_references "results/" (name^"3_references") "a4" references;
   if ENIAM_LCGchart.is_parsed chart then (
     let term = ENIAM_LCGchart.get_parsed_term chart in
@@ -330,6 +330,42 @@ let rec split_front rev n p =
   if n = 0 then List.rev rev, p else
   split_front (List.hd p :: rev) (n-1) (List.tl p)
  
+(* Zips three lists of equal length into a list of triples.
+   Raises Failure "combine_three" when the lengths differ. *)
+let rec combine_three (l1,l2,l3) =
+  match l1,l2,l3 with
+    [],[],[] -> []
+  | a :: ta, b :: tb, c :: tc -> (a,b,c) :: combine_three (ta,tb,tc)
+  | _ -> failwith "combine_three"
+
+(* Correction recorded for a token: [Single orth] supplies a replacement
+   orthographic form, [Split parts] replaces the token with several
+   (orth, lemma, cat, interp) tokens, [Correct] means that no correction is
+   recorded for the token. *)
+type split = Single of string | Split of (string * string * string * string list) list | Correct
+
+(* Error classes read from the corrections table. [Corr] is never produced by
+   [parse_err]; it is the class given to tokens without an entry in the table. *)
+type err = Err | ErrTagE | TagE | TErr | DErr | CErr | Corr
+
+(* Translates the error-class tag found in the last column of the corrections
+   table into an [err] value.
+   Raises Failure "parse_err: <tag>" for an unknown tag (the message prefix
+   now matches the function name; it used to read "parser_err"). *)
+let parse_err = function
+    "ERR" -> Err
+  | "ERR-TAGE" -> ErrTagE
+  | "TAGE" -> TagE
+  | "TERR" -> TErr
+  | "DERR" -> DErr
+  | "CERR" -> CErr
+  | s -> failwith ("parse_err: " ^ s)
+
+(* Reads the tab-separated corrections table and returns a map from
+   "real_orth<TAB>lemma<TAB>interp" to a (correction, error class) pair.
+   Rows with 9 fields describe a single-token correction, rows with 11 fields
+   describe a token that has to be split into '|'-separated parts. Rows of any
+   other width are printed to stdout and skipped.
+   Raises Failure "load_err_corr" on a duplicated key or on a split part whose
+   interpretation is empty; failures of [parse_err] and [combine_three]
+   propagate unchanged.
+   Sample rows (9 fields):
+     oooo  o  o  interj  1  NCOMPOS  NON-SGJP  CW  DERR
+     o  opylać  opyla  fin:sg:ter:imperf  1  NCOMPOS  NON-SGJP  CW  ERR
+   Sample row (11 fields):
+     napewno  napewno  qub  na|pewno  na|pewno  prep:acc|adv:pos  2  NCOMPOS  NON-SGJP  CW  ERR *)
+let load_err_corr err_corr_filename =
+  let key real_orth lemma interp = real_orth ^ "\t" ^ lemma ^ "\t" ^ interp in
+  let duplicate _ = failwith "load_err_corr" in
+  File.fold_tab err_corr_filename StringMap.empty (fun err_corr fields ->
+    match fields with
+      [real_orth;lemma;orth;interp;_;_;_;_;err] ->
+        StringMap.add_inc err_corr (key real_orth lemma interp) (Single orth,parse_err err) duplicate
+    | [real_orth;lemma;interp;split_orth;split_lemma;split_interp;_;_;_;_;err] ->
+        let parts = combine_three (Xstring.split "|" split_orth,Xstring.split "|" split_lemma,Xstring.split "|" split_interp) in
+        (* the first ':'-separated item of each interpretation is the category *)
+        let parts = Xlist.map parts (fun (part_orth,part_lemma,part_interp) ->
+          match Xstring.split ":" part_interp with
+            cat :: tags -> part_orth,part_lemma,cat,tags
+          | _ -> failwith "load_err_corr") in
+        StringMap.add_inc err_corr (key real_orth lemma interp) (Split parts,parse_err err) duplicate
+    | other -> print_endline ("load_err_corr: " ^ String.concat "\t" other); err_corr)
+
+(* Corrections table, loaded once when the module is initialised. *)
+let err_corr = load_err_corr "../resources/NKJP1M/NKJP1M-frequency-with-corrections.tab"
+
 let match_tokens name id_p s sentences =
   let p = Xunicode.utf8_chars_of_utf8_string s in
   let len = Xlist.size p in
@@ -340,8 +376,43 @@ let match_tokens name id_p s sentences =
       (* if no_spaces>0 && (nps || i=0) then Printf.printf "match_tokens spaces: %s %n i=%d beg=%d len=%d\n" name id_p i beg len; *)
       let i = i+no_spaces in
       let real_orth,p = split_front [] len p in
-      if beg = i then i+len, p, (beg,len,no_spaces,String.concat "" real_orth,orth,lemma,cat,interp) :: tokens else
-      failwith (Printf.sprintf "match_tokens 1: %s %n i=%d beg=%d len=%d" name id_p i beg len)) in
+      let split,err = try StringMap.find err_corr (orth ^ "\t" ^ lemma ^ "\t" ^ String.concat ":" (cat :: interp)) with Not_found -> Correct,Corr in
+      if beg <> i then failwith (Printf.sprintf "match_tokens 1: %s %n i=%d beg=%d len=%d" name id_p i beg len) else (
+      (* if err <> Corr then Printf.printf "match_tokens err: orth=%s lemma=%s cat=%s\n" orth lemma cat; *)
+      match split with
+        Correct ->
+          i+len, p, (beg,len,no_spaces,String.concat "" real_orth,orth,lemma,cat,interp) :: tokens
+      | Single new_orth ->
+          let new_orth = if err = TErr then orth else new_orth in
+          i+len, p, (beg,len,no_spaces,String.concat "" real_orth,new_orth,lemma,cat,interp) :: tokens
+      | Split["w",lemma1,cat1,interp1;"ogóle",lemma2,cat2,interp2] ->
+          if "wogole" = String.concat "" real_orth then
+            i+len, p, (beg+1,len-1,0,"ogole","ogóle",lemma2,cat2,interp2) :: (beg,1,no_spaces,"w","w",lemma1,cat1,interp1) :: tokens else
+          if "wogóle" <> String.concat "" real_orth then failwith (Printf.sprintf "match_tokens 3: wogole") else
+          i+len, p, (beg+1,len-1,0,"ogóle","ogóle",lemma2,cat2,interp2) :: (beg,1,no_spaces,"w","w",lemma1,cat1,interp1) :: tokens
+      | Split["z",lemma1,cat1,interp1;"pewnością",lemma2,cat2,interp2] ->
+          if "spewnością" <> String.concat "" real_orth then failwith (Printf.sprintf "match_tokens 3: spewnością") else
+          i+len, p, (beg+1,len-1,0,"pewnością","pewnością",lemma2,cat2,interp2) :: (beg,1,no_spaces,"s","z",lemma1,cat1,interp1) :: tokens
+      | Split["z",lemma1,cat1,interp1;"powrotem",lemma2,cat2,interp2] ->
+          if "spowrotem" <> String.concat "" real_orth then failwith (Printf.sprintf "match_tokens 3: spowrotem") else
+          i+len, p, (beg+1,len-1,0,"powrotem","powrotem",lemma2,cat2,interp2) :: (beg,1,no_spaces,"s","z",lemma1,cat1,interp1) :: tokens
+      | Split["Słyszała",lemma1,cat1,interp1;"m",lemma2,cat2,interp2] ->
+          if "Słyszalam" <> String.concat "" real_orth then failwith (Printf.sprintf "match_tokens 3: Słyszalam") else
+          i+len, p, (beg+len-1,1,0,"m","m",lemma2,cat2,interp2) :: (beg,len-1,no_spaces,"Słyszala","Słyszała",lemma1,cat1,interp1) :: tokens
+      | Split[orth1,lemma1,cat1,interp1;orth2,lemma2,cat2,interp2] ->
+          if orth1 ^ orth2 <> String.concat "" real_orth then failwith (Printf.sprintf "match_tokens 3: %s|%s <> %s" orth1 orth2 (String.concat "" real_orth)) else
+          let len1 = Xlist.size (Xunicode.utf8_chars_of_utf8_string orth1) in
+          let len2 = Xlist.size (Xunicode.utf8_chars_of_utf8_string orth2) in
+          if len1 + len2 <> len then failwith "match_tokens 4" else
+          i+len, p, (beg+len1,len2,0,orth2,orth2,lemma2,cat2,interp2) :: (beg,len1,no_spaces,orth1,orth1,lemma1,cat1,interp1) :: tokens
+      | Split[orth1,lemma1,cat1,interp1;orth2,lemma2,cat2,interp2;orth3,lemma3,cat3,interp3] ->
+          if orth1 ^ orth2 ^ orth3 <> String.concat "" real_orth then failwith (Printf.sprintf "match_tokens 5: %s|%s|%s <> %s" orth1 orth2 orth3 (String.concat "" real_orth)) else
+          let len1 = Xlist.size (Xunicode.utf8_chars_of_utf8_string orth1) in
+          let len2 = Xlist.size (Xunicode.utf8_chars_of_utf8_string orth2) in
+          let len3 = Xlist.size (Xunicode.utf8_chars_of_utf8_string orth3) in
+          if len1 + len2 + len3 <> len then failwith "match_tokens 6" else
+          i+len, p, (beg+len1+len2,len3,0,orth3,orth3,lemma3,cat3,interp3) :: (beg+len1,len2,0,orth2,orth2,lemma2,cat2,interp2) :: (beg,len1,no_spaces,orth1,orth1,lemma1,cat1,interp1) :: tokens
+      | Split _ -> failwith "match_tokens: ni")) in
     i,p,(id_s,List.rev tokens,named_tokens) :: sentences) in
   let no_spaces,p = get_spaces 0 p in
   if i+no_spaces <> len then failwith (Printf.sprintf "match_tokens 2: %s %n i=%d len=%d p='%s'" name id_p i len (String.concat "" p))
@@ -34,7 +34,7 @@ let generate_error_sentences sentences =
     let no_tokens = Xlist.size tokens in
     let tokens,prev_orth,prev_cat = Xlist.fold tokens ([],prev_orth,prev_cat) (fun (tokens,prev_orth,prev_cat) (_,_,no_spaces,real_orth,orth,_,cat,_) ->
       let tokens = Int.fold 1 no_spaces tokens (fun tokens _ -> xml_space :: tokens) in
-      let tokens = if no_spaces = 0 && ValidateTokenizer.is_space_required prev_orth prev_cat orth cat then xml_err_space:: tokens else tokens in
+      let tokens = if no_spaces = 0 && ValidateTokenizer.is_space_required prev_orth prev_cat orth cat then xml_err_space :: tokens else tokens in
       (make_xml_token real_orth orth) :: tokens, orth, cat) in
     Xml.Element("s",["id",id_s;"length",string_of_int no_tokens],merge_pcdata (List.rev tokens)) :: sentences,prev_orth,prev_cat) in
   Xml.Element("p",[],List.rev sentences)
@@ -81,9 +81,25 @@ let create_chart rules tokens lex_sems paths last =
       ENIAM_LCGchart.add_inc_list chart lnode rnode l 0) in
   chart
  
+(* Builds an array indexed by left node; entry [i] maps a right node [k] to the
+   text spanned between [i] and [k]. Each path edge contributes the orthographic
+   form of its token, with a space appended when the token's end (beg + len)
+   does not coincide with its [next] position. *)
+let create_text_fragments tokens paths last =
+  let fragments = Array.make last IntMap.empty in
+  Xlist.iter paths (fun (id,lnode,rnode) ->
+    let t = ExtArray.get tokens id in
+    let token_end = t.ENIAMtokenizerTypes.beg + t.ENIAMtokenizerTypes.len in
+    let orth =
+      if token_end = t.ENIAMtokenizerTypes.next then t.ENIAMtokenizerTypes.orth
+      else t.ENIAMtokenizerTypes.orth ^ " " in
+    fragments.(lnode) <- IntMap.add fragments.(lnode) rnode orth);
+  (* presumably iterates i downwards from last-1 to 0 -- confirm in Xstd *)
+  Int.iter_down 0 (last - 1) (fun i ->
+    (* spacing is already part of the fragments, so they are glued directly;
+       j = last has no slot in the array and is left as is *)
+    let extend acc j prefix =
+      if j = last then acc else
+      IntMap.fold fragments.(j) acc (fun acc k suffix ->
+        IntMap.add acc k (prefix ^ suffix)) in
+    fragments.(i) <- IntMap.fold fragments.(i) fragments.(i) extend);
+  fragments
+
 let eniam_parse_sentence timeout verbosity rules tokens lex_sems paths last =
   ENIAM_LCGreductions.reset_variant_label ();
   let result = {empty_eniam_parse_result with paths_size = Xlist.size paths} in
+  let result = if verbosity = 0 then result else {result with text_fragments=create_text_fragments tokens paths last} in
   let time1 = time_fun () in
   try
     let chart = create_chart rules tokens lex_sems paths last in
@@ -18,6 +18,7 @@
  *)
  
 open ENIAM_LCGtypes
+open Xstd
  
 type status = Idle | PreprocessingError | LexiconError | ParseError | ParseTimeout | Parsed | TooManyNodes | NotParsed | NotReduced | ReductionError | SemError | NotTranslated
  
@@ -41,6 +42,7 @@ type eniam_parse_result = {
   dependency_tree4: linear_term array;
   dependency_tree5: linear_term array;
   dependency_tree6: linear_term array;
+  text_fragments: string IntMap.t array;
   }
 (*
 type conll_parse_result = {
@@ -175,6 +177,7 @@ let empty_eniam_parse_result = {
   dependency_tree4=[| |];
   dependency_tree5=[| |];
   dependency_tree6=[| |];
+  text_fragments=[| |];
   }
  
 (*
+(*
+ *  ENIAM_LCGlexicon is a library that provides LCG lexicon for Polish
+ *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
+ *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
+ *
+ *  This library is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *)
+
+open Xstd
+open ENIAM_LCGtypes
+open Lexer
+open ENIAMwalTypes
+open ENIAMlexSemanticsTypes
+
+(* Drops a trailing comment: returns the part of [line] that precedes the first
+   '#', or [line] itself when it contains no '#'. *)
+let remove_comments line =
+  if String.contains line '#'
+  then String.sub line 0 (String.index line '#')
+  else line
+
+(* Regroups the ':'-separated token groups of an entry into (argument, role)
+   pairs. Every group after the first starts with the role token of the
+   preceding argument, followed by the tokens of the next argument; the last
+   group holds only a role.
+   Raises Failure "manage_tokens" when the groups do not have this shape. *)
+let rec manage_tokens groups =
+  match groups with
+    [arg;[T role]] -> [arg,role]
+  | arg :: (T role :: next_arg) :: rest -> (arg,role) :: manage_tokens (next_arg :: rest)
+  | _ -> failwith "manage_tokens"
+
+(* Consumes the leading direction token ("/", "\" or "|") and stores the
+   corresponding direction in position [p]. Returns the remaining tokens
+   together with the updated position.
+   Raises Failure when the token list does not start with a direction. *)
+let parse_dir p tokens =
+  let dir,rest = match tokens with
+      T "/" :: rest -> Forward_,rest
+    | T "\\" :: rest -> Backward_,rest
+    | T "|" :: rest -> Both_,rest
+    | _ -> failwith ("parse_dir: " ^ Lexer.string_of_token_list tokens) in
+  rest, {p with dir=dir}
+
+(* Consumes an optional leading "?" token; when present the position is marked
+   as [Multi]. Returns the remaining tokens and the (possibly updated) position. *)
+let parse_multi p tokens =
+  match tokens with
+    T "?" :: rest -> rest, {p with is_necessary=Multi}
+  | _ -> tokens,p
+
+(* Adds one alternative of an argument to position [p]. The single token "1"
+   marks the position as optional; any other token list is split on "*" into
+   atoms and prepended to [p.morfs] as an LCG tensor.
+   Raises Failure when a "*"-separated part is not a single plain token. *)
+let parse_morf p tokens =
+  let atom_of_part = function
+      [T s] -> Atom s
+    | part -> failwith ("parse_morf: " ^ Lexer.string_of_token_list part) in
+  match tokens with
+    [T "1"] -> {p with is_necessary=Opt}
+  | _ ->
+      let atoms = Xlist.map (Lexer.split_symbol (T "*") [] tokens) atom_of_part in
+      {p with morfs=LCG (Tensor atoms) :: p.morfs}
+
+(* Parses one argument description into position [p]: a direction, an optional
+   "?" marker, then either a single alternative or a parenthesised list of
+   "+"-separated alternatives, each handled by [parse_morf]. *)
+let parse_arg tokens p =
+  let after_dir,p = parse_dir p tokens in
+  let body,p = parse_multi p after_dir in
+  match Lexer.find_brackets ["(",")"] [] body with
+    [B("(",")",inner)] -> Xlist.fold (Lexer.split_symbol (T "+") [] inner) p parse_morf
+  | unbracketed -> parse_morf p unbracketed
+
+
+(* Applies a role annotation to position [p]: "adjunct" and "nosem" set the
+   grammatical function, "unk", "Count" and "Measure" are stored verbatim as
+   the role. Raises Failure for any other annotation. *)
+let parse_role p role =
+  match role with
+    "adjunct" -> {p with gf=ADJUNCT}
+  | "nosem" -> {p with gf=NOSEM}
+  | "unk" | "Count" | "Measure" -> {p with role=role}
+  | _ -> failwith ("parse_role: " ^ role)
+
+(* Parses one lexicon entry of the form  symbol : arg : role : arg : role ...
+   (or  symbol : null  for an entry without positions) and returns the symbol
+   together with its list of positions. Every position starts out as required.
+   Raises Failure when the entry does not begin with "symbol :". *)
+let parse_entry entry =
+  match entry with
+    [T symbol; T ":"; T "null"] -> symbol,[]
+  | T symbol :: T ":" :: args ->
+      let groups = manage_tokens (Lexer.split_symbol (T ":") [] args) in
+      let required = {empty_position with is_necessary=Req} in
+      let positions = Xlist.map groups (fun (arg,role) ->
+        parse_arg arg (parse_role required role)) in
+      symbol,positions
+  | _ -> failwith ("parse_entry: " ^ Lexer.string_of_token_list entry)
+
+(* Loads the semantic lexicon from [filename]: strips '#' comments, tokenizes
+   every line on the delimiter pattern below, discards whitespace tokens,
+   splits the token stream into ';'-terminated entries and parses each of them.
+   Returns a map from symbol to its list of positions.
+   Raises Failure "load_lexicon: <symbol>" when a symbol is defined twice. *)
+let load_lexicon filename =
+  let lines = File.load_lines filename in
+  let tokenize = Lexer.split "\\]\\| \\|\t\\|\r\\|\\?\\|:\\|;\\|&\\|!\\|=\\|}\\|{\\|,\\|\\*\\|/\\|\\+\\|)\\|(\\||\\|\\[\\|\\" in
+  let is_blank = function
+      T " " | T "\t" | T "\r" -> true
+    | _ -> false in
+  let per_line = Xlist.rev_map lines (fun line -> tokenize (remove_comments line)) in
+  let tokens = List.flatten (List.rev per_line) in
+  let tokens = List.rev (Xlist.fold tokens [] (fun kept t ->
+    if is_blank t then kept else t :: kept)) in
+  let entries = Lexer.split_symbol (T ";") [] tokens in
+  Xlist.fold entries StringMap.empty (fun lexicon entry ->
+    let symbol,positions = parse_entry entry in
+    StringMap.add_inc lexicon symbol positions (fun _ -> failwith ("load_lexicon: " ^ symbol)))
+
+(* Semantic lexicon, loaded once when the module is initialised. *)
+let sem_lexicon = load_lexicon "resources/lexicon-pl.dic"
+
+(* Prepends the positions registered for [symbol] in [sem_lexicon] to the
+   positions of [frame].
+   Raises Failure "extend_frame: <symbol>" when the symbol is not in the lexicon. *)
+let extend_frame symbol frame =
+  let extra =
+    try StringMap.find sem_lexicon symbol
+    with Not_found -> failwith ("extend_frame: " ^ symbol) in
+  {frame with positions=extra @ frame.positions}
+(*
+ *  ENIAMexec implements ENIAM processing stream
+ *  Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
+ *  Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
+ *
+ *  This library is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *)
+
+open ENIAMexecTypes
+open ENIAM_LCGtypes
+open ENIAM_LCGlexiconTypes
+open ENIAMlexSemanticsTypes
+open Xstd
+
+type pos = {role: linear_term; role_attr: linear_term; selprefs: linear_term; gf: ENIAMwalTypes.gf; (* NOTE(review): presumably one valence position prepared for matching against arguments -- confirm *)
+  cr: string list; ce: string list;
+  is_necessary: bool; is_pro: bool; is_prong: bool; is_multi: bool; dir: string; morfs: StringSet.t} (* dir is rendered by string_of_argdir ("forward", "backward" or "both"); morfs are printed joined with "+" by string_of_position *)
+
+(* Accepts a [Val] term only when its value equals [v2] and returns it.
+   Raises Not_found when the value differs and Failure "match_value" when the
+   term is not a [Val]. *)
+let match_value v2 t =
+  match t with
+    Val v when v = v2 -> Val v
+  | Val _ -> raise Not_found
+  | _ -> failwith "match_value"
+
+(* Accepts a [Val] term only when its value is not listed in [vals] and
+   returns it. Raises Not_found when the value is excluded and
+   Failure "match_neg_value" when the term is not a [Val]. *)
+let match_neg_value vals t =
+  match t with
+    Val v when not (Xlist.mem vals v) -> Val v
+  | Val _ -> raise Not_found
+  | _ -> failwith "match_neg_value"
+
+(* Scans the attribute list for the attribute that corresponds to selector
+   [sel] and checks its value against [v2] with [match_value]. Every attribute
+   skipped on the way is reported on stdout and kept unchanged.
+   Raises Failure when the list is exhausted without a match; Not_found from
+   [match_value] propagates when the value differs. *)
+let rec apply_selector v2 (sel,attrs) =
+  (* attribute name handled for each supported selector *)
+  let attr_name = function
+      Negation -> Some "NEGATION"
+    | Aspect -> Some "ASPECT"
+    | Mood -> Some "MOOD"
+    | Nsyn -> Some "NSYN"
+    | Nsem -> Some "NSEM"
+    | Case -> Some "CASE"
+    | Mode -> Some "MODE"
+    | _ -> None in
+  match attrs with
+    [] -> failwith ("apply_selector: " ^ ENIAMcategoriesPL.string_of_selector sel)
+  | (attr,v) :: rest ->
+      if attr_name sel = Some attr then (attr,match_value v2 v) :: rest else (
+      print_endline ("apply_selector: " ^ ENIAMcategoriesPL.string_of_selector sel ^ " " ^ attr);
+      (attr,v) :: (apply_selector v2 (sel,rest)))
+
+(* Negative counterpart of [apply_selector]: finds the attribute that
+   corresponds to [sel] (only Nsem and Case are supported) and rejects it with
+   [match_neg_value] when its value is listed in [vals]. Skipped attributes
+   are kept unchanged and are not reported.
+   Raises Failure when the list is exhausted without a match. *)
+let rec apply_neg_selector vals (sel,attrs) =
+  let attr_name = function
+      Nsem -> Some "NSEM"
+    | Case -> Some "CASE"
+    | _ -> None in
+  match attrs with
+    [] -> failwith ("apply_neg_selector: " ^ ENIAMcategoriesPL.string_of_selector sel)
+  | (attr,v) :: rest ->
+      if attr_name sel = Some attr then (attr,match_neg_value vals v) :: rest
+      else (attr,v) :: (apply_neg_selector vals (sel,rest))
+
+(* [apply_selectors attrs selectors] applies the selectors to [attrs] one
+   after another, in list order.  An [Eq] selector must carry exactly one
+   value, a [Neq] selector carries the list of excluded values; any other
+   shape raises [Failure "apply_selectors"].  [Not_found] escapes from the
+   individual checks when the attributes do not satisfy a selector. *)
+let apply_selectors attrs selectors =
+  let step attrs = function
+    | (sel, Eq, [v]) -> apply_selector v (sel, attrs)
+    | (sel, Neq, vals) -> apply_neg_selector vals (sel, attrs)
+    | _ -> failwith "apply_selectors" in
+  List.fold_left step attrs selectors
+
+(* Ordered key type for maps indexed by a pair of strings; in this file the
+   pair is (argument symbol, direction).  Keys are ordered with the
+   polymorphic structural comparison. *)
+module OrderedStringDir =
+  struct
+    type t = string * string
+    let compare left right = compare left right
+  end
+
+module StringDirMap = Xmap.Make(OrderedStringDir) (* map keyed by (argument symbol, direction) pairs *)
+
+(* [get_arg_symbols_variant arg_symbols t] lists the argument symbols offered
+   by [t], a term built from [Ref] and [Variant] only (anything else raises
+   [Failure]).  [arg_symbols.(i)] holds the symbols and the direction of node
+   [i].  Each result is [((symbol, dir), t')], where [t'] is [t] restricted
+   to the alternatives that offer [(symbol, dir)]. *)
+let rec get_arg_symbols_variant arg_symbols term =
+  match term with
+  | Ref i ->
+      let symbols, dir = arg_symbols.(i) in
+      Xlist.map symbols (fun symbol -> (symbol, dir), Ref i)
+  | Variant (label, alternatives) ->
+      (* group the alternatives by the argument symbol they offer *)
+      let collect grouped (i, sub) =
+        Xlist.fold (get_arg_symbols_variant arg_symbols sub) grouped (fun grouped (key, sub) ->
+          StringDirMap.add_inc grouped key [i, sub] (fun found -> (i, sub) :: found)) in
+      let grouped = Xlist.fold alternatives StringDirMap.empty collect in
+      StringDirMap.fold grouped [] (fun acc key found -> (key, Variant (label, found)) :: acc)
+  | t -> failwith ("get_arg_symbols_variant: " ^ ENIAM_LCGstringOf.linear_term 0 t)
+
+(* [get_arg_symbols_tuple arg_symbols rev t] flattens the argument term [t]:
+   [Dot] contributes nothing, a [Tuple] is traversed element by element, and
+   every other term is described with [get_arg_symbols_variant].  The
+   descriptions are pushed onto [rev], so they come out in reverse order. *)
+let rec get_arg_symbols_tuple arg_symbols rev term =
+  match term with
+  | Dot -> rev
+  | Tuple l -> Xlist.fold l rev (get_arg_symbols_tuple arg_symbols)
+  | t -> get_arg_symbols_variant arg_symbols t :: rev
+
+(* Renders an argument direction as a slash symbol: "forward" gives "/",
+   "backward" gives "\\" and "both" gives "|".
+   Raises [Failure "string_of_argdir"] for any other string. *)
+let string_of_argdir dir =
+  match dir with
+  | "forward" -> "/"
+  | "backward" -> "\\"
+  | "both" -> "|"
+  | _ -> failwith "string_of_argdir"
+
+(* Debug rendering of an argument description: every alternative is printed
+   as direction symbol, argument symbol, ":" and the term; alternatives are
+   separated with ", ". *)
+let string_of_arg arg =
+  let render ((arg_symbol, dir), t) =
+    (string_of_argdir dir) ^ arg_symbol ^ ":" ^ ENIAM_LCGstringOf.linear_term 0 t in
+  String.concat ", " (Xlist.map arg render)
+
+(* Debug rendering of a position: its direction symbol followed by the
+   accepted argument symbols joined with "+". *)
+let string_of_position p =
+  let morfs = String.concat "+" (StringSet.to_list p.morfs) in
+  (string_of_argdir p.dir) ^ morfs
+
+(* [match_arg_positions arg rev positions] tries to attach the argument [arg]
+   to each of [positions] in turn; [rev] collects the positions already
+   passed over.  Every successful attachment yields a pair made of the
+   argument term decorated with the attributes of the position and the list
+   of positions still available afterwards (a multi position stays on that
+   list).  The progress is traced on stdout.
+   Raises [Failure] when a position accepts more than one alternative of
+   [arg] or has an unsupported grammatical function. *)
+let rec match_arg_positions arg rev positions =
+  match positions with
+  | [] ->
+      Printf.printf "match_arg_positions: arg=%s rev=[%s] positions=[]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map rev string_of_position));
+      []
+  | p :: positions ->
+      Printf.printf "match_arg_positions: arg=%s rev=[%s] positions=%s :: [%s]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map rev string_of_position)) (string_of_position p) (String.concat "; " (Xlist.map positions string_of_position));
+      (* alternatives of the argument accepted by position [p] *)
+      let accepts (arg_symbol, dir) = StringSet.mem p.morfs arg_symbol && p.dir = dir in
+      let matched = Xlist.fold arg [] (fun found (key, t) ->
+        if accepts key then t :: found else found) in
+      (match matched with
+       | [] ->
+           print_endline "match_arg_positions: not matched";
+           match_arg_positions arg (p :: rev) positions
+       | [t] ->
+           let t =
+             if p.gf = ENIAMwalTypes.SUBJ || p.gf = ENIAMwalTypes.OBJ || p.gf = ENIAMwalTypes.ARG then
+               SetAttr("role",p.role,SetAttr("role_attr",p.role_attr,SetAttr("selprefs",p.selprefs,t)))
+             else if p.gf = ENIAMwalTypes.ADJUNCT then t
+             else failwith "match_arg_positions: ni 2" in
+           let t = SetAttr("gf",Val (ENIAMwalStringOf.gf p.gf),t) in
+           let t = Xlist.fold p.cr t (fun t cr -> SetAttr("controller",Val cr,t)) in
+           let t = Xlist.fold p.ce t (fun t ce -> SetAttr("controllee",Val ce,t)) in
+           (* a multi position can be filled again, so it stays available *)
+           let remaining = if p.is_multi then rev @ (p :: positions) else rev @ positions in
+           let others = match_arg_positions arg (p :: rev) positions in
+           (t, remaining) :: others
+       | _ -> failwith "match_arg_positions: ni")
+
+(* If this function returns an empty list, the arguments could not be matched
+   to the positions.
+   Otherwise every element of the result is one complete assignment: the
+   decorated argument terms followed by a "pro" node for each pro position
+   that was left unfilled.  An assignment is rejected when a necessary
+   position stays unfilled.  The progress is traced on stdout. *)
+let rec match_args_positions_rec positions args =
+  match args with
+  | arg :: args ->
+      Printf.printf "match_args_positions_rec: args=%s :: [%s] positions=[%s]\n%!" (string_of_arg arg) (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position));
+      Xlist.fold (match_arg_positions arg [] positions) [] (fun found (arg_pos,positions) ->
+        Xlist.fold (match_args_positions_rec positions args) found (fun found l -> (arg_pos :: l) :: found))
+  | [] ->
+      Printf.printf "match_args_positions_rec: args=[] positions=[%s]\n%!" (String.concat "; " (Xlist.map positions string_of_position));
+      if List.exists (fun p -> p.is_necessary) positions then begin
+        print_endline "match_args_positions: not matched";
+        []
+      end else
+        let pro_node p =
+          let attrs = ["role",p.role; "role_attr",p.role_attr; "selprefs",p.selprefs; "gf",Val (ENIAMwalStringOf.gf p.gf)] in
+          (* FIXME: add number and gender when [p.is_prong] holds *)
+          let attrs = Xlist.fold p.cr attrs (fun attrs cr -> ("controller",Val cr) :: attrs) in
+          let attrs = Xlist.fold p.ce attrs (fun attrs ce -> ("controllee",Val ce) :: attrs) in
+          Node{ENIAM_LCGrenderer.empty_node with lemma="pro"; pos="pro"; attrs=attrs} in
+        [Xlist.fold positions [] (fun found p ->
+          if p.is_pro then pro_node p :: found else found)]
+
+(* FIXME: optionality of the dependents of lexicalized arguments *)
+
+(* If this function returns an empty list, the arguments could not be matched
+   to the positions.
+   Every assignment found by [match_args_positions_rec] is packed into a
+   single term: [Dot] when it is empty, the term itself when it has one
+   element and a [Tuple] otherwise. *)
+let match_args_positions args positions =
+  Printf.printf "match_args_positions: args=[%s] positions=[%s]\n%!" (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position));
+  let pack = function
+    | [] -> Dot
+    | [t] -> t
+    | l -> Tuple l in
+  Xlist.rev_map (match_args_positions_rec positions args) pack
+
+(* Turns a selectional preference into its name: a [SynsetName] gives the
+   name itself and a [RelationRole] gives "ALL".  [SynsetId] and [Predef]
+   are not supported and raise [Failure "translate_selprefs"]. *)
+let translate_selprefs selpref =
+  match selpref with
+  | ENIAMwalTypes.SynsetId _ | ENIAMwalTypes.Predef _ -> failwith "translate_selprefs"
+  | ENIAMwalTypes.SynsetName s -> s
+  | ENIAMwalTypes.RelationRole _ -> "ALL"
+
+(* Renders one component of a tensor symbol: atoms and variables by name,
+   [Top] as "T".  Other symbols raise [Failure]. *)
+let string_of_internal_morf symbol =
+  match symbol with
+  | Atom s | AVar s -> s
+  | Top -> "T"
+  | t -> failwith ("string_of_internal_morf: " ^ ENIAM_LCGstringOf.internal_grammar_symbol_prime t)
+
+
+(* Renders a morf wrapping a [Tensor] symbol as its components joined with
+   "*".  Any other morf raises [Failure]. *)
+let string_of_morf morf =
+  match morf with
+  | ENIAMwalTypes.LCG (Tensor l) -> String.concat "*" (Xlist.map l string_of_internal_morf)
+  | ENIAMwalTypes.LCG t -> failwith ("string_of_morf: " ^ ENIAM_LCGstringOf.grammar_symbol_prime t)
+  | _ -> failwith "string_of_morf"
+
+(* Renders an argument symbol term: [Dot] as the empty string, a [Val] as
+   its content and a [Tuple] as its elements joined with "*".  Other terms
+   raise [Failure]. *)
+let rec string_of_arg_symbol term =
+  match term with
+  | Dot -> ""
+  | Val s -> s
+  | Tuple l -> String.concat "*" (Xlist.map l string_of_arg_symbol)
+  | t -> failwith ("string_of_arg_symbol: " ^ ENIAM_LCGstringOf.linear_term 0 t)
+
+(* Maps a valence direction onto the direction names used in this file
+   ("both", "forward", "backward"). *)
+let translate_dir dir =
+  match dir with
+  | ENIAMwalTypes.Both_ -> "both"
+  | ENIAMwalTypes.Forward_ -> "forward"
+  | ENIAMwalTypes.Backward_ -> "backward"
+
+(* [translate_position id p] converts the valence position [p] into the [pos]
+   record used for matching.
+   - [id] prefixes the controller and controllee labels (as "id-label");
+   - selectional preferences become [Dot], a single [Val] or a [Tuple];
+   - the necessity marker of [p] is split into the four boolean flags;
+   - morfs are stored in their string form; a [LCG One] morf is skipped and
+     only reported on stdout.
+   Raises [Failure] when a selectional preference or a morf has an
+   unsupported shape. *)
+let translate_position id p =
+  {role = Val p.ENIAMwalTypes.role;
+   role_attr = Val p.ENIAMwalTypes.role_attr;
+   selprefs = (match Xlist.map p.ENIAMwalTypes.sel_prefs translate_selprefs with
+      [] -> Dot
+    | [s] -> Val s
+    | l -> Tuple(Xlist.rev_map l (fun s -> Val s)));
+   gf=p.ENIAMwalTypes.gf;
+   cr=Xlist.map p.ENIAMwalTypes.cr (fun cr -> id ^ "-" ^ cr);
+   ce=Xlist.map p.ENIAMwalTypes.ce (fun ce -> id ^ "-" ^ ce);
+   is_necessary = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.Req(*Xlist.fold p.ENIAMwalTypes.morfs true (fun b -> function ENIAMwalTypes.LCG One -> false | _ -> b)*);
+   is_pro = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.Pro || p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.ProNG;
+   is_prong = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.ProNG;
+   is_multi = p.ENIAMwalTypes.is_necessary = ENIAMwalTypes.Multi;
+   dir= translate_dir p.ENIAMwalTypes.dir;
+   morfs = Xlist.fold p.ENIAMwalTypes.morfs StringSet.empty (fun morfs morf ->
+        (* the trace line ends with a newline so that it does not run into the next message *)
+        if morf = ENIAMwalTypes.LCG One then (Printf.printf "translate_position: One\n%!"; morfs) else
+        StringSet.add morfs (string_of_morf morf))}
+
+(* Extracts the phrase name from a node symbol: the content of a [Val], or
+   the first element of a [Tuple] when it is a [Val]; the three-element
+   symbol lex/się/qub is named "lex-się-qub".  Other terms (including [Dot])
+   raise [Failure]. *)
+let get_phrase_symbol symbol =
+  match symbol with
+  | Tuple [Val "lex"; Val "się"; Val "qub"] -> "lex-się-qub"
+  | Tuple (Val s :: _) | Val s -> s
+  | t -> failwith ("get_phrase_symbol: " ^ ENIAM_LCGstringOf.linear_term 0 t)
+
+(* let extend_frame symbol = function *)
+
+(* [assign_frames_rec tokens lex_sems tree arg_symbols visited t] assigns
+   semantic frames to the nodes reachable from [t] and returns the rewritten
+   term together with the updated set of visited references.
+   - [tree] is updated in place: the entry of every reference is replaced by
+     its rewritten form the first time the reference is met;
+   - [arg_symbols.(i)] holds the argument symbols and direction of node [i];
+   - [lex_sems] provides the candidate frames of a node under its [id];
+   - [tokens] is not used here.
+   A node whose symbol is not [Dot] becomes a [Variant] with one alternative
+   per (frame, assignment of arguments to positions, meaning); alternatives
+   are labelled with consecutive numbers starting from 1.
+   The progress is traced on stdout.
+   Raises [Failure] when a node has no applicable frame, a frame has no
+   meanings, no assignment of arguments is found, or an unsupported term is
+   met. *)
+let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function
+    Ref i ->
+      (* each reference is processed once; [visited] is extended before descending *)
+      if IntSet.mem visited i then Ref i,visited else
+      let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols (IntSet.add visited i) tree.(i) in
+      tree.(i) <- t;
+      Ref i,visited
+  | Node t ->
+      (* arguments first, so that frames are assigned bottom-up *)
+      let args,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t.args in
+      let t = {t with args=args} in
+      if t.symbol = Dot then Node t,visited else
+      let args = get_arg_symbols_tuple arg_symbols [] args in
+      let s = ExtArray.get lex_sems t.id in
+      let symbol = get_phrase_symbol t.symbol in
+      (* keep the frames whose selectors accept the attributes of the node;
+         [Not_found] signals a rejected frame *)
+      let frames = Xlist.fold s.ENIAMlexSemanticsTypes.frames [] (fun frames frame ->
+        print_endline ("selectors: " ^ ENIAMcategoriesPL.string_of_selectors frame.selectors);
+        try
+          let attrs = apply_selectors t.attrs frame.selectors in
+          let frame = ENIAMsemLexicon.extend_frame symbol frame in
+          print_endline "passed";
+          (attrs,frame,Xlist.rev_map frame.positions (translate_position (string_of_int t.id))) :: frames
+        with Not_found -> print_endline "rejected"; frames) in
+      if frames = [] then failwith "assign_frames_rec: no frame" else
+      let e = ENIAM_LCGreductions.get_variant_label () in
+      (* [n] numbers the alternatives of the resulting variant *)
+      let l,_ = Xlist.fold frames ([],1) (fun (l,n) (attrs,frame,positions) ->
+        Printf.printf "assign_frames_rec: lemma=%s args=[%s] positions=[%s]\n%!" t.lemma (String.concat "; " (Xlist.map args string_of_arg)) (String.concat "; " (Xlist.map positions string_of_position));
+        if frame.meanings = [] then failwith "assign_frames_rec: no meanings" else
+        Xlist.fold (match_args_positions args positions) (l,n) (fun (l,n) args ->
+          Xlist.fold frame.meanings (l,n) (fun (l,n) (meaning,hipero,weight) ->
+            (string_of_int n, Node{t with attrs=
+              ("meaning",Val meaning) ::
+              ("hipero",ENIAM_LCGrules.make_variant (Xlist.map hipero (fun (h,n) -> Tuple[Val h;Val(string_of_int n)]))) ::
+              ("arole",Val frame.arole) ::
+              ("arole-attr",Val frame.arole_attr) ::
+              ("arev",Val (if frame.arev then "+" else "-")) ::
+              ("fopinion",Val (ENIAMwalStringOf.opinion frame.fopinion)) ::
+              ("sopinion",Val (ENIAMwalStringOf.opinion frame.sopinion)) :: t.attrs; args=args}) ::
+              l,n+1))) in
+      if l = [] then failwith ("assign_frames_rec: no frame assignment found for " ^ t.lemma ^ " " ^ ENIAM_LCGstringOf.linear_term 0 t.symbol) else
+      Variant(e,l),visited
+  | Variant(e,l) ->
+      (* [visited] is threaded through the alternatives; their order is kept *)
+      let l,visited = Xlist.fold l ([],visited) (fun (l,visited) (i,t) ->
+        let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t in
+        (i,t) :: l, visited) in
+      Variant(e,List.rev l),visited
+  | Tuple l ->
+      let l,visited = Xlist.fold l ([],visited) (fun (l,visited) t ->
+        let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t in
+        t :: l, visited) in
+      Tuple(List.rev l),visited
+  | Dot -> Dot,visited
+  | t -> failwith ("assign_frames_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t)
+
+(* [get_arg_symbols t] returns the argument symbols under which the node [t]
+   may be matched with a valence position, together with its direction.
+   Usually this is just the rendered [arg_symbol].  For "cp", "ncp" and
+   "prepncp" arguments whose complementizer slots are unspecified ("T") a
+   second, more specific symbol taken from the node symbol is added.
+   Terms other than [Node] raise [Failure]. *)
+let rec get_arg_symbols = function
+  | Node{arg_symbol=(Tuple [Val "cp"; Val "T"; Val "T"]) as arg;
+         symbol=(Tuple [Val "cp"; _; _]) as sym; arg_dir=dir; _} ->
+      [string_of_arg_symbol arg; string_of_arg_symbol sym], dir
+  | Node{arg_symbol=(Tuple [Val "ncp"; Val "T"; Val arg_case; Val "T"; Val "T"; Val "T"; Val "T"]) as arg;
+         symbol=Tuple [Val "ncp"; _; _; _; _; ctype; comp]; arg_dir=dir; _} ->
+      (* only the complementizer type and lemma are taken from the node symbol *)
+      let specific = Tuple [Val "ncp"; Val "T"; Val arg_case; Val "T"; Val "T"; ctype; comp] in
+      [string_of_arg_symbol arg; string_of_arg_symbol specific], dir
+  | Node{arg_symbol=(Tuple [Val "prepncp"; Val _; Val _; Val "T"; Val "T"]) as arg;
+         symbol=(Tuple [Val "prepncp"; _; _; _; _]) as sym; arg_dir=dir; _} ->
+      [string_of_arg_symbol arg; string_of_arg_symbol sym], dir
+  | Node t -> [string_of_arg_symbol t.arg_symbol], t.arg_dir
+  | t -> failwith ("get_arg_symbols: " ^ ENIAM_LCGstringOf.linear_term 0 t)
+
+(* [assign_frames tokens lex_sems tree] returns a copy of the dependency tree
+   in which semantic frames have been assigned to the nodes reachable from
+   entry 0.  The argument symbols of all entries are computed up front from
+   the original nodes.  The input array is not modified. *)
+let assign_frames tokens lex_sems tree =
+  print_endline "assign_frames";
+  let tree = Array.copy tree in
+  let arg_symbols = Array.map get_arg_symbols tree in
+  ignore (assign_frames_rec tokens lex_sems tree arg_symbols IntSet.empty (Ref 0));
+  tree
+
+(* Runs [assign_frames] on [dependency_tree6] of every sentence of [text]
+   whose status is [Parsed]; everything else is left as it is. *)
+let assign tokens lex_sems text =
+  let process _mode = function
+    | ENIAMSentence result when result.status = Parsed ->
+        ENIAMSentence {result with dependency_tree6=assign_frames tokens lex_sems result.dependency_tree6}
+    | t -> t in
+  map_text Struct process text
+
+(* [cut_nodes result_tree t] moves every [Node] found in [t] (below [Variant]
+   and [Tuple] only) into [result_tree] and puts a [Ref] to the new entry in
+   its place.  Nodes are stored in the order in which they occur in [t].
+   Terms other than [Node], [Variant], [Tuple] and [Dot] raise [Failure]. *)
+let rec cut_nodes result_tree term =
+  match term with
+  | Node t -> Ref (ExtArray.add result_tree (Node t))
+  | Variant(e,l) ->
+      let cut_alternative (i,t) = i, cut_nodes result_tree t in
+      Variant(e,List.rev (Xlist.rev_map l cut_alternative))
+  | Tuple l -> Tuple(List.rev (Xlist.rev_map l (cut_nodes result_tree)))
+  | Dot -> Dot
+  | t -> failwith ("cut_nodes: " ^ ENIAM_LCGstringOf.linear_term 0 t)
+
+(* [reduce_set_attr attr v t] prepends the attribute [(attr, v)] to every
+   node of [t], descending through [Variant]s.  Any other term raises
+   [Failure]. *)
+let rec reduce_set_attr attr v term =
+  match term with
+  | Node t -> Node{t with attrs=(attr,v) :: t.attrs}
+  | Variant(e,l) ->
+      let set_in_alternative (i,t) = i, reduce_set_attr attr v t in
+      Variant(e,List.rev (Xlist.rev_map l set_in_alternative))
+  | t -> failwith ("reduce_set_attr: " ^ ENIAM_LCGstringOf.linear_term 0 t)
+
+(* [reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t] rebuilds
+   the term [t] of [orig_tree]:
+   - a [Ref] is replaced by the reduced content of the entry it points to;
+     [mid_tree] caches those results, with [Dot] meaning "not computed yet";
+   - the reduced arguments of a [Node] are stored in [result_tree] by
+     [cut_nodes] and referenced from the node;
+   - a node whose [id] is 0 gets a fresh "pro" token appended to [tokens]
+     and an empty entry appended to [lex_sems];
+   - [SetAttr] is pushed down onto the nodes with [reduce_set_attr].
+   The reduction of node arguments is traced on stdout.  Unsupported terms
+   raise [Failure]. *)
+let rec reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree term =
+  let recurse = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree in
+  match term with
+  | Ref i ->
+      (match mid_tree.(i) with
+       | Dot ->
+           let t = recurse orig_tree.(i) in
+           mid_tree.(i) <- t;
+           t
+       | cached -> cached)
+  | Node t ->
+      let args = recurse t.args in
+      print_endline ("reduce_tree_rec 1: " ^ ENIAM_LCGstringOf.linear_term 0 args);
+      let args = cut_nodes result_tree args in
+      print_endline ("reduce_tree_rec 2: " ^ ENIAM_LCGstringOf.linear_term 0 args);
+      let id =
+        match t.id with
+        | 0 ->
+            let id = ExtArray.add tokens {ENIAMtokenizerTypes.empty_token_env with ENIAMtokenizerTypes.token=ENIAMtokenizerTypes.Lemma("pro","pro",[[]])} in
+            ignore (ExtArray.add lex_sems empty_lex_sem);
+            id
+        | id -> id in
+      Node{t with args=args; id=id}
+  | Variant(e,l) ->
+      let reduce_alternative (i,t) = i, recurse t in
+      Variant(e,List.rev (Xlist.rev_map l reduce_alternative))
+  | Tuple l -> Tuple(List.rev (Xlist.rev_map l recurse))
+  | Dot -> Dot
+  | SetAttr(attr,v,t) -> reduce_set_attr attr v (recurse t)
+  | t -> failwith ("reduce_tree_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t)
+
+(* [reduce_tree tokens lex_sems orig_tree] reduces the tree starting from
+   entry 0 of [orig_tree] and returns the resulting array.  A placeholder is
+   added to the result before the reduction and entry 0 is overwritten with
+   the reduced root afterwards.  New "pro" tokens may be appended to [tokens]
+   and [lex_sems] along the way. *)
+let reduce_tree tokens lex_sems orig_tree =
+  print_endline "reduce_tree";
+  let size = Array.length orig_tree in
+  let mid_tree = Array.make size Dot in
+  let result_tree = ExtArray.make size Dot in
+  ignore (ExtArray.add result_tree Dot);
+  let root = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree orig_tree.(0) in
+  ExtArray.set result_tree 0 root;
+  ExtArray.to_array result_tree
+
+(* Runs [reduce_tree] on [dependency_tree6] of every sentence of [text] whose
+   status is [Parsed]; everything else is left as it is. *)
+let reduce tokens lex_sems text =
+  let process _mode = function
+    | ENIAMSentence result when result.status = Parsed ->
+        ENIAMSentence {result with dependency_tree6=reduce_tree tokens lex_sems result.dependency_tree6}
+    | t -> t in
+  map_text Struct process text
@@ -774,8 +774,8 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
   | LexiconError -> sprintf "error_lex: %s paths_size=%d\n" result.msg result.paths_size
   | ParseError ->
       if verbosity = 0 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1;
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.chart2;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2);
       sprintf "error_parse: %s paths_size=%d\n" result.msg result.paths_size ^
       (if verbosity = 0 then "" else
@@ -785,10 +785,10 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
       ""
   | ParseTimeout ->
       if verbosity < 2 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2);
       if verbosity = 0 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.chart2);
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2);
       sprintf "timeout: %s paths_size=%d\n" result.msg result.paths_size ^
       (if verbosity < 2 then "" else
         sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix ^
@@ -798,13 +798,13 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
       ""
   | NotParsed ->
       if verbosity = 0 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1);
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1);
       if verbosity < 2 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.chart2;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3);
       if verbosity = 0 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.chart3);
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.text_fragments result.chart3);
       sprintf "not_parsed: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^
       (if verbosity = 0 then "" else
         sprintf "<BR><A HREF=\"%s_1_chart.pdf\">Chart 1</A>\n" file_prefix) ^
@@ -817,11 +817,11 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
       ""
   | ReductionError ->
       if verbosity < 2 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.chart2;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2;
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.chart3);
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.text_fragments result.chart3);
       if verbosity = 0 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3);
       (if verbosity < 2 then "" else
         sprintf "error_reduction: %s paths_size=%d chart_size=%d\n" result.msg result.paths_size result.chart_size ^
@@ -834,10 +834,10 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
       ""
   | TooManyNodes ->
       if verbosity < 2 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1;
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.chart2;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2;
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.chart3;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.text_fragments result.chart3;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3);
       sprintf "to_many_nodes: paths_size=%d chart_size=%d\n" result.paths_size result.chart_size ^
       (if verbosity < 2 then "" else
@@ -849,10 +849,10 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
       ""
   | NotReduced ->
       if verbosity < 2 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1;
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.chart2;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2;
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.chart3);
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.text_fragments result.chart3);
       if verbosity = 0 then () else (
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3;
         Xlatex.latex_file_out path (file_prefix ^ "_4_term") "a4" false (fun file ->
@@ -872,10 +872,10 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
       ""
   | SemError ->
       if verbosity < 2 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1;
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.chart2;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2;
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.chart3);
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.text_fragments result.chart3);
       if verbosity = 0 then () else (
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3;
         Xlatex.latex_file_out path (file_prefix ^ "_4_term") "a4" false (fun file ->
@@ -895,10 +895,10 @@ let html_of_eniam_sentence path file_prefix img verbosity tokens (result : eniam
       ""
   | Parsed ->
       if verbosity < 2 then () else (
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.chart1;
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.chart2;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_1_chart") "a1" result.text_fragments result.chart1;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_2_chart") "a4" result.text_fragments result.chart2;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_2_references") "a0" result.references2;
-        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.chart3;
+        ENIAM_LCGlatexOf.print_chart path (file_prefix ^ "_3_chart") "a4" result.text_fragments result.chart3;
         ENIAM_LCGlatexOf.print_references path (file_prefix ^ "_3_references") "a0" result.references3;
         Xlatex.latex_file_out path (file_prefix ^ "_4_term") "a4" false (fun file ->
           Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 result.term4));
@@ -4,3 +4,5 @@ przetwarzanie biogramu do końca
 przetwarzanie dialogów
 przechwytywanie błędów subsyntax itp w parserze i semparserze
 interfejs dla clarin
+
+przetwarzanie kontroli jako dodawanie pro/koreferencji, oraz uzgadnianie przypadków
@@ -9,15 +9,15 @@ OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa b
 	eniam-lexSemantics.cmxa #eniam-exec.cmxa
 INSTALLDIR=`ocamlc -where`/eniam
  
-SOURCES= ENIAMexecTypes.ml ENIAMexec.ml ENIAMselectSent.ml ENIAMvisualization.ml
+SOURCES= ENIAMexecTypes.ml ENIAMexec.ml ENIAMselectSent.ml ENIAMsemLexicon.ml ENIAMsemValence.ml ENIAMvisualization.ml
  
 all: eniam-exec.cma eniam-exec.cmxa
  
 install: all
 	mkdir -p $(INSTALLDIR)
 	cp eniam-exec.cmxa eniam-exec.a eniam-exec.cma $(INSTALLDIR)
-	cp ENIAMexecTypes.cmi ENIAMexec.cmi ENIAMselectSent.cmi ENIAMvisualization.cmi $(INSTALLDIR)
-	cp ENIAMexecTypes.cmx ENIAMexec.cmx ENIAMselectSent.cmx ENIAMvisualization.cmx $(INSTALLDIR)
+	cp ENIAMexecTypes.cmi ENIAMexec.cmi ENIAMselectSent.cmi ENIAMsemLexicon.cmi ENIAMsemValence.cmi ENIAMvisualization.cmi $(INSTALLDIR)
+	cp ENIAMexecTypes.cmx ENIAMexec.cmx ENIAMselectSent.cmx ENIAMsemLexicon.cmx ENIAMsemValence.cmx ENIAMvisualization.cmx $(INSTALLDIR)
  
  
 eniam-exec.cma: $(SOURCES)
@@ -30,6 +30,7 @@ let img = ref 1
 let timeout = ref 30.
 let select_sentence_modes_flag = ref false
 let select_sentences_flag = ref true
+let assign_semantic_valence_flag = ref true
 let output_dir = ref "results/"
 let spec_list = [
 (*  "-s", Arg.Unit (fun () -> sentence_split:=true), "Split input into sentences (default)";
@@ -56,6 +57,8 @@ let spec_list = [
   "--no_sel_modes", Arg.Unit (fun () -> select_sentence_modes_flag:=false), "Do not select sencence modes (default)";
   "--sel_sent", Arg.Unit (fun () -> select_sentences_flag:=true), "Select parsed sentences (default)";
   "--no_sel_sent", Arg.Unit (fun () -> select_sentences_flag:=false), "Do not select parsed sentences";
+  "--sem_valence", Arg.Unit (fun () -> assign_semantic_valence_flag:=true), "Assign semantic valence (default)";
+  "--no_sem_valence", Arg.Unit (fun () -> assign_semantic_valence_flag:=false), "Do not assign semantic valence";
   ]
  
 let usage_msg =
@@ -93,6 +96,8 @@ let rec main_loop sub_in sub_out =
     let text = ENIAMexec.parse !timeout !verbosity rules tokens lex_sems text in
     let text = if !select_sentence_modes_flag then ENIAMselectSent.select_sentence_modes_text text else text in
     let text = if !select_sentences_flag then ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text else text in
+    let text = if !assign_semantic_valence_flag then ENIAMsemValence.assign tokens lex_sems text else text in
+    let text = if !assign_semantic_valence_flag then ENIAMsemValence.reduce tokens lex_sems text else text in
     ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens);
     prerr_endline "Done!";
     main_loop sub_in sub_out)
@@ -106,10 +111,7 @@ let _ =
   prerr_endline message;
   ENIAMcategoriesPL.initialize ();
   Arg.parse spec_list anon_fun usage_msg;
-  if !lexSemantics_built_in then (
-    ENIAMsubsyntax.initialize ();
-    ENIAMwalParser.initialize ();
-    ENIAMwalReduce.initialize ());
+  if !lexSemantics_built_in then ENIAMlexSemantics.initialize ();
   Gc.compact ();
   let sub_in,sub_out =
     if !lexSemantics_built_in then stdin,stdout
+
+day-lex:      /(date+day+day-month):unk;
+date:         /(1+year-lex):unk;
+day:          /month-lex:unk;
+day-interval: /month-lex:unk;
+day-month:    /(1+year-lex):unk;
+year-lex:     |(1+adjp*number*case*gender):unk;
+month-lex:    /(1+year+np*T*gen*T*T):unk;
+
+date-interval:        null;
+day-month-interval:   null;
+month-interval:       null;
+year:                 null;
+year-interval:        null;
+roman:                null;
+roman-interval:       null;
+hour-minute:          null;
+hour:                 null;
+hour-minute-interval: null;
+hour-interval:        null;
+obj-id:               null;
+match-result:         null;
+url:                  null;
+email:                null;
+
+np:
+  \(1+num*number*case*gender*person*congr+num*number*case*gender*person*rec):Count \(1+qub):adjunct /(1+inclusion):adjunct
+  \(1+measure*unumber*ucase*ugender*uperson):Measure
+  /(1+date+day+day-month+date-interval+day-interval+day-month-interval+year+year-interval+hour+hour-minute+hour-interval+hour-minute-interval):unk
+  |(1+year):unk /(1+obj-id):unk;
+
+num: \(1+qub):adjunct /(1+inclusion):adjunct;
+
+measure:
+  \(1+num*number*case*gender*person*congr+num*number*case*gender*person*rec):Count \(1+qub):adjunct /(1+inclusion):adjunct;
+
+prepnp: \(1+advp*T):adjunct /(np*T*case*T*T+day-month+day+year+date+hour+hour-minute):unk \(1+qub):adjunct /(1+inclusion):adjunct;
+prepadjp: \(1+advp*T):adjunct /(adjp*T*case*T+adjp*sg*dat*m1+adjp*T*postp*T+adjp*sg*nom*f+advp*T):unk \(1+qub):adjunct /(1+inclusion):adjunct;
+compar: \(1+advp*T):adjunct /(np*T*case*T*T+prepnp*T*T+prepadjp*T*T):unk \(1+qub):adjunct /(1+inclusion):adjunct;
+
+adjp: \(1+qub):adjunct /(1+inclusion):adjunct \(1+adja):unk;
+
+adja: /hyphen:nosem;
+
+advp: \(1+qub):adjunct /(1+inclusion):adjunct \(1+adja):unk;
+
+#FIXME: sprawdzić czy 'or' czy 'or2'
+ip: /(1+int):unk \(1+qub):adjunct /(1+inclusion):adjunct \(1+nie):nosem |(1+aux-imp):nosem
+  |(1+aux-fut*number*gender*person+aux-past*number*gender*person):nosem |(1+aglt*number*person):nosem |(1+by):nosem /(1+or):adjunct;
+
+aux-fut: null;
+aux-past: null;
+aglt: null;
+
+infp: \(1+qub):adjunct /(1+inclusion):adjunct \(1+nie):nosem;
+padvp: \(1+qub):adjunct /(1+inclusion):adjunct \(1+nie):nosem;
+
+cp: /ip*T*T*T:unk;
+ncp: \(1+qub):adjunct /(1+inclusion):adjunct /cp*ctype*plemma:unk;
+
+#lemma=i|lub|czy|bądź,pos=conj:
+#  QUANT[number=all_numbers,gender=all_genders,person=all_persons]
+#  (ip*number*gender*person/ip*T*T*T)\ip*T*T*T;
+#lemma=,|i|lub|czy|bądź,pos=conj: (advp*mod/prepnp*T*T)\prepnp*T*T;
+#lemma=,|i|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/advp*mode)\prepnp*T*T;
+#lemma=,|i|lub|czy|bądź,pos=conj: QUANT[mode=0] (advp*mode/prepnp*T*T)\advp*mode;
+#lemma=,|i|lub|czy|bądź,pos=conj: (advp*mod/advp*T)\advp*T; #FIXME: przydałaby się wersja zachowująca mode
+#lemma=,|i|lub|czy|bądź,pos=conj:
+#  QUANT[plemma=0,case=all_cases]
+#  (prepnp*plemma*case/prepnp*plemma*case)\prepnp*plemma*case;
+#lemma=,|i|lub|czy|bądź,pos=conj:
+#  QUANT[number=all_numbers,case=all_cases,gender=all_genders,person=all_persons]
+#  (np*number*case*gender*person/np*T*case*T*T)\np*T*case*T*T;
+#lemma=,|i|lub|czy|bądź,pos=conj:
+#  QUANT[number=all_numbers,case=all_cases,gender=all_genders]
+#  (adjp*number*case*gender/adjp*number*case*gender)\adjp*number*case*gender;
+
+lex-się-qub: null;
+nie: null;
+by: null;
+aux-imp: null;
+qub: null;
+interj: null;
+hyphen: null;
+int: null;
+#lemma=„,pos=interp:       QUANT[number=0,case=0,gender=0,person=0] (np*number*case*gender*person/rquot)/np*number*case*gender*person; #SetAttr("QUOT",Val "+",Var "x"
+#lemma=«,pos=interp:       QUANT[number=0,case=0,gender=0,person=0] (np*number*case*gender*person/rquot2)/np*number*case*gender*person; #SetAttr("QUOT",Val "+",Var "x"
+#lemma=»,pos=interp:       QUANT[number=0,case=0,gender=0,person=0] (np*number*case*gender*person/rquot3)/np*number*case*gender*person; #SetAttr("QUOT",Val "+",Var "x"
+rquot: null;
+rquot2: null;
+rquot3: null;
+#lemma=(,pos=interp:       (inclusion/rparen)/(np*T*T*T*T+ip*T*T*T+adjp*T*T*T+prepnp*T*T); #SetAttr("INCLUSION",Val "+",
+#lemma=[,pos=interp:       (inclusion/rparen2)/(np*T*T*T*T+ip*T*T*T+adjp*T*T*T+prepnp*T*T); #SetAttr("INCLUSION",Val "+",
+rparen: null;
+rparen2: null;
+
+<conll_root>: /(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj):unk;
+s: \?(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj):unk;
+<root>: /(1+s):unk /(1+<speaker-end>):unk /(1+or):unk /(1+np*T*nom*T*T):unk /(1+ip*T*T*T):unk;
+
+or: null;
+<colon>: \<speaker>:unk /(1+<squery>):unk;
+or2: \?(ip*T*T*T+cp*int*T+np*sg*voc*T*T+interj):unk;
+<speaker-end>: null
@@ -268,6 +268,27 @@ let add_adjuncts preps compreps compars pos2 (selectors,schema) =
   | "adv" -> [selectors,schema @ ENIAMwalRenderer.adv_adjuncts_simp @ compars]
   | _ -> []
  
+open ENIAMlexSemanticsTypes
+
+(* Prepend [cr] to the [cr] list of every position whose grammatical
+   function is SUBJ; all other positions are returned unchanged. *)
+let add_subj_cr cr positions =
+  let tag_subject p =
+    match p.gf with
+      SUBJ -> {p with cr=cr :: p.cr}
+    | _ -> p in
+  Xlist.map positions tag_subject
+
+(* Extend a connected [frame] with the adjunct positions that fit the
+   simplified part of speech [pos2].  Returns a list of frames: one for
+   "verb", "adj" and "adv", three selector-specialised variants for "noun",
+   and none for any other part of speech.
+   Note that only the common, non-measure noun variant keeps the positions
+   of the incoming frame; the proper and measure variants replace them. *)
+let add_connected_adjuncts preps compreps compars pos2 frame =
+  let comprep_positions = Xlist.rev_map compreps ENIAMwalRenderer.render_connected_comprep in
+  let prepnp_positions = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_connected_prepnp prep cases) in
+  let prepadjp_positions = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_connected_prepadjp prep cases) in
+  let compar_positions = Xlist.rev_map compars ENIAMwalRenderer.render_connected_compar in
+  match pos2 with
+    "verb" ->
+      (* the subject of a verb frame additionally receives the "3" marker *)
+      let positions =
+        add_subj_cr "3" frame.positions @ ENIAMwalRenderer.verb_connected_adjuncts_simp @
+        prepnp_positions @ prepadjp_positions @ comprep_positions @ compar_positions in
+      [{frame with positions=positions}]
+  | "noun" ->
+      (* adjuncts shared by all three noun variants *)
+      let shared = prepnp_positions @ comprep_positions @ compar_positions in
+      let variant selectors positions =
+        {frame with selectors=selectors @ frame.selectors; positions=positions @ shared} in
+      [variant [Nsyn,Eq,["proper"]] ENIAMwalRenderer.proper_noun_connected_adjuncts_simp;
+       variant [Nsyn,Eq,["common"];Nsem,Eq,["measure"]] ENIAMwalRenderer.measure_noun_connected_adjuncts_simp;
+       variant [Nsyn,Eq,["common"];Nsem,Neq,["measure"]] (frame.positions @ ENIAMwalRenderer.common_noun_connected_adjuncts_simp)]
+  | "adj" ->
+      [{frame with positions=frame.positions @ ENIAMwalRenderer.adj_connected_adjuncts_simp @ compar_positions}]
+  | "adv" ->
+      [{frame with positions=frame.positions @ ENIAMwalRenderer.adv_connected_adjuncts_simp @ compar_positions}]
+  | _ -> []
+
 (* let _ =
   let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries in
   let _ = Entries.map2 schemata (fun pos lemma schemata -> simplify_schemata pos (ENIAMvalence.simplify_pos pos) lemma schemata) in
@@ -23,12 +23,61 @@ open ENIAMlexSemanticsTypes
 open ENIAMwalTypes
 open Xstd
  
-(*let find_senses t = (* FIXME: sensy zawierające 'się' *)
-  match t.token with
-    Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses lemma pos
-  | Proper(_,_,_,senses) -> ENIAMplWordnet.find_proper_senses senses
-  | _ -> []
-*)
+(* Look up the meaning [m] in plWordnet by its lexical-unit id.  When the
+   lookup raises Not_found, fall back to a meaning named "<name>-<variant>"
+   with no hypernyms and the weight reserved for unknown meanings. *)
+let find_meaning m =
+  try ENIAMplWordnet.find_meaning m.plwnluid
+  with Not_found ->
+    let label = String.concat "-" [m.name; m.variant] in
+    (label, [], unknown_meaning_weight)
+
+(* Build the meaning triple (name, hypernyms with costs, weight) of the
+   preposition [lemma].  [hipero] must be a singleton list holding a Predef
+   preference, otherwise the function fails with "find_prep_meaning".
+   The predefined name "ALL" is returned directly with cost 0; any other
+   name is resolved to a synset through ENIAMplWordnet.predef (raising
+   Not_found when it is absent) and expanded with ENIAMplWordnet.get_hipero. *)
+let find_prep_meaning lemma hipero =
+  match hipero with
+    [Predef "ALL"] -> (lemma, ["ALL",0], unknown_meaning_weight)
+  | [Predef name] ->
+      let syn_id = StringMap.find !ENIAMplWordnet.predef name in
+      let hypernyms =
+        IntMap.fold (ENIAMplWordnet.get_hipero syn_id) [] (fun acc id cost ->
+          (ENIAMplWordnet.synset_name id, cost) :: acc) in
+      (lemma, hypernyms, unknown_meaning_weight)
+  | _ -> failwith "find_prep_meaning"
+
+(* LCG rendering of the lexical argument "się" (part of speech QUB);
+   find_senses uses it as the required extra position of "... się" frames. *)
+let lex_sie = LCG (ENIAMwalRenderer.render_morf (SimpleLexArg("się",QUB)))
+
+(* Fill in the meanings of the frames of lexical-semantics entry [s] using
+   the plWordnet senses of token [t].
+   - A frame that already has meanings is kept unchanged.
+   - If plWordnet returns no senses for the lemma nor for "<lemma> się",
+     the frame gets a single meaning: the token lemma, no hypernyms and
+     unknown_meaning_weight.
+   - Otherwise the frame is emitted with the plain senses and, when senses
+     for "<lemma> się" exist, also as a copy carrying those senses with a
+     required lexical "się" position put in front of its positions.
+   The frames are accumulated with a fold, so their order in the result is
+   not the order of [s.frames]. *)
+(* FIXME: we naively trust that if a lexeme is described semantically in Walenty, it contains frames for all its senses *)
+let find_senses t s =
+  (*let set = Xlist.fold s.frames StringSet.empty (fun set frame ->
+    Xlist.fold frame.meanings set (fun set (name,hipero,weight) ->
+      StringSet.add set name)) in*)
+  let senses = match t.token with
+      Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses lemma pos
+    | Proper(_,_,_,senses) -> ENIAMplWordnet.find_proper_senses senses
+    | _ -> [] in
+  (* let senses =
+    Xlist.fold senses [] (fun senses (name,hipero,weight) ->
+      if StringSet.mem set name then senses else (name,hipero,weight) :: senses) in *)
+  let senses_sie = match t.token with
+      Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses (lemma ^ " się") pos
+    | Proper(_,_,_,senses) -> []
+    | _ -> [] in
+(*  let senses_sie = Xlist.fold senses_sie [] (fun senses_sie (name,hipero,weight) ->
+    if StringSet.mem set name then senses_sie else (name,hipero,weight) :: senses_sie) in
+  let frames = if senses = [] then s.frames else {empty_frame with meanings=senses} :: s.frames in
+  let frames = if senses_sie = [] then frames else {empty_frame with meanings=senses_sie;
+    positions=[{empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]; is_necessary=Req}]} :: frames in*) (* FIXME: doesn't this remove elements from the frame? *)
+  let frames = Xlist.fold s.frames [] (fun frames f ->
+    if f.meanings <> [] then f :: frames else
+    if senses = [] && senses_sie = [] then {f with meanings=[ENIAMtokens.get_lemma t.token, [], unknown_meaning_weight]} :: frames else
+    (if senses_sie = [] then [] else [{f with meanings=senses_sie; positions={empty_position with role="Lemma"; mode=["lemma"]; morfs=[lex_sie]; is_necessary=Req} :: f.positions}]) @
+    [{f with meanings=senses}] @ frames) in
+  {s with frames=frames}
+
+(* Rewrite the selectional preferences of every position of [schema] as
+   synset names.  Synset ids unknown to plWordnet and RelationRole
+   preferences are dropped; a position left with no preference gets "ALL".
+   A preference that is already a SynsetName makes the function fail with
+   "find_selprefs". *)
+let find_selprefs schema = (* FIXME: RelationRole *)
+  Xlist.map schema (fun position ->
+      let names = Xlist.fold position.sel_prefs [] (fun acc -> function
+          SynsetId id -> (try ENIAMplWordnet.synset_name id :: acc with ENIAMplWordnet.SynsetNotFound -> acc)
+        | Predef name -> name :: acc
+        | SynsetName _ -> failwith "find_selprefs"
+        | RelationRole _ -> acc) in
+      let names = match names with
+          [] -> ["ALL"]
+        | _ -> names in
+      {position with sel_prefs=Xlist.map names (fun name -> SynsetName name)})
+
 let rec find a l i =
   if a.(i) = max_int then (
     a.(i) <- i;
@@ -102,6 +151,33 @@ let get_preps tokens group = (* FIXME: To nie zadziała przy kilku wystąpieniac
       | _ -> preps,compars) in
   StringMap.fold preps [] (fun l prep v -> (prep, StringSet.to_list v) :: l), StringSet.to_list compars
  
+(* Remove duplicate (selectors,schema) pairs.  Two pairs are duplicates when
+   their textual renderings "[selectors] {schema}" are equal; the pair seen
+   last wins.  The result is built from a fold over the string-keyed map,
+   so the input order is not preserved. *)
+let make_unique schemata =
+  let key_of (selectors,schema) =
+    "[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] {" ^ ENIAMwalStringOf.schema schema ^ "}" in
+  let by_key = Xlist.fold schemata StringMap.empty (fun acc entry ->
+    StringMap.add acc (key_of entry) entry) in
+  StringMap.fold by_key [] (fun acc _ entry -> entry :: acc)
+
+(* Turn a syntactic (selectors,schema) pair into semantic frames: one frame
+   for every argument-role variant returned by ENIAMvalence.get_aroles.
+   Each position is given the generic role "Arg" and the predefined
+   preference "X". *)
+let semantize lemma pos (selectors,schema) =
+  (* FIXME: placeholder, so that known arguments are preferred *)
+  let generic p = {p with role="Arg"; sel_prefs=[Predef "X"]} in
+  let positions = Xlist.rev_map schema generic in
+  let aroles = ENIAMvalence.get_aroles positions lemma pos in
+  Xlist.rev_map aroles (fun (sel,arole,arole_attr,arev) ->
+    {empty_frame with
+      selectors=sel @ selectors;
+      positions=positions;
+      arole=arole;
+      arole_attr=arole_attr;
+      arev=arev})
+
+(* Build the semantic frames of the preposition [lemma] from
+   ENIAMlexSemanticsData.prep_roles; a lemma without an entry yields [].
+   Every (case,arole,arole_attr,hipero,sel_prefs) entry becomes one frame
+   that is selected by the given case, carries the meaning computed by
+   find_prep_meaning, and has a single required argument position whose
+   direction is backward for "temu" and forward for every other lemma.
+   May fail with "find_prep_meaning" or Not_found, as find_prep_meaning does.
+   The debugging traces that used to be printed on stdout at every call
+   have been removed. *)
+let assign_prep_semantics lemma =
+  let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in
+  Xlist.map roles (fun (case,arole,arole_attr,hipero,sel_prefs) ->
+    let meaning = find_prep_meaning lemma hipero in (* FIXME: placeholder for meaning and weight *)
+    let positions = [{empty_position with
+      sel_prefs=sel_prefs; dir=if lemma="temu" then Backward_ else Forward_;
+      morfs=ENIAMwalRenderer.assing_pref_morfs (lemma,case); is_necessary=Req}] in
+    {empty_frame with selectors=[ENIAM_LCGlexiconTypes.Case,ENIAM_LCGlexiconTypes.Eq,[case]]; meanings=[meaning]; positions=find_selprefs positions;
+     arole=arole; arole_attr=arole_attr; arev=false})
+
 let assign_valence tokens lex_sems group =
   let lexemes = Xlist.fold group StringSet.empty (fun lexemes id ->
       let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in
@@ -118,10 +194,10 @@ let assign_valence tokens lex_sems group =
       (* Printf.printf "A %s %s %s |schemata|=%d\n" lemma pos pos2 (Xlist.size schemata); *)
       let entries = Entries.find entries pos lemma in
       let connected = Entries.find connected pos2 lemma in
-      let schemata = List.flatten (Xlist.map schemata (fun (opinion,neg,pred,aspect,schema) ->
-          ENIAMvalence.transform_entry pos lemma neg pred aspect schema)) in (* FIXME: gubię opinię *)
+      let schemata1 = List.flatten (Xlist.map schemata (fun (opinion,neg,pred,aspect,schema) ->
+          ENIAMvalence.transform_entry pos lemma neg pred aspect schema)) in (* gubię opinię *)
       (* Printf.printf "B %s |schemata|=%d\n" lemma (Xlist.size schemata); *)
-      let schemata = ENIAMadjuncts.simplify_schemata lexemes pos pos2 lemma schemata in
+      let schemata = ENIAMadjuncts.simplify_schemata lexemes pos pos2 lemma schemata1 in
       (* Printf.printf "C %s |schemata|=%d\n" lemma (Xlist.size schemata); *)
       let schemata = Xlist.rev_map schemata (fun (selectors,schema) ->
           selectors,ENIAMwalRenderer.render_simple_schema schema) in
@@ -130,13 +206,31 @@ let assign_valence tokens lex_sems group =
       let entries = List.flatten (Xlist.rev_map entries (ENIAMvalence.transform_lex_entry pos lemma)) in
       let entries = Xlist.map entries (fun (selectors,entry) ->
           selectors,ENIAMwalRenderer.render_lex_entry entry) in
-      let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,meanings,neg,pred,aspect,schema) ->
-          Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema) (fun (selectors,schema) ->
-              selectors,meanings,schema))) in (* FIXME: gubię opinię *)
-      let connected = Xlist.fold connected [] (fun connected (selectors,meanings,schema) ->
-          if ENIAMadjuncts.check_selector_lex_constraints lexemes pos selectors then (selectors,meanings,schema) :: connected else connected) in
-      let connected = Xlist.rev_map connected (fun (selectors,meanings,schema) ->
-          selectors,meanings,ENIAMwalRenderer.render_connected_schema schema) in
+      let connected = List.flatten (Xlist.map connected (fun (sopinion,fopinion,meanings,neg,pred,aspect,schema1) ->
+          List.flatten (Xlist.rev_map (ENIAMvalence.transform_entry pos lemma neg pred aspect schema1) (fun (selectors,schema) ->
+              Xlist.rev_map (ENIAMvalence.get_aroles schema1 lemma pos) (fun (sel,arole,arole_attr,arev) ->
+                  {selectors=sel @ selectors; meanings=Xlist.map meanings find_meaning; positions=schema;
+                   arole=arole; arole_attr=arole_attr; arev=arev; sopinion=sopinion; fopinion=fopinion}))))) in
+      (* Printf.printf "E %s |connected|=%d\n" lemma (Xlist.size connected); *)
+      let connected = if connected = [] then List.flatten (Xlist.rev_map (make_unique schemata1) (semantize lemma pos)) else connected in
+      (* Printf.printf "F %s |connected|=%d\n" lemma (Xlist.size connected); *)
+      let connected = Xlist.fold connected [] (fun connected frame ->
+          if ENIAMadjuncts.check_selector_lex_constraints lexemes pos frame.selectors then frame :: connected else connected) in
+      (* Printf.printf "G %s |connected|=%d\n" lemma (Xlist.size connected); *)
+      let connected = Xlist.rev_map connected (fun frame ->
+          {frame with
+            positions = find_selprefs (ENIAMwalRenderer.render_connected_schema (ENIAMwalReduce.set_necessary frame.positions))}) in
+      (* Printf.printf "H %s |connected|=%d\n" lemma (Xlist.size connected); *)
+      let connected = List.flatten (Xlist.rev_map connected (ENIAMadjuncts.add_connected_adjuncts preps compreps compars pos2)) in
+      (* Printf.printf "I %s |connected|=%d\n" lemma (Xlist.size connected); *)
+      let connected = if pos = "prep" then
+        if connected <> [] then failwith "assign_valence" else
+        assign_prep_semantics lemma else connected in
+      (* Printf.printf "J %s |connected|=%d\n" lemma (Xlist.size connected); *)
+      let connected = if connected = [] then
+        Xlist.rev_map (ENIAMvalence.get_aroles [] lemma pos) (fun (sel,arole,arole_attr,arev) ->
+          {empty_frame with selectors=sel; arole=arole; arole_attr=arole_attr; arev=arev}) else connected in
+      (* Printf.printf "K %s |connected|=%d\n" lemma (Xlist.size connected); *)
       ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with
                                 schemata=schemata; lex_entries=entries; frames=connected})
  
@@ -177,60 +271,54 @@ let assign_valence tokens lex_sems group =
             (if pos = "subst" || pos = "depr" then "p" ^ pos else pos)) (fun frame -> 0,frame) with Not_found -> [](*failwith ("assign_valence: Proper(" ^ lemma ^ "," ^ pos ^ ")")*))};
         ExtArray.set tokens id {(ExtArray.get tokens id) with token=Lemma(lemma,pos,interp)}
     | _ -> ())
+*)
  
-let get_prefs_schema prefs schema =
-  Xlist.fold schema prefs (fun prefs t ->
-    Xlist.fold t.sel_prefs prefs StringSet.add)
-
-let map_prefs_schema senses schema =
-  Xlist.map schema (fun t ->
-    if Xlist.mem t.morfs (Phrase Pro) || Xlist.mem t.morfs (Phrase ProNG) then t else
-    {t with sel_prefs = Xlist.fold t.sel_prefs [] (fun l s ->
-      if StringSet.mem senses s then s :: l else l)})
  
+(* Restrict the hypernyms of every meaning in [group] to those that occur
+   as a selectional preference somewhere in the group.  [prefs] collects
+   "ALL" plus every SynsetName preference of every frame position (any other
+   preference kind fails with "disambiguate_senses"); each meaning then
+   keeps only the hypernyms found in [prefs], with ("ALL",0) always added. *)
 let disambiguate_senses lex_sems group =
   let prefs = Xlist.fold group (StringSet.singleton "ALL") (fun prefs id ->
-    Xlist.fold (ExtArray.get lex_sems id).valence prefs (fun prefs -> function
-      _,Frame(_,schema) -> get_prefs_schema prefs schema
-    | _,LexFrame(_,_,_,schema) -> get_prefs_schema prefs schema
-    | _,ComprepFrame(_,_,_,schema) -> get_prefs_schema prefs schema)) in
-  let hipero = Xlist.fold group (StringSet.singleton "ALL") (fun hipero id ->
+    Xlist.fold (ExtArray.get lex_sems id).frames prefs (fun prefs frame ->
+      Xlist.fold frame.positions prefs (fun prefs t ->
+        Xlist.fold t.sel_prefs prefs (fun prefs -> function
+          SynsetName s -> StringSet.add prefs s
+        | _ -> failwith "disambiguate_senses")))) in
+  (*let hipero = Xlist.fold group (StringSet.singleton "ALL") (fun hipero id ->
     Xlist.fold (ExtArray.get lex_sems id).senses hipero (fun hipero (_,l,_) ->
       Xlist.fold l hipero StringSet.add)) in
   let senses = StringSet.intersection prefs hipero in
   let is_zero = StringSet.mem hipero "0" in
-  let senses = if is_zero then StringSet.add senses "0" else senses in
+  let senses = if is_zero then StringSet.add senses "0" else senses in*)
   Xlist.iter group (fun id ->
     let t = ExtArray.get lex_sems id in
-    ExtArray.set lex_sems id {t with valence = if is_zero then t.valence else
-        Xlist.map t.valence (function
-          n,Frame(a,schema) -> n,Frame(a,map_prefs_schema senses schema)
-        | n,LexFrame(s,p,r,schema) -> n,LexFrame(s,p,r,map_prefs_schema senses schema)
-        | n,ComprepFrame(s,p,r,schema) -> n,ComprepFrame(s,p,r,map_prefs_schema senses schema));
-      senses = Xlist.map t.senses (fun (s,l,w) ->
-        s, List.rev (Xlist.fold l [] (fun l s -> if StringSet.mem senses s then s :: l else l)),w)})
-
-*)
+    ExtArray.set lex_sems id {t with frames=Xlist.map t.frames (fun frame ->
+      let meanings = Xlist.map frame.meanings (fun (name,hipero,weight) ->
+        let hipero = Xlist.fold hipero ["ALL",0] (fun hipero (name,cost) ->
+          if StringSet.mem prefs name then (name,cost) :: hipero else hipero) in
+        name,hipero,weight) in
+      {frame with meanings=meanings})})
  
+(* Overwrite with ENIAMtokenizerTypes.empty_token_env every token whose
+   index (from 1 up to the last one) does not occur in any of [groups]. *)
+let remove_unused_tokens tokens groups =
+  let used = Xlist.fold groups IntSet.empty (fun acc group ->
+    Xlist.fold group acc IntSet.add) in
+  let last = ExtArray.size tokens - 1 in
+  Int.iter 1 last (fun i ->
+    if not (IntSet.mem used i) then
+      ExtArray.set tokens i ENIAMtokenizerTypes.empty_token_env)
  
+(* Compute the lexical-semantics array for [tokens] of [text]: allocate one
+   entry per token, split the tokens into groups, blank the tokens that
+   belong to no group, assign valence group by group, attach senses to
+   every entry with find_senses and finally run disambiguate_senses on
+   each group.  Returns the filled array. *)
 let assign tokens text =
   let lex_sems = ExtArray.make (ExtArray.size tokens) empty_lex_sem in
   let _ = ExtArray.add lex_sems empty_lex_sem in
   Int.iter 1 (ExtArray.size tokens - 1) (fun i ->
-    (* let token = ExtArray.get tokens i in
-    (* ExtArray.set tokens i token; *)
-    let senses = find_senses token in *)
-      let lex_sem = {empty_lex_sem with senses=[](*senses*)} in
-    let _ = ExtArray.add lex_sems lex_sem in
-    ());
+    ignore (ExtArray.add lex_sems empty_lex_sem));
   let groups = split_tokens_into_groups (ExtArray.size tokens) text in
   (* Xlist.iter groups (fun group -> print_endline (String.concat " " (Xlist.map group string_of_int))); *)
+  remove_unused_tokens tokens groups;
   Xlist.iter groups (fun group -> assign_valence tokens lex_sems group);
-  (* Xlist.iter groups (fun group -> assign_valence tokens lex_sems group);
+  Int.iter 1 (ExtArray.size tokens - 1) (fun i ->
+    let token = ExtArray.get tokens i in
+    let lex_sem = ExtArray.get lex_sems i in
+    let lex_sem = find_senses token lex_sem in
+    ExtArray.set lex_sems i lex_sem);
   Xlist.iter groups (fun group -> disambiguate_senses lex_sems group);
-  Xlist.iter groups (fun group -> assign_simplified_valence tokens lex_sems group);
-  Xlist.iter groups (fun group -> assign_very_simplified_valence tokens lex_sems group);
-  Xlist.iter groups (fun group -> ENIAMlexSemanticsData.assign_semantics tokens lex_sems group); *)
+  (*Xlist.iter groups (fun group -> ENIAMlexSemanticsData.assign_semantics tokens lex_sems group); *)
   lex_sems
  
 let catch_assign tokens text =
@@ -239,3 +327,11 @@ let catch_assign tokens text =
   with e ->
     ExtArray.make 0 empty_lex_sem,
     Printexc.to_string e
+
+(* Initialise the modules this library depends on by calling, in this
+   order, the initialisers of ENIAMsubsyntax, ENIAMwalParser,
+   ENIAMwalReduce, ENIAMplWordnet and ENIAMcategoriesPL.
+   NOTE(review): presumably has to be called once before assign - confirm. *)
+let initialize () =
+  ENIAMsubsyntax.initialize ();
+  ENIAMwalParser.initialize ();
+  ENIAMwalReduce.initialize ();
+  ENIAMplWordnet.initialize ();
+  ENIAMcategoriesPL.initialize ();
+  ()
@@ -21,13 +21,13 @@ open ENIAMtokenizerTypes
 open ENIAMlexSemanticsTypes
 open Xstd
  
-let subst_inst_roles = Xlist.fold [
-  "wiosna",        "Time","";
-  "lato",          "Time","";
-  "jesień",        "Time","";
-  "zima",          "Time","";
-  "wieczór",       "Time","";
-  ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a))
+(* Set of noun lemmas: the four seasons and "wieczór" (evening).
+   NOTE(review): presumably nouns whose instrumental form expresses time,
+   as the name suggests - confirm against the code that reads this set. *)
+let subst_inst_time = StringSet.of_list [
+  "wiosna";
+  "lato";
+  "jesień";
+  "zima";
+  "wieczór";
+  ]
  
 let adj_roles = Xlist.fold [
   "ten",               "Apoz","";
@@ -59,7 +59,7 @@ let adj_roles = Xlist.fold [
   "taki",              "Attribute","";
   "czyj",              "Possesive","";
   "który",             "Attribute","";
-  ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a))
+  ] StringMap.empty (fun map (k,r,a) -> StringMap.add_inc map k [r,a] (fun l -> (r,a) :: l))
  
 let adv_roles = Xlist.fold [ (* FIXME: problem z podwójnymi przypisaniami *)
   (* operators:  nielokalnie zmieniaja formułe logiczna *)
@@ -80,7 +80,7 @@ let adv_roles = Xlist.fold [ (* FIXME: problem z podwójnymi przypisaniami *)
   "dlatego",        "Condition",""; (* odniesieniem argumentu jest sytuacji/kontekst *)
   "tak",            "Manner",""; (* odniesieniem argumentu jest sytuacji/kontekst, byc może deiktyczny *)
  
-  "skąd",		"Location","Source";
+(*  "skąd",		"Location","Source";
   "skądkolwiek",	"Location","Source";
   "skądś",		"Location","Source";
   "skądże",		"Location","Source";
@@ -209,8 +209,8 @@ let adv_roles = Xlist.fold [ (* FIXME: problem z podwójnymi przypisaniami *)
   "ongi",		"Time","";
   "ongiś",		"Time","";
   "wczas",		"Time","";
-  "wonczas",		"Time","";
-  ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a))
+  "wonczas",		"Time","";*)
+  ] StringMap.empty (fun map (k,r,a) -> StringMap.add_inc map k [r,a] (fun l -> (r,a) :: l))
  
 let qub_roles = Xlist.fold [
   "tylko",          "Quantifier","";
@@ -236,10 +236,10 @@ let qub_roles = Xlist.fold [
   "ponad",          "Mod","";
   "prawie",         "Mod","";
   "przynajmniej",   "Mod","";
-  ] StringMap.empty (fun map (k,r,a) -> StringMap.add map k (r,a))
+  ] StringMap.empty (fun map (k,r,a) -> StringMap.add_inc map k [r,a] (fun l -> (r,a) :: l))
  
  
-let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_prefs *)(* FIXME: problem z podwójnymi przypisaniami *)
+let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_prefs *)
   "od","gen",		"Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
   "spod","gen",		"Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
   "spomiędzy","gen",	"Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
@@ -249,12 +249,14 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
   "spoza","gen",	"Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
   "sprzed","gen",	"Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
   "z","gen",		"Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
+  "z","postp",		"Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
   "znad","gen",		"Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
   "zza","gen",		"Location","Source",["POŁOŻENIE"],["POŁOŻENIE"];
   "do","gen",		"Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
   "ku","dat",		"Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
   "między","acc",	"Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
   "na","acc",		"Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
+  "na","postp",		"Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
   "nad","acc",		"Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
   "nieopodal","gen",	"Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
   "opodal","gen",	"Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
@@ -267,6 +269,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
   "za","acc",		"Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"];
   "dzięki","dat",	"Condition","",["CZEMU"],[];
   "na","acc",		"Condition","",["CZEMU"],[];
+  "na","postp",		"Condition","",["CZEMU"],[];
   "od","gen",		"Condition","",["CZEMU"],[];
   "przez","acc",	"Condition","",["CZEMU"],[];
   "wskutek","gen",	"Condition","",["CZEMU"],[];
@@ -275,6 +278,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
   "do","gen",		"Purpose","",["CZEMU"],[];
   "ku","dat",		"Purpose","",["CZEMU"],[];
   "na","acc",		"Purpose","",["CZEMU"],[];
+  "na","postp",		"Purpose","",["CZEMU"],[];
   "po","acc",		"Purpose","",["CZEMU"],[];
   "do","gen",		"Duration","",["CZAS"],["CZAS"];
   "od","gen",		"Duration","",["CZAS"],["CZAS"];
@@ -285,6 +289,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
   "między","inst",	"Location","",["POŁOŻENIE"],["POŁOŻENIE"];
   "nad","inst",		"Location","",["POŁOŻENIE"],["POŁOŻENIE"];
   "na","loc",		"Location","",["POŁOŻENIE"],["POŁOŻENIE"];
+  "na","postp",		"Location","",["POŁOŻENIE"],["POŁOŻENIE"];
   "naokoło","gen",	"Location","",["POŁOŻENIE"],["POŁOŻENIE"];
   "naprzeciw","gen",	"Location","",["POŁOŻENIE"],["POŁOŻENIE"];
   "naprzeciwko","gen",	"Location","",["POŁOŻENIE"],["POŁOŻENIE"];
@@ -327,6 +332,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
   "jak","str",		"Manner","",[],[];*)
   "pod","acc",		"Manner","",["ALL"],[];
   "z","inst",		"Manner","",["ALL"],[];
+  "z","postp",		"Manner","",["ALL"],[];
   "dokoła","gen",	"Path","",["POŁOŻENIE"],["POŁOŻENIE"];
   "dookoła","gen",	"Path","",["POŁOŻENIE"],["POŁOŻENIE"];
   "koło","gen",		"Path","",["POŁOŻENIE"],["POŁOŻENIE"];
@@ -363,10 +369,10 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
   "temu","acc",		"Time","",["CZAS"],["CZAS"]; (* dodane *)
   "za","gen",		"Time","",["CZAS"],["CZAS"]; (* dodane *)
   ] StringMap.empty (fun map (lemma,case,role,role_attr,hipero,sel_prefs) ->
-      let hipero = Xlist.fold hipero StringSet.empty ENIAMplWordnet.get_hipero_rec in
-      let map2 = try StringMap.find map lemma with Not_found -> StringMap.empty in
-      let map2 = StringMap.add_inc map2 case [case,role,role_attr,hipero,sel_prefs] (fun l -> (case,role,role_attr,hipero,sel_prefs) :: l) in
-      StringMap.add map lemma map2)
+      let hipero = Xlist.map hipero (fun hipero -> ENIAMwalTypes.Predef hipero) in
+      let sel_prefs = Xlist.map sel_prefs (fun sel_prefs -> ENIAMwalTypes.Predef sel_prefs) in
+      StringMap.add_inc map lemma [case,role,role_attr,hipero,sel_prefs]
+        (fun l -> (case,role,role_attr,hipero,sel_prefs) :: l))
 (*  "przeciwko","dat","Dat";
   "przeciw","dat","Dat";
   "o","acc","Theme";
@@ -374,14 +380,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr
   "według","gen","Manr";
   "wobec","gen","Dat";*)
  
-let assign_prep_semantics lemma cases t =
-  try
-    let map = StringMap.find prep_roles lemma in
-    let l = List.flatten (Xlist.map cases (fun case ->
-      try StringMap.find map case with Not_found -> [])) in
-    if l = [] then Normal else PrepSemantics l
-  with Not_found -> Normal
-
+(*
 let subst_special_lexemes = Xlist.fold [
   "jutro",        ["indexical"];(*"dzień"*)
   "pojutrze",     ["indexical"];(*"dzień"*)
@@ -553,3 +552,4 @@ let assign_semantics tokens lex_sems group =
           {t with semantics=assign_prep_semantics lemma (StringSet.to_list cases) t}
       | _ -> t in
     ExtArray.set lex_sems id t)
+*)
@@ -63,10 +63,10 @@ let html_of_lex_sems tokens lex_sems =
     let schemata = Xlist.map t.schemata (fun (selectors,l) ->
         "&emsp;&emsp;[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] {" ^ String.concat ", " (Xlist.map l (fun (d,s) ->
             ENIAM_LCGstringOf.direction d ^ ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in
-    let frames = Xlist.map t.frames (fun (selectors,meanings,schema) ->
+    (* let frames = Xlist.map t.frames (fun (selectors,meanings,schema) -> FIXME
         "&emsp;&emsp;[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] {" ^ ENIAMwalStringOf.schema schema ^ "} " ^
-        String.concat ", " (Xlist.map meanings (fun m -> ENIAMwalStringOf.meaning m))) in
-    (String.concat "<br>\n    " ([core] @ schemata @ frames @ lex_entries)) :: l))) ^
+        String.concat ", " (Xlist.map meanings (fun m -> ENIAMwalStringOf.meaning m))) in *)
+    (String.concat "<br>\n    " ([core] @ schemata (*@ frames*) @ lex_entries)) :: l))) ^
   "</P>"
  
 (*  schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list *
@@ -27,6 +27,11 @@ let lex_sems t =
     let t2 = ExtArray.get t id in
     (Printf.sprintf "%3d %s" id (lex_sem t2)) :: l)))*)
  
+(* Render the argument-role part of frame [f] as a string: ",<arole>" and
+   ",<arole_attr>" for each non-empty field, followed by ",rev" when the
+   frame is reversed. *)
+let arole f =
+  let role = if f.arole = "" then "" else "," ^ f.arole in
+  let attr = if f.arole_attr = "" then "" else "," ^ f.arole_attr in
+  let rev = if f.arev then ",rev" else "" in
+  String.concat "" [role; attr; rev]
+
 let string_of_lex_sems tokens lex_sems =
   String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size lex_sems - 1) [] (fun l id ->
     let t = ExtArray.get lex_sems id in
@@ -35,18 +40,22 @@ let string_of_lex_sems tokens lex_sems =
     let lemma = ENIAMtokens.string_of_token t2.ENIAMtokenizerTypes.token in
     let core = Printf.sprintf "%3d %s %s" id orth lemma  in
     let lex_entries = Xlist.map t.lex_entries (fun (selectors,s) ->
-        "[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in
+        "&[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] " ^ ENIAM_LCGstringOf.grammar_symbol 0 s) in
     let schemata = Xlist.map t.schemata (fun (selectors,l) ->
         "[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] {" ^ String.concat "," (Xlist.map l (fun (d,s) ->
             ENIAM_LCGstringOf.direction d ^ ENIAM_LCGstringOf.grammar_symbol 0 s)) ^ "}") in
-    let frames = Xlist.map t.frames (fun (selectors,meanings,schema) ->
-        "[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] {" ^ ENIAMwalStringOf.schema schema ^ "} " ^
+(*    let frames = Xlist.map t.frames (fun (selectors,meanings,schema) ->
+        "*[" ^ ENIAMcategoriesPL.string_of_selectors selectors ^ "] {" ^ ENIAMwalStringOf.schema schema ^ "} " ^
         String.concat "," (Xlist.map meanings (fun m -> ENIAMwalStringOf.meaning m))) in
-    (String.concat "\n    " ([core] @ schemata @ frames @ lex_entries)) :: l)))
+    let senses = Xlist.map t.senses (fun (sense,hipero,weight) ->
+        Printf.sprintf "%s[%s]%.2f" sense (String.concat "," (Xlist.map hipero (fun (s,n) -> s ^ " " ^ string_of_int n))) weight) in*)
+    let frames = Xlist.map t.frames (fun f ->
+        "*" ^ arole f ^ "[" ^ ENIAMcategoriesPL.string_of_selectors f.selectors ^ "] {" ^ ENIAMwalStringOf.schema f.positions ^ "} " ^
+        String.concat "," (Xlist.map f.meanings (fun (sense,hipero,weight) ->
+        Printf.sprintf "%s[%s]%.2f" sense (String.concat "," (Xlist.map hipero (fun (s,n) -> s ^ " " ^ string_of_int n))) weight))) in
+    (String.concat "\n    " ([core] @ (*senses @*) schemata @ frames @ lex_entries)) :: l)))
     (* let lroles = if snd t.lroles = "" then fst t.lroles else fst t.lroles ^ " " ^ snd t.lroles in
     let core = Printf.sprintf "%3d %s %s %s" id orth lemma lroles in
-    let senses = Xlist.map t.senses (fun (sense,hipero,weight) ->
-      Printf.sprintf "%s[%s]%.2f" sense (String.concat "," hipero) weight) in
     let valence = Xlist.map t.valence (ENIAMwalStringOf.fnum_frame "") in
     let simple_valence = Xlist.map t.simple_valence (ENIAMwalStringOf.fnum_frame "") in
     (* let semantics =  *)
@@ -36,20 +36,33 @@ type semantics =
   | SpecialMod of string * (type_arg list * type_term)*)
   | PrepSemantics of (string * string * string * StringSet.t * string list) list (* case,role,role_attr,hipero,sel_prefs *)
  
+(* A semantic frame of a lexeme. *)
+type frame = {
+  (* selector constraints (selector, relation, values) attached to the frame *)
+  selectors: (ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list;
+  (* meanings as (name, [(hypernym name, cost)], weight) triples *)
+  meanings: ((*ENIAMwalTypes.meaning **) string * (string * int) list * float) list;
+  (* argument positions of the frame *)
+  positions: ENIAMwalTypes.position list;
+  (* argument role, its attribute and the "reversed" flag, as produced by
+     ENIAMvalence.get_aroles; "" / false in empty_frame *)
+  arole: string;
+  arole_attr: string;
+  arev: bool;
+  (* NOTE(review): presumably the schema and frame opinions of the source
+     valence entry - confirm *)
+  sopinion: ENIAMwalTypes.opinion;
+  fopinion: ENIAMwalTypes.opinion;
+  }
+
+(* Frame with no selectors, meanings or positions, empty role fields and
+   both opinions set to Nieokreslony; used as the base of
+   {empty_frame with ...} expressions. *)
+let empty_frame = {
+  selectors=[];
+  meanings=[];
+  positions=[];
+  arole="";
+  arole_attr="";
+  arev=false;
+  sopinion=ENIAMwalTypes.Nieokreslony;
+  fopinion=ENIAMwalTypes.Nieokreslony;
+  }
+
+(* Lexical-semantic information kept for one token: rendered valence
+   schemata, lexicalised entries, semantic frames, categories and the
+   semantics marker. *)
+type lex_sem = {
   schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list *
              (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list;
   lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list *
                 ENIAM_LCGtypes.grammar_symbol) list;
-  frames: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list *
-             ENIAMwalTypes.meaning list * ENIAMwalTypes.position list) list;
+  frames: frame list;
   cats: string list;
   (* e: labels; *)
   (* valence: (int * ENIAMwalTypes.frame) list;
   simple_valence: (int * ENIAMwalTypes.frame) list;
   very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; *)
-  senses: (string * string list * float) list;
-  lroles: string * string;
+  (* senses: (string * (string * int) list * float) list; *)
+  (* lroles: string * string; *)
   semantics: semantics;
   }
  
@@ -63,8 +76,8 @@ type lex_sem = {
  
+(* Default lex_sem entry: no schemata, lexical entries or frames, the single
+   category "X" and Normal semantics. *)
 let empty_lex_sem = {
   schemata=[]; lex_entries=[]; frames=[]; cats=["X"];
-  (*e=empty_labels;*) (*valence=[]; simple_valence=[]; very_simple_valence=[];*) senses=[];
-  lroles="",""; semantics=Normal}
+  (*e=empty_labels;*) (*valence=[]; simple_valence=[]; very_simple_valence=[];*) (*senses=[];*)
+  (*lroles="","";*) semantics=Normal}
  
 (* FIXME: poprawić katalog *)
 (*let subst_uncountable_lexemes_filename = resource_path ^ "/lexSemantics/subst_uncountable.dat"
@@ -77,3 +90,13 @@ let subst_uncountable_lexemes_filename2 = resource_path ^ &quot;/Walenty/subst_uncoun
 let subst_container_lexemes_filename = resource_path ^ "/Walenty/subst_container.dat"
 let subst_numeral_lexemes_filename = resource_path ^ "/Walenty/subst_numeral.dat"
 let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat" *)
+
+(* Upper bound on the accumulated cost of a path followed while collecting
+   related synsets; [get_hipero_rec] stops as soon as the cost exceeds it. *)
+let hipero_threshold = 3
+(* Weight for a meaning that is not known.
+   NOTE(review): presumably on the same log10 scale as the sense weights --
+   confirm against the code that consumes it. *)
+let unknown_meaning_weight = -1.
+
+(* plWordnet tables: lexical units, weighted relation edges and synsets. *)
+let lu_filename = resource_path ^ "/plWordnet/lu.tab"
+let ex_hipo_filename = resource_path ^ "/plWordnet/ex_hipo.tab"
+let syn_filename = resource_path ^ "/plWordnet/syn.tab"
+
+(* Hand-written tables: predefined preference names and classes of proper names. *)
+let predef_filename = resource_path ^ "/lexSemantics/predef_prefs.tab"
+let proper_classes_filename = resource_path ^ "/lexSemantics/proper_classes.tab"
+(*
+ *  ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
+ *  Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
+ *  Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
+ *
+ *  This library is free software: you can redistribute it and/or modify
+ *  it under the terms of the GNU Lesser General Public License as published by
+ *  the Free Software Foundation, either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *)
+
+open Xstd
+open ENIAMlexSemanticsTypes
+
+(* Module-level tables; all of them are filled in by [initialize]. *)
+(* lu_id -> (lemma, variant, syn_id); variant is kept as a string *)
+let lu_names = ref IntMap.empty
+(* lemma -> list of (variant, syn_id) *)
+let lumap = ref StringMap.empty
+(* syn_id -> (pos, list of lu_ids) *)
+let synmap = ref IntMap.empty
+(* parent id -> (child id -> cost); also holds the edges added by [load_predef] *)
+let ex_hipo = ref IntMap.empty
+(* id of a predefined entry (negative, assigned by [load_predef]) -> its name *)
+let predef_names = ref IntMap.empty
+(* class id -> list of (sense, list of (synset name, cost), weight) *)
+let proper_classes = ref StringMap.empty
+(* name of a predefined entry -> its id *)
+let predef = ref StringMap.empty
+
+(* Reads the lexical-unit table from [filename]. Every row must consist of
+   exactly four fields: lu_id, lemma, variant and syn_id. Returns a pair of
+   maps: lu_id -> (lemma, variant, syn_id) and lemma -> list of
+   (variant, syn_id). Fails with "load_lu: ..." on a row of any other shape;
+   a non-numeric id makes [int_of_string] fail. *)
+let load_lu filename =
+  let add_row (names,by_lemma) row =
+    match row with
+      [lu_id; lemma; variant; syn_id] ->
+        let syn_id = int_of_string syn_id in
+        let entry = variant,syn_id in
+        IntMap.add names (int_of_string lu_id) (lemma,variant,syn_id),
+        StringMap.add_inc by_lemma lemma [entry] (fun entries -> entry :: entries)
+    | fields -> failwith ("load_lu: " ^ String.concat "\t" fields) in
+  File.fold_tab filename (IntMap.empty,StringMap.empty) add_row
+
+(* Reads the synset table from [filename]. A row holds a synset id, a part of
+   speech and then any number (possibly zero) of lexical-unit ids. Returns a
+   map syn_id -> (pos, lu_ids). Fails with "load_syn: ..." on a row with fewer
+   than two fields. *)
+let load_syn filename =
+  File.fold_tab filename IntMap.empty (fun synsets row ->
+    match row with
+      syn_id :: pos :: units ->
+        IntMap.add synsets (int_of_string syn_id) (pos, Xlist.map units int_of_string)
+    | fields -> failwith ("load_syn: " ^ String.concat "\t" fields))
+
+(* Reads weighted edges from [filename]; every row must be exactly
+   parent, child, cost (all integers). Returns a two-level map
+   parent -> (child -> cost). Fails with "load_ex_hipo: ..." on a malformed
+   row; the "load_ex_hipo" failure is presumably triggered when the same
+   parent/child pair is listed twice (depends on [IntMap.add_inc]). *)
+let load_ex_hipo filename =
+  let add_edge graph = function
+      [parent; child; cost] ->
+        let parent_id = int_of_string parent in
+        let child_id = int_of_string child in
+        let weight = int_of_string cost in
+        let siblings = try IntMap.find graph parent_id with Not_found -> IntMap.empty in
+        IntMap.add graph parent_id
+          (IntMap.add_inc siblings child_id weight (fun _ -> failwith "load_ex_hipo"))
+    | fields -> failwith ("load_ex_hipo: " ^ String.concat "\t" fields) in
+  File.fold_tab filename IntMap.empty add_edge
+
+(* Resolves a sense written as "<lemma> <variant>" to its synset id. The
+   variant is the last space-separated token; everything before it is the
+   lemma. Fails when [sense] yields no tokens, when the lemma is missing from
+   [lumap], or when the (lemma, variant) pair does not pick exactly one
+   synset. *)
+let syn_id_of_sense sense =
+  let lemma,variant =
+    match List.rev (Xstring.split " " sense) with
+      [] -> failwith "syn_id_of_sense 1"
+    | last :: rest -> String.concat " " (List.rev rest), last in
+  let candidates =
+    try StringMap.find !lumap lemma
+    with Not_found -> failwith ("syn_id_of_sense: " ^ lemma) in
+  let matching = Xlist.fold candidates [] (fun acc (candidate,syn_id) ->
+    if variant = candidate then syn_id :: acc else acc) in
+  match matching with
+    [syn_id] -> syn_id
+  | _ -> failwith ("syn_id_of_sense 2: " ^ lemma)
+
+(* Reads predefined entries from [filename] and links them into [ex_hipo].
+   Every row is a name followed by senses. Entries get consecutive negative
+   ids starting at -1. For each sense -- either the name of an entry defined
+   on an earlier row or a "<lemma> <variant>" sense resolved with
+   [syn_id_of_sense] -- an edge of cost 0 from that sense to the new entry is
+   added. Returns the extended edge map, the map id -> name and the map
+   name -> id. Fails with "load_predef: ..." on an empty row. *)
+let load_predef ex_hipo filename =
+  let step (graph,names,ids,next_id) = function
+      name :: senses ->
+        let graph = Xlist.fold senses graph (fun graph sense ->
+          let parent =
+            try StringMap.find ids sense
+            with Not_found -> syn_id_of_sense sense in
+          let below = try IntMap.find graph parent with Not_found -> IntMap.empty in
+          IntMap.add graph parent
+            (IntMap.add_inc below next_id 0 (fun _ -> failwith "load_predef 1"))) in
+        let names = IntMap.add names next_id name in
+        let ids = StringMap.add_inc ids name next_id (fun _ -> failwith "load_predef 2") in
+        graph, names, ids, next_id - 1
+    | fields -> failwith ("load_predef: " ^ String.concat "\t" fields) in
+  let graph,names,ids,_ =
+    File.fold_tab filename (ex_hipo,IntMap.empty,StringMap.empty,-1) step in
+  graph,names,ids
+
+(* Walks the edge map [ex_hipo] starting at [id], which has been reached at
+   accumulated [cost]. [found] maps each id visited so far to the cost it was
+   recorded with. A node is skipped when it is already recorded at a cost
+   that is not higher, or when [cost] exceeds [hipero_threshold]; otherwise
+   it is recorded and its outgoing edges are followed. Returns the extended
+   [found]. *)
+let rec get_hipero_rec found ex_hipo id cost =
+  let best = try IntMap.find found id with Not_found -> max_int in
+  if best <= cost || cost > hipero_threshold then found
+  else
+    let successors = try IntMap.find ex_hipo id with Not_found -> IntMap.empty in
+    IntMap.fold successors (IntMap.add found id cost) (fun acc next step ->
+      get_hipero_rec acc ex_hipo next (cost + step))
+
+(* Collects the ids reachable from [syn_id] in the loaded edge map, together
+   with the cost each was reached at; [syn_id] itself is included at cost 0. *)
+let get_hipero syn_id =
+  let graph = !ex_hipo in
+  get_hipero_rec IntMap.empty graph syn_id 0
+
+(* Raised by [synset_name] when no name can be produced for a synset id. *)
+exception SynsetNotFound
+
+(* Returns a printable name for [syn_id]. A predefined entry yields its
+   stored name; any other synset yields "<lemma>-<variant>" of its first
+   lexical unit.
+   Raises [SynsetNotFound] when the synset is unknown, when it lists no
+   lexical units, or when its first lexical unit is missing from [lu_names]. *)
+let synset_name syn_id =
+  if IntMap.mem !predef_names syn_id then IntMap.find !predef_names syn_id else
+  let lu_ids =
+    try snd (IntMap.find !synmap syn_id)
+    with Not_found -> raise SynsetNotFound (*failwith ("synset_name: " ^ string_of_int syn_id)*) in
+  match lu_ids with
+    (* [load_syn] accepts rows without lexical units; taking the head of such
+       a list would fail with Failure "hd" rather than SynsetNotFound *)
+    [] -> raise SynsetNotFound
+  | lu_id :: _ ->
+      let lemma,variant,_ =
+        try IntMap.find !lu_names lu_id
+        with Not_found -> raise SynsetNotFound in
+      lemma ^ "-" ^ variant
+
+(* Reads classes of proper names from [filename]. Every row is a class id
+   followed by entries of the form "<lemma> <variant> <weight>". Each entry
+   becomes a triple (sense, list of (synset name, cost), weight), where the
+   sense is the entry without its last token and the list comes from
+   [get_hipero]. Returns a map class id -> list of such triples.
+   Fails on an empty row, on an entry that yields no tokens, on a weight that
+   is not a float, and presumably on a repeated class id (depends on
+   [StringMap.add_inc]). *)
+let load_proper_classes filename =
+  (* splits the trailing weight off an entry *)
+  let split_weight sense =
+    match List.rev (Str.split (Str.regexp " ") sense) with
+      [] -> failwith "load_proper_classes 4"
+    | weight :: rest ->
+        String.concat " " (List.rev rest),
+        (try float_of_string weight with _ -> failwith "load_proper_classes 2") in
+  (* attaches the named, costed synsets reachable from the sense *)
+  let expand (sense,weight) =
+    (* let sense = if sense = "antroponim 1" then "nazwa własna 1" else sense in
+    let sense = if sense = "godzina 4" then "godzina 3" else sense in *)
+(*     print_endline sense; *)
+    let syn_id = syn_id_of_sense sense in
+    let hipero = IntMap.fold (get_hipero syn_id) [] (fun acc id cost ->
+      (synset_name id, cost) :: acc) in
+    sense,hipero,weight in
+  File.fold_tab filename StringMap.empty (fun classes -> function
+      id :: senses ->
+        (* two separate passes: all weights are parsed before any sense is resolved *)
+        let parsed = Xlist.map senses split_weight in
+        let entries = Xlist.map parsed expand in
+        StringMap.add_inc classes id entries (fun _ -> failwith ("load_proper_classes 3: " ^ id))
+    | fields -> failwith ("load_proper_classes: " ^ String.concat "\t" fields))
+
+(* Collapses a detailed part-of-speech tag into one of the coarse classes
+   "noun", "adj" or "verb". Any tag not listed is returned unchanged. *)
+let simplify_pos = function
+  | "subst" | "depr" -> "noun"
+  | "adj" | "adja" | "adjc" | "adjp" -> "adj"
+  | "ger" | "pact" | "ppas"
+  | "fin" | "bedzie" | "praet" | "winien"
+  | "impt" | "imps" | "inf"
+  | "pcon" | "pant" | "pred" -> "verb"
+  | other -> other
+
+(* Lists the senses of [lemma] whose synset has the part of speech that
+   [pos] simplifies to. Each sense is a triple
+   ("<lemma>-<variant>", list of (synset name, cost), weight), where the
+   weight is log10 (1 / variant) and a non-numeric variant counts as 3.
+   An unknown lemma gives []. Fails with "find_senses" when a lexical unit
+   points to a synset missing from [synmap]. *)
+let find_senses lemma pos =
+(*if pos = "ppron12" || pos = "ppron3" || pos = "siebie" then {t with senses=[lemma,["0"],0.]} else*) (* FIXME: decide what to do with pronouns *)
+  let units = try StringMap.find !lumap lemma with Not_found -> [] in
+  let wanted = simplify_pos pos in
+  Xlist.fold units [] (fun acc (variant,syn_id) ->
+    let syn_pos,_ = try IntMap.find !synmap syn_id with Not_found -> failwith "find_senses" in
+    if wanted = syn_pos then
+      let hipero = IntMap.fold (get_hipero syn_id) [] (fun h id cost ->
+        (synset_name id, cost) :: h) in
+      let weight = log10 (1. /. (try float_of_string variant with _ -> 3.)) in
+      (lemma ^ "-" ^ variant, hipero, weight) :: acc
+    else acc)
+
+(* Concatenates the entries stored in [proper_classes] for each class named
+   in [senses]. Fails with "find_proper_senses: <name>" on an unknown class. *)
+let find_proper_senses senses =
+  let lookup sense =
+    try StringMap.find !proper_classes sense
+    with Not_found -> failwith ("find_proper_senses: " ^ sense) in
+  List.flatten (Xlist.rev_map senses lookup)
+
+(* Builds the sense triple for the lexical unit [lu_id]:
+   ("<lemma>-<variant>", list of (synset name, cost), weight), where the
+   weight is log10 (1 / variant) and a non-numeric variant counts as 3.
+   Raises [Not_found] when [lu_id] is missing from [lu_names]. *)
+let find_meaning lu_id =
+  let lemma,variant,syn_id = IntMap.find !lu_names lu_id in
+  let hipero = IntMap.fold (get_hipero syn_id) [] (fun acc id cost ->
+    (synset_name id, cost) :: acc) in
+  let weight = log10 (1. /. (try float_of_string variant with _ -> 3.)) in
+  lemma ^ "-" ^ variant, hipero, weight
+
+(* Loads every resource table into the module-level references. The order
+   matters: [load_predef] and [load_proper_classes] resolve senses through
+   [lumap], and [load_proper_classes] additionally reads [ex_hipo], [synmap],
+   [lu_names] and [predef_names] via [get_hipero] and [synset_name]. *)
+let initialize () =
+  let a,b = load_lu lu_filename in
+  lu_names := a;
+  lumap := b;
+  synmap := load_syn syn_filename;
+  (* base edges first; [load_predef] then returns them extended with the predefined entries *)
+  ex_hipo := load_ex_hipo ex_hipo_filename;
+  let a,b,c = load_predef !ex_hipo predef_filename in
+  ex_hipo := a;
+  predef_names := b;
+  predef := c;
+  proper_classes := load_proper_classes proper_classes_filename;
+  ()
@@ -613,3 +613,34 @@ let get_default_valence = function
   | "adj" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]]
   | "adv" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]]
   | _ -> []
+
+open ENIAMcategoriesPL
+
+(* Returns the candidate adjunct roles for a word with the given lemma and
+   part of speech (the last, anonymous argument) as a list of tuples
+   (selectors, role, role attribute, arev).
+   - "pact" / "ppas": a single candidate with [arev] set, whose role and
+     attribute are copied from the position of [schema] with gf = SUBJ
+     (resp. OBJ); the default is "Arg" with an empty attribute.
+   - "subst": the role depends on the case selector.
+   - "adj", "adjc", "adjp", "qub": looked up by lemma in ENIAMlexSemanticsData,
+     with "Attribute" (resp. "Arg") as the fallback.
+   - "adv": one candidate per mode reported by [adv_mode]; fails with
+     "get_aroles" on a mode that is not listed below.
+   - anything else: one candidate with an empty role. *)
+let get_aroles schema lemma = function
+    "pact" -> [Xlist.fold schema ([],"Arg","",true) (fun (sel,arole,arole_attr,arev) p ->
+    if p.gf = SUBJ then sel,p.role,p.role_attr,arev else sel,arole,arole_attr,arev)]
+  | "ppas" -> [Xlist.fold schema ([],"Arg","",true) (fun (sel,arole,arole_attr,arev) p ->
+    if p.gf = OBJ then sel,p.role,p.role_attr,arev else sel,arole,arole_attr,arev)]
+  | "subst" -> [
+       (* NOTE(review): "Recipent" looks like a misspelling of "Recipient"; left
+          untouched because other modules may match on this exact string -- confirm *)
+       [Case,Eq,["dat"]],"Recipent","",false;
+       [Case,Eq,["inst"]],(if StringSet.mem ENIAMlexSemanticsData.subst_inst_time lemma then "Time" else "Instrument"),"",false;
+       [Case,Neq,["dat";"inst"]],"","",false]
+  | "adj" | "adjc" | "adjp" -> (* FIXME: can adjc and adjp be adjuncts at all? *)
+      let l = try StringMap.find ENIAMlexSemanticsData.adj_roles lemma with Not_found -> ["Attribute",""] in
+      Xlist.map l (fun (role,role_attr) -> [],role,role_attr,false)
+  | "adv" ->
+      let modes = ENIAMcategoriesPL.adv_mode lemma in
+      let roles = try StringMap.find ENIAMlexSemanticsData.adv_roles lemma with Not_found -> ["Manner",""] in
+      Xlist.fold modes [] (fun l -> function
+        "mod" -> Xlist.fold roles l (fun l (role,role_attr) -> ([Mode,Eq,["mod"]],role,role_attr,false) :: l)
+        (* the attribute was misspelled "Souce"; "Source" is the spelling used for the abl mode elsewhere *)
+      | "abl" -> ([Mode,Eq,["abl"]],"Location","Source",false) :: l
+      | "adl" -> ([Mode,Eq,["adl"]],"Location","Goal",false) :: l
+      | "locat" -> ([Mode,Eq,["locat"]],"Location","",false) :: l
+      | "perl" -> ([Mode,Eq,["perl"]],"Path","",false) :: l
+      | "dur" -> ([Mode,Eq,["dur"]],"Duration","",false) :: l
+      | "temp" -> ([Mode,Eq,["temp"]],"Time","",false) :: l
+      | _ -> failwith "get_aroles")
+  | "qub" ->
+      let l = try StringMap.find ENIAMlexSemanticsData.qub_roles lemma with Not_found -> ["Arg",""] in
+      Xlist.map l (fun (role,role_attr) -> [],role,role_attr,false)
+  | _ -> [[],"","",false]
-(*
- *  ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
- *  Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
- *  Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
- *
- *  This library is free software: you can redistribute it and/or modify
- *  it under the terms of the GNU Lesser General Public License as published by
- *  the Free Software Foundation, either version 3 of the License, or
- *  (at your option) any later version.
- *
- *  This library is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU Lesser General Public License for more details.
- *
- *  You should have received a copy of the GNU Lesser General Public License
- *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *)
-
-open ENIAMwalTypes
-open Xstd
-
-let expands,compreps,comprep_reqs,subtypes,equivs = ENIAMwalParser.load_realizations ()
-(*let verb_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.verb_filename)
-let noun_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.noun_filename)
-let adj_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.adj_filename)
-let adv_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.adv_filename)    *)
-
-let walenty = (*StringMap.empty*)ENIAMwalTEI.load_walenty2 ()
-
-(*let _ = StringMap.iter walenty (fun pos map ->
-  StringMap.iter map (fun lexeme frames ->
-    Printf.printf "%s %s %d\n%!" pos lexeme (Xlist.size frames)))*)
-
-(*let all_frames =
-  ["subst",noun_frames;
-   "adj",adj_frames;
-   "adv",adv_frames;
-   "ger",verb_frames;
-   "pact",verb_frames;
-   "ppas",verb_frames;
-   "fin",verb_frames;
-   "praet",verb_frames;
-   "impt",verb_frames;
-   "imps",verb_frames;
-   "inf",verb_frames;
-   "pcon",verb_frames]*)
-
-let rec get_role_and_sense = function
-    Phrase(Lex "się") -> "Theme","", []
-  | PhraseAbbr(Xp "abl",_) -> "Location","Source", []
-  | PhraseAbbr(Xp "adl",_) -> "Location","Goal", []
-  | PhraseAbbr(Xp "caus",_) -> "Condition","", []
-  | PhraseAbbr(Xp "dest",_) -> "Purpose","", []
-  | PhraseAbbr(Xp "dur",_) -> "Duration","", []
-  | PhraseAbbr(Xp "instr",_) -> "Instrument","", []
-  | PhraseAbbr(Xp "locat",_) -> "Location","", []
-  | PhraseAbbr(Xp "mod",_) -> "Manner","", []
-  | PhraseAbbr(Xp "perl",_) -> "Path","", []
-  | PhraseAbbr(Xp "temp",_) -> "Time","", []
-  | PhraseAbbr(Advp "abl",_) -> "Location","Source", []
-  | PhraseAbbr(Advp "adl",_) -> "Location","Goal", []
-  | PhraseAbbr(Advp "dur",_) -> "Duration","", []
-  | PhraseAbbr(Advp "locat",_) -> "Location","", []
-  | PhraseAbbr(Advp "mod",_) -> "Manner","", []
-  | PhraseAbbr(Advp "perl",_) -> "Path","", []
-  | PhraseAbbr(Advp "temp",_) -> "Time","", []
-(*  | PhraseAbbr(Advp "pron",_) -> "Arg","", []
-  | PhraseAbbr(Advp "misc",_) -> "Arg","", []*)
-  | PhraseAbbr(Distrp,_) -> "Distributive","", [] (* FIXME: to jest kwantyfikator *)
-  | PhraseAbbr(Possp,_) -> "Possesive","", []
-  | LexPhraseMode("abl",_,_) -> "Location","Source", []
-  | LexPhraseMode("adl",_,_) -> "Location","Goal", []
-  | LexPhraseMode("caus",_,_) -> "Condition","", []
-  | LexPhraseMode("dest",_,_) -> "Purpose","", []
-  | LexPhraseMode("dur",_,_) -> "Duration","", []
-  | LexPhraseMode("instr",_,_) -> "Instrument","", []
-  | LexPhraseMode("locat",_,_) -> "Location","", []
-  | LexPhraseMode("mod",_,_) -> "Manner","", []
-  | LexPhraseMode("perl",_,_) -> "Path","", []
-  | LexPhraseMode("temp",_,_) -> "Time","", []
-  | _ -> "Arg","", []
-
-
-(*let rec get_gf_role = function
-    [],Phrase(NP case) -> "C", "", ["T"]
-  | [],Phrase(AdjP case) -> "R", "", ["T"]
-  | [],Phrase(NumP(case,_)) -> "C", "", ["T"]
-  | [],Phrase(PrepNP _) -> "C", "", ["T"]
-  | [],Phrase(PrepAdjP _) -> "C", "", ["T"]
-  | [],Phrase(PrepNumP _) -> "C", "", ["T"]
-  | [],Phrase(ComprepNP _) -> "C", "", ["T"]
-  | [],Phrase(ComparP _) -> "C", "", ["T"]
-  | [],Phrase(CP _) -> "C", "", ["T"]
-  | [],Phrase(NCP(case,_,_)) -> "C", "", ["T"]
-  | [],Phrase(PrepNCP _) -> "C", "", ["T"]
-  | [],Phrase(InfP _) -> "C", "", ["T"]
-  | [],Phrase(FixedP _) -> "C", "", ["T"]
-  | [],Phrase Or -> "C", "", ["T"] (* FIXME: zbadać w walentym faktyczne użycia or, bo to nie tylko zdania, ale też np(nom) w cudzysłowach *)
-  | [],Phrase(Lex "się") -> "C", "Ptnt", ["T"]
-  | [],PhraseAbbr(Xp mode,_) -> "C", mode, ["T"]
-  | [],PhraseAbbr(Advp "pron",_) -> "R", "", ["T"]
-  | [],PhraseAbbr(Advp "misc",_) -> "R", "", ["T"]
-  | [],PhraseAbbr(Advp mode,_) -> "C", mode, ["T"]
-  | [],PhraseAbbr(Nonch,_) -> "C", "", ["T"]
-  | [],PhraseAbbr(Distrp,_) -> "C", "Distr", ["T"]
-  | [],PhraseAbbr(Possp,_) -> "C", "Poss", ["T"]
-  | [],LexPhraseMode(mode,_,_) -> "C", mode, ["T"]
-  | [],LexPhrase((SUBST(_,case),_) :: _,_) -> "C", "", ["T"]
-  | [],LexPhrase((PREP _,_) :: _,_) -> "C", "", ["T"]
-  | [],LexPhrase((NUM(case,_,_),_) :: _,_) -> "C", "", ["T"]
-  | [],LexPhrase((ADJ(_,case,_,_),_) :: _,_) -> "C", "", ["T"]
-  | [],LexPhrase((ADV _,_) :: _,_) -> "C", "", ["T"]
-  | [],LexPhrase((GER(_,case,_,_,_,_),_) :: _,_) -> "C", "", ["T"]
-  | [],LexPhrase((PACT(_,case,_,_,_,_),_) :: _,_) -> "C", "", ["T"]
-  | [],LexPhrase((PPAS(_,case,_,_,_),_) :: _,_) -> "C", "", ["T"]
-  | [],LexPhrase((INF _,_) :: _,_) -> "C", "", ["T"]
-  | [],LexPhrase((QUB,_) :: _,_) -> "C", "", ["T"]
-  | [],LexPhrase((COMPAR,_) :: _,_) -> "C", "", ["T"]
-  | [],LexPhrase((COMP _,_) :: _,_) -> "C", "", ["T"]
-  | [],morf -> print_endline(*failwith*) ("get_gf: []," ^ ENIAMwalStringOf.morf morf);"","",[]
-  | _,Phrase(InfP _) -> "X", "", ["T"]
-  | _,Phrase(CP _) -> "X", "", ["T"]  (* zwykle możliwa koordynacja z infp *)
-  | _,Phrase _ -> "X", "", ["T"]
-  | _,PhraseAbbr _ -> "X", "", ["T"]
-  | _,LexPhraseMode _ -> "X", "", ["T"]
-  | _,LexPhrase((INF _,_) :: _,_) -> "X", "", ["T"]
-  | _,LexPhrase _ -> "X", "", ["T"]
-  | _,morf -> failwith ("get_gf: _," ^ ENIAMwalStringOf.morf morf)*)
-
-(*let gf_rank = Xlist.fold [
-  "",1;
-  ] StringMap.empty (fun gf_rank (gf,v) -> StringMap.add gf_rank gf v)*)
-
-(*let agregate_gfs s gfs_roles =
-(*  fst (Xlist.fold gfs ("",0) (fun (best_gf,best_rank) gf ->
-    let rank = try StringMap.find gf_rank gf with Not_found -> failwith ("agregate_gfs: " ^ gf) in
-    if rank > best_rank then gf, rank else best_gf, best_rank))*)
-(*  let gfs,roles = List.split gfs_roles in
-  let gfs = StringSet.to_list (Xlist.fold gfs StringSet.empty StringSet.add) in
-  if Xlist.size gfs > 1 then print_endline ("agregate_gfs: " ^ String.concat " " gfs);
-  if Xlist.size roles > 1 then print_endline ("agregate_gfs: " ^ String.concat " " roles);*)
-  let gf,role,prefs = List.hd gfs_roles in
-  {s with gf=gf; role=role; prefs=prefs}
-
-let rec make_gfs schema =
-  let schema = Xlist.map schema (function
-        {gf="subj"} as s -> {s with gf="SUBJ"; role="Agnt"; prefs=["T"]; morfs=make_gfs_morfs s.morfs}
-      | {gf="obj"} as s -> {s with gf="OBJ"; role="Ptnt"; prefs=["T"]; morfs=make_gfs_morfs s.morfs}
-      | {gf=""} as s -> agregate_gfs {s with morfs=make_gfs_morfs s.morfs} (Xlist.map s.morfs (fun morf -> get_gf_role (s.ce,morf)))
-      | {gf=t} -> failwith ("make_gfs: " ^ t)) in
-(*  let schema = List.rev (fst (Xlist.fold schema ([],StringMap.empty) (fun (schema,map) s ->
-    try
-      let n = StringMap.find map s.gf in
-      {s with gf=s.gf ^ string_of_int (n+1)} :: schema,
-      StringMap.add map s.gf (n+1)
-    with Not_found ->
-      s :: schema, StringMap.add map s.gf 1))) in*)
-  schema
-
-and make_gfs_morfs morfs =
-  List.flatten (Xlist.map morfs (function
-      Phrase _ as morf -> [morf]
-    | PhraseAbbr(Advp _,[]) -> [Phrase AdvP]
-    | PhraseAbbr(_,[]) -> failwith "make_gfs_morfs"
-    | PhraseAbbr(_,morfs) -> make_gfs_morfs morfs
-    | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,make_gfs schema))]
-    | LexPhraseMode(_,pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,make_gfs schema))]
-    | _ -> failwith "make_gfs_morfs"))*)
-
-let mark_nosem_morfs morfs =
-  Xlist.map morfs (function
-      Phrase(PrepNP(_,prep,c)) -> Phrase(PrepNP(NoSem,prep,c))
-    | Phrase(PrepAdjP(_,prep,c)) -> Phrase(PrepAdjP(NoSem,prep,c))
-    | Phrase(PrepNumP(_,prep,c)) -> Phrase(PrepNumP(NoSem,prep,c))
-(*     | Phrase(ComprepNP(_,prep)) -> Phrase(ComprepNP(NoSem,prep)) *) (* FIXME: na razie ComprepNP są zawsze semantyczne *)
-(*    | Phrase(ComparNP(_,prep,c)) -> Phrase(ComparNP(NoSem,prep,c)) (* FIXME: pomijam niesemantyczny compar *)
-    | Phrase(ComparPP(_,prep)) -> Phrase(ComparPP(NoSem,prep))*)
-    | Phrase(PrepNCP(_,prep,c,ct,co)) -> Phrase(PrepNCP(NoSem,prep,c,ct,co))
-    | t -> t)
-
-
-let agregate_role_and_sense s l =
-  let roles,senses = Xlist.fold l (StringSet.empty,StringSet.empty) (fun (roles,senses) (role,role_attr,sense) ->
-    StringSet.add roles (role ^ " " ^ role_attr),
-    Xlist.fold sense senses StringSet.add) in
-  let roles = if StringSet.size roles = 1 then roles else StringSet.remove roles "Arg " in
-  let role,role_attr =
-    match Str.split (Str.regexp " ") (StringSet.min_elt roles) with
-      [r;a] -> r,a
-    | [r] -> r,""
-    | _ -> failwith "agregate_role_and_sense" in
-  {s with role=role; role_attr=role_attr(*; sel_prefs=StringSet.to_list senses*)}
-
-let rec assign_role_and_sense schema =
-  Xlist.map schema (function
-        {gf=SUBJ} as s ->
-          if s.role = "" then {s with role="Initiator"; sel_prefs=["ALL"]; morfs=assign_role_and_sense_morfs s.morfs}
-          else {s with morfs=assign_role_and_sense_morfs (mark_nosem_morfs s.morfs)}
-      | {gf=OBJ} as s ->
-          if s.role = "" then {s with role="Theme"; sel_prefs=["ALL"]; morfs=assign_role_and_sense_morfs s.morfs}
-          else {s with morfs=assign_role_and_sense_morfs (mark_nosem_morfs s.morfs)}
-      | {gf=ARG} as s ->
-           if s.role = "" then agregate_role_and_sense {s with sel_prefs=["ALL"]; morfs=assign_role_and_sense_morfs s.morfs}
-             (Xlist.map s.morfs (fun morf -> get_role_and_sense morf))
-           else {s with morfs=assign_role_and_sense_morfs (mark_nosem_morfs s.morfs)}
-      | _ -> failwith "assign_role_and_sense")
-
-and assign_role_and_sense_morfs morfs =
-  List.flatten (Xlist.map morfs (function
-      Phrase _ as morf -> [morf]
-    | E _ as morf -> [morf]
-    | PhraseAbbr(Advp _,[]) -> [Phrase AdvP]
-    | PhraseAbbr(_,[]) -> failwith "assign_role_and_sense_morfs"
-    | PhraseAbbr(_,morfs) -> assign_role_and_sense_morfs morfs
-    | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,assign_role_and_sense schema))]
-    | LexPhraseMode(_,pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,assign_role_and_sense schema))]
-    | _ -> failwith "assign_role_and_sense_morfs"))
-
-(*let _ =
-  Xlist.iter walenty_filenames (fun filename ->
-    print_endline filename;
-    let frames = load_frames (walenty_path ^ filename) in
-    StringMap.iter frames (fun _ l ->
-      Xlist.iter l (fun (refl,opinion,negation,pred,aspect,schema) ->
-        ignore (process_opinion opinion);
-        ignore (process_negation [Text negation]);
-        ignore (process_pred [Text pred]);
-        ignore (process_aspect [Text aspect]);
-        ignore (assign_pro_args (make_gfs (process_schema expands subtypes equivs schema))))))*)
-
-let remove_trivial_args schema =
-  Xlist.fold schema [] (fun l (_,_,_,morfs) ->
-    let morfs = Xlist.fold morfs [] (fun morfs -> function
-        Phrase(AdjP _) -> morfs
-      | Phrase(NP(Case "gen")) -> morfs
-      | Phrase(NCP(Case "gen",_,_)) -> morfs
-      | Phrase(PrepNP _) -> morfs
-      | Phrase(FixedP _) -> morfs
-      | LexPhrase([ADJ _,_],_) -> morfs
-      | LexPhrase([PPAS _,_],_) -> morfs
-      | LexPhrase([PACT _,_],_) -> morfs
-      | LexPhrase([SUBST(_,Case "gen"),_],_) -> morfs
-      | LexPhrase([PREP _,_;_],_) -> morfs
-      | morf -> morf :: morfs) in
-    if morfs = [] then l else morfs :: l)
-
-(* leksykalizacje do zmiany struktury
-lex([PREP(gen),'z';SUBST(sg,gen),'nazwa'],atr1[OBL{lex([QUB,'tylko'],natr[])}])
-lex([PREP(loc),'na';SUBST(sg,loc),'papier'],atr1[OBL{lex([QUB,'tylko'],natr[])}])
-lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
-lex([PREP(gen),'z';SUBST(sg,gen),'most'],ratr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
-lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
-lex([PREP(gen),'z';SUBST(sg,gen),'most'],ratr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
-lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
-lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
-lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
-lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
-lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
-*)
-
-let num_arg_schema_field morfs =
-  {gf=CORE; role="QUANT-ARG"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Forward; morfs=morfs}
-
-let std_arg_schema_field dir morfs =
-  {gf=ARG; role="Arg"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs}
-
-let simple_arg_schema_field morfs =
-  {gf=ARG; role=""; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=morfs}
-
-let nosem_refl_schema_field =
-  {gf=NOSEM; role=""; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[Phrase(Lex "się")]}
-
-
-let expand_lexicalizations = function
-    Frame(atrs,schema) -> Frame(atrs,expand_lexicalizations_schema schema)
-(*     ComprepFrame(s,morfs) -> ComprepFrame(atrs,expand_lexicalizations_morfs morfs) *)
-  | _ -> failwith "expand_lexicalizations"
-
-
-let prepare_schema_comprep expands subtypes equivs schema =
-  assign_pro_args (assign_role_and_sense (ENIAMwalParser.expand_equivs_schema equivs (ENIAMwalParser.expand_subtypes subtypes (ENIAMwalParser.expand_schema expands schema))))
-
-let prepare_schema expands subtypes equivs schema =
-  prepare_schema_comprep expands subtypes equivs (ENIAMwalParser.parse_schema schema)
-
-let prepare_schema_sem expands subtypes equivs schema =
-  prepare_schema_comprep expands subtypes equivs schema
-
-
-let convert_frame expands subtypes equivs lexemes valence lexeme pos (refl,opinion,negation,pred,aspect,schema) =
-(*   Printf.printf "convert_frame %s %s\n" lexeme pos; *)
-  try
-    if refl = "się" && not (StringMap.mem lexemes "się") then raise ImpossibleSchema else
-    let frame =
-      try StringMap.find default_frames refl (* w refl jest przekazywana informacja o typie domyślnej ramki *)
-      with Not_found ->
-        Frame(DefaultAtrs([],ENIAMwalParser.parse_refl [Text refl],
-          ENIAMwalParser.parse_opinion opinion,
-          ENIAMwalParser.parse_negation [Text negation],
-          ENIAMwalParser.parse_pred [Text pred],
-          ENIAMwalParser.parse_aspect [Text aspect]),
-          prepare_schema expands subtypes equivs schema) in
-    let frame = if StringMap.is_empty lexemes then frame else reduce_schema_frame lexemes frame in
-    let frame = expand_lexicalizations frame in
-    Xlist.fold (extract_lex_frames lexeme pos [] frame) valence (fun valence -> function
-        lexeme,pos,Frame(atrs,schema) ->
-           let schemas = simplify_lex (split_xor (split_or_coord schema)) in
-           Xlist.fold schemas valence (fun valence schema ->
-             let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
-             let poss = StringMap.add_inc poss pos [Frame(atrs,schema)] (fun l -> Frame(atrs,schema) :: l) in
-             StringMap.add valence lexeme poss)
-      | lexeme,pos,LexFrame(id,pos2,restr,schema) ->
-           let schemas = simplify_lex (split_xor (split_or_coord schema)) in
-           Xlist.fold schemas valence (fun valence schema ->
-             let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
-             let poss = StringMap.add_inc poss pos [LexFrame(id,pos2,restr,schema)] (fun l -> LexFrame(id,pos2,restr,schema) :: l) in
-             StringMap.add valence lexeme poss)
-      | _ -> failwith "convert_frame")
-  with ImpossibleSchema -> valence
-
-let convert_frame_sem expands subtypes equivs lexemes valence lexeme pos = function
-  Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect),positions) ->
-(*   Printf.printf "convert_frame_sem %s\n" (ENIAMwalStringOf.frame lexeme (Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect),positions))); *)
-  (try
-    if refl = ReflSie && not (StringMap.mem lexemes "się") then raise ImpossibleSchema else
-    let frame =
-        Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect),
-          prepare_schema_sem expands subtypes equivs positions) in
-    let frame = if StringMap.is_empty lexemes then frame else reduce_schema_frame lexemes frame in
-    let frame = expand_lexicalizations frame in
-    Xlist.fold (extract_lex_frames lexeme pos [] frame) valence (fun valence -> function
-        lexeme,pos,Frame(atrs,schema) ->
-           let schemas = simplify_lex (split_xor (split_or_coord schema)) in
-           Xlist.fold schemas valence (fun valence schema ->
-             let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
-             let poss = StringMap.add_inc poss pos [Frame(atrs,schema)] (fun l -> Frame(atrs,schema) :: l) in
-             StringMap.add valence lexeme poss)
-      | lexeme,pos,LexFrame(id,pos2,restr,schema) ->
-           let schemas = simplify_lex (split_xor (split_or_coord schema)) in
-           Xlist.fold schemas valence (fun valence schema ->
-             let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
-             let poss = StringMap.add_inc poss pos [LexFrame(id,pos2,restr,schema)] (fun l -> LexFrame(id,pos2,restr,schema) :: l) in
-             StringMap.add valence lexeme poss)
-      | _ -> failwith "convert_frame_sem")
-  with ImpossibleSchema -> valence)
-  | _ -> failwith "convert_frame_sem"
-
-let make_comprep_frames_of_schema s = function
-    [{cr=[];ce=[]; morfs=[LexPhrase([pos,Lexeme lex],(restr,schema))]}] ->
-      lex,
-      (match get_pos lex pos with [pos] -> pos | _ -> failwith "make_comprep_frame_of_schema 2"),
-      ComprepFrame(s,pos,restr,schema)
-  | schema -> failwith ("make_comprep_frame_of_schema: " ^ ENIAMwalStringOf.schema schema)
-
-let convert_comprep_frame expands subtypes equivs lexemes valence lexeme pos (s,morf) =
-  try
-    let schema = prepare_schema_comprep expands subtypes equivs [simple_arg_schema_field [morf]] in
-    let schema = if StringMap.is_empty lexemes then schema else reduce_schema lexemes schema in
-    let schema = expand_lexicalizations_schema schema in
-    let lexeme,pos,frame = make_comprep_frames_of_schema s schema in
-    Xlist.fold (extract_lex_frames lexeme pos [] frame) valence (fun valence -> function
-        lexeme,pos,ComprepFrame(s,pos2,restr,schema) ->
-           let schemas = simplify_lex (split_xor (split_or_coord schema)) in
-           Xlist.fold schemas valence (fun valence schema ->
-             let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
-             let poss = StringMap.add_inc poss pos [ComprepFrame(s,pos2,restr,schema)] (fun l -> ComprepFrame(s,pos2,restr,schema) :: l) in
-             StringMap.add valence lexeme poss)
-      | lexeme,pos,LexFrame(id,pos2,restr,schema) ->
-           let schemas = simplify_lex (split_xor (split_or_coord schema)) in
-           Xlist.fold schemas valence (fun valence schema ->
-             let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
-             let poss = StringMap.add_inc poss pos [LexFrame(id,pos2,restr,schema)] (fun l -> LexFrame(id,pos2,restr,schema) :: l) in
-             StringMap.add valence lexeme poss)
-      | _ -> failwith "convert_comprep_frame")
-  with ImpossibleSchema -> valence
-
-let remove_pro_args schema = (* FIXME: sprawdzić czy Pro i Null są zawsze na początku *)
-  List.rev (Xlist.fold schema [] (fun schema -> function
-      {morfs=[Phrase Pro]} -> schema
-    | {morfs=(Phrase Pro) :: morfs} as s -> {s with morfs=morfs} :: schema
-    | {morfs=[Phrase Null]} -> schema
-    | {morfs=(Phrase Null) :: morfs} as s -> {s with morfs=morfs} :: schema
-    | s -> s :: schema))
-
-
-
-(*let _ =
-  let valence = Xlist.fold all_frames StringMap.empty (fun valence (pos,frame_map) ->
-    print_endline pos;
-    StringMap.fold frame_map valence (fun valence lexeme frames ->
-      Xlist.fold frames valence (fun valence frame ->
-(*         print_endline (ENIAMwalStringOf.unparsed_frame lexeme frame); *)
-        convert_frame expands subtypes equivs StringMap.empty valence lexeme pos frame))) in
-  print_endline "comprepnp";
-  let valence = StringMap.fold compreps valence (fun valence lexeme frames ->
-    Xlist.fold frames valence (fun valence (pos,frame) ->
-      convert_comprep_frame expands subtypes equivs StringMap.empty valence lexeme pos frame)) in
-  print_endline "expand_restr";
-  let valence = StringMap.mapi valence (fun lexeme poss ->
-    StringMap.mapi poss (fun pos frames ->
-      List.flatten (Xlist.map frames (expand_restr valence lexeme pos)))) in
-  print_endline "transform_frame";
-  let _ = StringMap.mapi valence (fun lexeme poss ->
-    StringMap.mapi poss (fun pos frames ->
-(*       print_endline lexeme; *)
-      List.flatten (Xlist.map frames (transform_frame lexeme pos)))) in
-  print_endline "done";
-  ()*)
-(*  StringMap.iter valence (fun lexeme poss ->
-    StringMap.iter poss (fun pos frames ->
-      Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame lexeme frame))))*)
@@ -90,36 +90,21 @@ let select_comprep_adjuncts lexemes =
              not (StringSet.is_empty (StringSet.intersection reqs lexemes)) then s :: l else l)
       with Not_found -> l)
  
-
-
-(* let rec assign_pro_args schema =
-  Xlist.map schema (fun s ->
-      let morfs = match s.morfs with
-          (E p) :: l -> E Pro :: (E p) :: l
-        | [LexPhrase _] as morfs -> morfs
-        | [Phrase(FixedP _)] as morfs -> morfs
-        | [Phrase(Lex _)] as morfs -> morfs
-        (*    | [Phrase Refl] as morfs -> morfs
-              | [Phrase Recip] as morfs -> morfs*)
-        | Phrase Null :: _ as morfs -> morfs
-        | Phrase Pro :: _ as morfs -> morfs
-        | morfs -> if s.gf <> SUBJ && s.cr = [] && s.ce = [] then (Phrase Null) :: morfs else (Phrase Pro) :: morfs in (* FIXME: ustalić czy są inne przypadki uzgodnienia *)
-      (*     let morfs = assign_pro_args_lex morfs in *) (* bez pro wewnątrz leksykalizacji *)
-      {s with morfs=morfs}) *)
-
-(*let assign_pro_args_lex morfs =
-  Xlist.map morfs (function
-      Lex(morf,specs,lex,restr) -> LexN(morf,specs,lex,assign_pro_args_restr restr)
-    | LexNum(morf,lex1,lex2,restr) -> LexNum(morf,lex1,lex2,assign_pro_args_restr restr)
-    | LexCompar(morf,l) -> LexCompar(morf,make_gfs_lex l)
-    | morf -> morf)
-
-  and assign_pro_args_restr = function
-    Natr -> Natr
-  | Ratr1 schema -> Ratr1(assign_pro_args schema)
-  | Atr1 schema -> Atr1(assign_pro_args schema)
-  | Ratr schema -> Ratr(assign_pro_args schema)
-  | Atr schema -> Atr(assign_pro_args schema)*)
+(* Computes the [is_necessary] field of every position in [schema]:
+   - positions with gf ADJUNCT get Opt;
+   - otherwise positions having a SimpleLexArg, LexArg or FixedP morf get Req;
+   - otherwise positions other than SUBJ with empty [cr] and [ce] get Opt;
+   - the remaining positions get ProNG when one of their morfs is
+     NP NomAgr or NCP(NomAgr,_,_), and Pro when none is. *)
+let set_necessary schema =
+  let is_lexicalized = function
+      SimpleLexArg _ | LexArg _ | FixedP _ -> true
+    | _ -> false in
+  let is_nom_agr = function
+      NP NomAgr | NCP(NomAgr,_,_) -> true
+    | _ -> false in
+  (* true iff [test] holds for at least one element of [morfs] *)
+  let any_morf test morfs =
+    Xlist.fold morfs false (fun found morf -> found || test morf) in
+  let classify p =
+    if p.gf = ADJUNCT then Opt
+    else if any_morf is_lexicalized p.morfs then Req
+    else if p.gf <> SUBJ && p.cr = [] && p.ce = [] then Opt
+    else if any_morf is_nom_agr p.morfs then ProNG
+    else Pro in
+  Xlist.map schema (fun p -> {p with is_necessary = classify p})
  
 exception ImpossibleSchema
  
@@ -217,20 +217,39 @@ let render_lex_entry = function
         (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *)
         selectors,render_lex_entry entry) *)
  
+(* Builders of adjunct positions. Each one starts from [empty_position], sets gf to
+   ADJUNCT and wraps every element of [morfs] in the LCG constructor. *)
+
+(* Optional adjunct with the default direction. *)
+let adjunct morfs =
+  let wrapped = Xlist.map morfs (fun m -> LCG m) in
+  {empty_position with gf=ADJUNCT; is_necessary=Opt; morfs=wrapped}
+
+(* Adjunct marked Multi, attached in direction [dir]. *)
+let adjunct_multi dir  morfs =
+  let wrapped = Xlist.map morfs (fun m -> LCG m) in
+  {empty_position with gf=ADJUNCT; dir=dir; is_necessary=Multi; morfs=wrapped}
+
+(* Optional adjunct attached in direction [dir]. *)
+let adjunct_dir dir morfs =
+  let wrapped = Xlist.map morfs (fun m -> LCG m) in
+  {empty_position with gf=ADJUNCT; dir=dir; is_necessary=Opt; morfs=wrapped}
+
+(* Optional adjunct whose [ce] list holds the single entry [ce]. *)
+let adjunct_ce ce morfs =
+  let wrapped = Xlist.map morfs (fun m -> LCG m) in
+  {empty_position with gf=ADJUNCT; is_necessary=Opt; ce=[ce]; morfs=wrapped}
+
 (* Direction paired with a Plus of One and the comprepnp phrase for [prep]. *)
 let render_comprep prep =
   let comprepnp = Tensor[Atom "comprepnp"; Atom prep] in
   Both,Plus[One;comprepnp]
  
+(* Adjunct position whose only morf is the comprepnp phrase for [prep]. *)
+let render_connected_comprep prep =
+  let comprepnp = Tensor[Atom "comprepnp"; Atom prep] in
+  adjunct [comprepnp]
+
 (* For every case in [cases] yields a prepnp and a prepncp phrase headed by [prep];
    the alternatives are collected, after One, in a single Plus. *)
 let render_prepnp prep cases =
   let phrases_for case = [
     Tensor[Atom "prepnp"; Atom prep; Atom case];
     Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]] in
   let alternatives = List.flatten (Xlist.map cases phrases_for) in
   Both,Plus(One :: alternatives)
  
+(* Adjunct position listing, for every case in [cases], a prepnp and a prepncp
+   phrase headed by [prep]. *)
+let render_connected_prepnp prep cases =
+  let phrases_for case = [
+    Tensor[Atom "prepnp"; Atom prep; Atom case];
+    Tensor[Atom "prepncp"; Atom prep; Atom case; Top; Top]] in
+  adjunct (List.flatten (Xlist.map cases phrases_for))
+
 (* Plus of One, an extra postp variant (only for the prepositions z, po and na)
    and one prepadjp phrase per case in [cases]. *)
 let render_prepadjp prep cases =
   let postp =
     match prep with
       "z" | "po" | "na" -> [Tensor[Atom "prepadjp"; Atom prep; Atom "postp"]]
     | _ -> [] in
   let by_case = Xlist.map cases (fun case -> Tensor[Atom "prepadjp"; Atom prep; Atom case]) in
   Both,Plus(One :: postp @ by_case)
  
+(* Adjunct position with an extra postp variant (only for the prepositions z, po
+   and na) followed by one prepadjp phrase per case in [cases]. *)
+let render_connected_prepadjp prep cases =
+  let postp =
+    match prep with
+      "z" | "po" | "na" -> [Tensor[Atom "prepadjp"; Atom prep; Atom "postp"]]
+    | _ -> [] in
+  let by_case = Xlist.map cases (fun case -> Tensor[Atom "prepadjp"; Atom prep; Atom case]) in
+  adjunct (postp @ by_case)
+
 (* Direction paired with a Plus of One and the compar phrase for [prep]. *)
 let render_compar prep =
   let compar = Tensor[Atom "compar"; Atom prep; Top] in
   Both,Plus[One;compar]
  
+(* Adjunct position whose only morf is the compar phrase for [prep]. *)
+let render_connected_compar prep =
+  let compar = Tensor[Atom "compar"; Atom prep; Top] in
+  adjunct [compar]
+
 let verb_adjuncts_simp = [
   Both, Plus[One;Tensor[Atom "advp"; Atom "pron"]];
   Both, Plus[One;Tensor[Atom "advp"; Atom "locat"]];
@@ -249,6 +268,24 @@ let verb_adjuncts_simp = [
   Both, Plus[One;Tensor[Atom "padvp"]];
 ]
  
+(* List of adjunct positions; NOTE(review): by its name intended for verbs - confirm
+   against the callers. Order of the entries is the same as in the literal list. *)
+let verb_connected_adjuncts_simp =
+  let advp kind = adjunct [Tensor[Atom "advp"; Atom kind]] in
+  let np_or_ncp case = adjunct [
+    Tensor[Atom "np";Top;Atom case; Top; Top];
+    Tensor[Atom "ncp"; Top; Atom case; Top; Top; Top; Top]] in
+  Xlist.map ["pron"; "locat"; "abl"; "adl"; "perl"; "temp"; "dur"; "mod"] advp @
+  [np_or_ncp "dat";
+   np_or_ncp "inst";
+   adjunct [Tensor[Atom "date"];Tensor[Atom "day-lex"];Tensor[Atom "day-month"];Tensor[Atom "day"]];
+   adjunct_dir Forward_ [Tensor[Atom "cp";Top; Top]];
+   adjunct [Tensor[Atom "or"]];
+   adjunct [Tensor[Atom "lex";Atom "się";Atom "qub"]];
+   adjunct_ce "3" [Tensor[Atom "padvp"]]]
+
 let proper_noun_adjuncts_simp = [
   Both, Plus[One;Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]];
   Forward, Plus[One;Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]];
@@ -256,6 +293,13 @@ let proper_noun_adjuncts_simp = [
   Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
 ]
  
+(* Adjunct positions: a genitive np/ncp, a forward np (nominative or with the case
+   variable), a backward Multi adjp and a forward adjp; both adjp phrases use the
+   number, case and gender variables. *)
+let proper_noun_connected_adjuncts_simp =
+  let np case = Tensor[Atom "np";Top;case; Top; Top] in
+  let ncp_gen = Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top] in
+  let adjp_agr = Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"] in
+  [adjunct [np (Atom "gen"); ncp_gen];
+   adjunct_dir Forward_ [np (Atom "nom"); np (AVar "case")];
+   adjunct_multi Backward_ [adjp_agr];
+   adjunct_dir Forward_ [adjp_agr]]
+
 let common_noun_adjuncts_simp = [
   Both, Plus[One;Tensor[Atom "np";Top;Atom "gen"; Top; Top];Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top]];
   Forward, Plus[One;Tensor[Atom "np";Top;Atom "nom"; Top; Top];Tensor[Atom "np";Top;AVar "case"; Top; Top]];
@@ -263,15 +307,45 @@ let common_noun_adjuncts_simp = [
   Forward, Plus[One;Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]];
 ]
  
+(* Adjunct positions: a genitive np/ncp, a forward np (nominative or with the case
+   variable), a backward Multi adjp and a forward adjp; both adjp phrases use the
+   number, case and gender variables. *)
+let common_noun_connected_adjuncts_simp =
+  let np case = Tensor[Atom "np";Top;case; Top; Top] in
+  let ncp_gen = Tensor[Atom "ncp"; Top; Atom "gen"; Top; Top; Top; Top] in
+  let adjp_agr = Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"] in
+  [adjunct [np (Atom "gen"); ncp_gen];
+   adjunct_dir Forward_ [np (Atom "nom"); np (AVar "case")];
+   adjunct_multi Backward_ [adjp_agr];
+   adjunct_dir Forward_ [adjp_agr]]
+
 (* Two adjp slots using the number, case and gender variables: a backward one
    wrapped in Maybe and a forward one given as a Plus with One. *)
 let measure_noun_adjuncts_simp =
   let adjp_agr = Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"] in
   [Backward, Maybe adjp_agr;
    Forward, Plus[One;adjp_agr]]
  
+(* Two adjp adjunct positions using the number, case and gender variables:
+   a backward Multi one and a forward optional one. *)
+let measure_noun_connected_adjuncts_simp =
+  let adjp_agr = Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"] in
+  [adjunct_multi Backward_ [adjp_agr];
+   adjunct_dir Forward_ [adjp_agr]]
+
 (* Single entry: an advp of any kind (Top), in a Plus together with One. *)
 let adj_adjuncts_simp =
   let advp_any = Tensor[Atom "advp"; Top] in
   [Both, Plus[One;advp_any]]
  
+(* Single adjunct position: an advp of any kind (Top). *)
+let adj_connected_adjuncts_simp =
+  let advp_any = Tensor[Atom "advp"; Top] in
+  [adjunct [advp_any]]
+
 (* Single entry: an advp of any kind (Top), in a Plus together with One. *)
 let adv_adjuncts_simp =
   let advp_any = Tensor[Atom "advp"; Top] in
   [Both, Plus[One;advp_any]]
+
+(* Single adjunct position: an advp of any kind (Top). *)
+let adv_connected_adjuncts_simp =
+  let advp_any = Tensor[Atom "advp"; Top] in
+  [adjunct [advp_any]]
+
+(* Maps a (preposition, case) pair to a list of LCG morfs. Three pairs with the
+   case postp (prepositions po, z, na) have dedicated lists; every other pair yields
+   an np and an adjp carrying the given case. *)
+let assing_pref_morfs (prep, case) =
+  let lcg symbols = LCG(Tensor symbols) in
+  match prep, case with
+    "po","postp" -> [
+        lcg [Atom "adjp"; Atom "sg"; Atom "dat"; Atom "m1"];
+        lcg [Atom "adjp"; Top; Atom "postp"; Top]]
+  | "z","postp" -> [lcg [Atom "adjp"; Atom "sg"; Atom "nom"; Atom "f"]]
+  | "na","postp" -> [lcg [Atom "advp"; Top]]
+  | _ -> [
+        lcg [Atom "np"; Top; Atom case; Top; Top];
+        lcg [Atom "adjp"; Top; Atom case; Top]]
@@ -101,6 +101,9 @@ let gf = function
     SUBJ -> "subj"
   | OBJ -> "obj"
   | ARG -> "arg"(*""*)
+  | ADJUNCT -> "adjunct"
+  | CORE -> "core"
+  | NOSEM -> "nosem"
  
 let pos = function
     SUBST(n,c) -> "SUBST(" ^ number n ^ "," ^ case c ^ ")"
@@ -173,13 +176,26 @@ let controllees l =
       "1" -> "controllee"
     | n -> "controllee" ^ n)
  
+(* Lower-case textual label of a [necessary] value. *)
+let necessary nec =
+  match nec with
+  | Opt -> "opt"
+  | Req -> "req"
+  | Pro -> "pro"
+  | ProNG -> "prong"
+  | Multi -> "multi"
+
+(* Textual marker of a direction: empty for Both_, a slash for Forward_ and
+   a backslash for Backward_. *)
+let dir d =
+  match d with
+  | Both_ -> ""
+  | Forward_ -> "/"
+  | Backward_ -> "\\"
+
 let rec schema schema =
   String.concat "+" (Xlist.map schema (fun s ->
     String.concat "," (
       (if s.gf = ARG then [] else [gf s.gf])@s.mode@
+      (if s.is_necessary = Opt then [] else [necessary s.is_necessary])@
       (if s.role = "" then [] else [s.role])@
       (if s.role_attr = "" then [] else [s.role_attr])@
-      (*s.sel_prefs@*)(controllers s.cr)@(controllees s.ce)) ^ "{" ^  String.concat ";" (Xlist.map s.morfs phrase) ^ "}"))
+      (*s.sel_prefs@*)(controllers s.cr)@(controllees s.ce)) ^ (dir s.dir) ^ "{" ^  String.concat ";" (Xlist.map s.morfs phrase) ^ "}"))
 (*
 and morf = function
     Phrase p -> phrase p
@@ -222,7 +238,7 @@ let rec connected_schema schema =
 *)
  
 let meaning m =
-  m.name ^ "-" ^ m.variant 
+  m.name ^ "-" ^ m.variant
  
 let lex_entry = function
     SimpleLexEntry(le,p) ->
@@ -39,7 +39,7 @@ type aux = NoAux | PastAux | FutAux | ImpAux
  
   type nsem = Common of string | Time*)
  
-type gf = SUBJ | OBJ | ARG
+type gf = SUBJ | OBJ | ARG | ADJUNCT | CORE | NOSEM
  
 type pos =
     SUBST of number * case
@@ -99,13 +99,19 @@ type restr = Natr | Ratr | Ratrs | Ratr1 | Atr | Atr1 | NoRestr
 type sel_prefs =
     SynsetId of int
   | Predef of string
-  | RelationRole of string * string * string (* relacji * rola * atrybut roli *)
+  | SynsetName of string
+  | RelationRole of string * string * string (* relacja * rola * atrybut roli *)
+
+(* Type of the [is_necessary] field of a position.
+   NOTE(review): the constructor names suggest required / optional / two pro-drop
+   variants / repeatable - confirm the exact semantics with the parser code. *)
+type necessary = Req | Opt | Pro | ProNG | Multi
+
+(* Type of the [dir] field of a position. The trailing underscore presumably avoids a
+   clash with direction constructors declared elsewhere - TODO confirm. *)
+type direction = Both_ | Forward_ | Backward_
  
 type position = {psn_id: int; gf: gf; role: string; role_attr: string; sel_prefs: sel_prefs list;
-                 mode: string list; cr: string list; ce: string list; morfs: phrase list}
+                 mode: string list; cr: string list; ce: string list; morfs: phrase list;
+                 dir: direction; is_necessary: necessary}
  
 let empty_position =
-  {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=[]; ce=[]; morfs=[]}
+  {psn_id=(-1); gf=ARG; role=""; role_attr=""; mode=[]; sel_prefs=[]; cr=[]; ce=[]; dir=Both_; morfs=[]; is_necessary=Opt}
  
 type meaning = {mng_id: int;
                 name: string;
@@ -83,10 +83,7 @@ let rec main_loop in_chan out_chan =
 let _ =
   prerr_endline message;
   Arg.parse spec_list anon_fun usage_msg;
-  ENIAMsubsyntax.initialize ();
-  ENIAMcategoriesPL.initialize ();
-  ENIAMwalParser.initialize ();
-  ENIAMwalReduce.initialize ();
+  ENIAMlexSemantics.initialize ();
   Gc.compact ();
   prerr_endline "Ready!";
   if !comm_stdio then main_loop stdin stdout
@@ -3,34 +3,33 @@ OCAMLOPT=ocamlopt
 OCAMLDEP=ocamldep
 INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam
 OCAMLFLAGS=$(INCLUDES) -g
-#OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa
 OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa eniam-lexSemantics.cmxa
 INSTALLDIR=`ocamlc -where`/eniam
  
-SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMwalRenderer.ml ENIAMadjuncts.ml \
-    ENIAMlexSemanticsTypes.ml ENIAMlexSemanticsStringOf.ml ENIAMlexSemanticsHTMLof.ml ENIAMlexSemanticsXMLof.ml ENIAMlexSemantics.ml #ENIAMlexSemanticsData.ml
+SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMlexSemanticsTypes.ml ENIAMlexSemanticsData.ml ENIAMvalence.ml ENIAMwalRenderer.ml ENIAMadjuncts.ml \
+    ENIAMlexSemanticsStringOf.ml ENIAMlexSemanticsHTMLof.ml ENIAMlexSemanticsXMLof.ml ENIAMplWordnet.ml ENIAMlexSemantics.ml
  
 all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa
  
 install: all
 	mkdir -p $(INSTALLDIR)
 	cp eniam-lexSemantics.cmxa eniam-lexSemantics.a eniam-lexSemantics.cma $(INSTALLDIR)
-	cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
-	cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
+	cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMlexSemanticsData.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
+	cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMlexSemanticsData.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
 	mkdir -p /usr/share/eniam/lexSemantics
-#	cp resources/*  /usr/share/eniam/lexSemantics
-	ln -s /usr/share/eniam/lexSemantics/proper_names_20160104.tab /usr/share/eniam/lexSemantics/proper_names.tab
-	ln -s /usr/share/eniam/lexSemantics/proper_names_sgjp_polimorf_20151020.tab /usr/share/eniam/lexSemantics/proper_names_sgjp_polimorf.tab
+	cp resources/*  /usr/share/eniam/lexSemantics
+#	ln -s /usr/share/eniam/lexSemantics/proper_names_20160104.tab /usr/share/eniam/lexSemantics/proper_names.tab
+#	ln -s /usr/share/eniam/lexSemantics/proper_names_sgjp_polimorf_20151020.tab /usr/share/eniam/lexSemantics/proper_names_sgjp_polimorf.tab
  
 install-local: all
 	mkdir -p $(INSTALLDIR)
 	cp eniam-lexSemantics.cmxa eniam-lexSemantics.a eniam-lexSemantics.cma $(INSTALLDIR)
-	cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
-	cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
+	cp entries.cmi ENIAMwalTypes.cmi ENIAMwalStringOf.cmi ENIAMwalParser.cmi ENIAMwalReduce.cmi ENIAMlexSemanticsData.cmi ENIAMvalence.cmi ENIAMwalRenderer.cmi ENIAMadjuncts.cmi ENIAMlexSemanticsTypes.cmi ENIAMlexSemanticsStringOf.cmi ENIAMlexSemanticsHTMLof.cmi ENIAMlexSemanticsXMLof.cmi ENIAMplWordnet.cmi ENIAMlexSemantics.cmi $(INSTALLDIR)
+	cp entries.cmx ENIAMwalTypes.cmx ENIAMwalStringOf.cmx ENIAMwalParser.cmx ENIAMwalReduce.cmx ENIAMlexSemanticsData.cmx ENIAMvalence.cmx ENIAMwalRenderer.cmx ENIAMadjuncts.cmx ENIAMlexSemanticsTypes.cmx ENIAMlexSemanticsStringOf.cmx ENIAMlexSemanticsHTMLof.cmx ENIAMlexSemanticsXMLof.cmx ENIAMplWordnet.cmx ENIAMlexSemantics.cmx $(INSTALLDIR)
 	mkdir -p /usr/local/share/eniam/lexSemantics
-#	cp resources/*  /usr/local/share/eniam/lexSemantics
-	ln -s /usr/local/share/eniam/lexSemantics/proper_names_20160104.tab /usr/local/share/eniam/lexSemantics/proper_names.tab
-	ln -s /usr/local/share/eniam/lexSemantics/proper_names_sgjp_polimorf_20151020.tab /usr/local/share/eniam/lexSemantics/proper_names_sgjp_polimorf.tab
+	cp resources/*  /usr/local/share/eniam/lexSemantics
+#	ln -s /usr/local/share/eniam/lexSemantics/proper_names_20160104.tab /usr/local/share/eniam/lexSemantics/proper_names.tab
+#	ln -s /usr/local/share/eniam/lexSemantics/proper_names_sgjp_polimorf_20151020.tab /usr/local/share/eniam/lexSemantics/proper_names_sgjp_polimorf.tab
  
 eniam-lexSemantics.cma: $(SOURCES)
 	ocamlc -linkall -a -o eniam-lexSemantics.cma $(OCAMLFLAGS) $^
@@ -38,7 +37,7 @@ eniam-lexSemantics.cma: $(SOURCES)
 eniam-lexSemantics.cmxa: $(SOURCES)
 	ocamlopt -linkall -a -o eniam-lexSemantics.cmxa $(INCLUDES) $^
  
-test: $(SOURCES) test.ml
+test: test.ml
 	$(OCAMLOPT) -o test $(OCAMLOPTFLAGS) $^
  
 interface: interface.ml
-LUDZIE	osoba 1	grupa ludzi 1
+LUDZIE	osoba 1	ludzie 1
 ISTOTY	LUDZIE	osoba 1	istota żywa 1	grupa istot 1
 PODMIOTY	LUDZIE	podmiot 3
 JADŁO	pokarm 1	napój 1
@@ -17,4 +17,4 @@ SYTUACJA	CZYNNOŚĆ	czynność 1	zdarzenie 2	okoliczność 1	ciąg zdarzeń 1
 KIEDY	CZAS	SYTUACJA
 CZEMU	CECHA	SYTUACJA	LUDZIE
 ILOŚĆ	ilość 1	rozmiar 1	rozmiar 2	jednostka 4	wielkość 6
-ALL	PODMIOTY	ISTOTY	DOBRA	SYTUACJA
+#ALL	PODMIOTY	ISTOTY	DOBRA	SYTUACJA
@@ -23,6 +23,7 @@ let test_strings = [
   (* "Kot miauczy w październiku."; *)
   (* "Arabia Saudyjska biegnie."; *)
   "Chłopcy mają ulicę kwiatami.";
+  "Kot miałczy.";
   (* "Np. Ala.";
   "Kot np. miauczy.";
   "Szpak frunie. Kot miauczy.";
@@ -61,6 +62,7 @@ let test_strings = [
   ]
  
 let _ =
+  ENIAMlexSemantics.initialize ();
   print_endline "Testy wbudowane";
   Xlist.iter test_strings (fun s ->
     print_endline ("\nTEST: " ^ s);
+# Blame Szymon Rutkowski - szymon@szymonrutkowski.pl - Oct 2016.
+# This file is intended to check the NKJP1M frequency list against rules derived from SGJP.
+# If you want to use this, review the end of this file (filenames, column structure) and run with python3.
+
+import re
+
+def load_rules_file(fname):
+    """Load a tab-separated rules file.
+
+    Every well-formed line has exactly 7 tab-separated columns and is returned as a
+    7-tuple of strings, in file order. Any other line (including the empty string
+    that follows a trailing newline) is reported on stdout and dropped.
+    """
+    rule_list = []
+
+    with open(fname) as inp:
+        contents = inp.read()
+
+    for line in contents.split('\n'):
+        data = line.split('\t')
+        if len(data) != 7:
+            print('Skipped line in rules: '+line)
+            # Really skip it: without this the malformed row was reported as
+            # skipped but appended to the result anyway.
+            continue
+        rule_list.append(tuple(data))
+
+    return rule_list
+
+def make_rules_table(rule_list):
+    """Index rules by the outermost characters of their affix.
+
+    rule_list is a list of 7-tuples (name, freq, classification, prefix, suffix,
+    stem ending, tag). The result maps an index string to the list of rules filed
+    under it:
+      * rules with a prefix are filed under the first (up to three) characters of
+        the prefix followed by '-' (hyphen);
+      * rules with no prefix but a suffix are filed under '-' followed by the last
+        (up to three) characters of the suffix;
+      * rules with both affixes empty are filed under '-'.
+    Tuples that do not have exactly 7 elements are reported on stdout and ignored.
+    """
+    rtable = dict()
+
+    for rl in rule_list:
+        if len(rl) != 7:
+            print("Skipped invalid rule: "+str(rl))
+            continue
+
+        prefix, suffix = rl[3], rl[4]
+
+        # Decide prefix/suffix from the rule columns themselves. Looking at the
+        # first character of the finished index (as was done before) mistook a long
+        # prefix that begins with a hyphen for a suffix.
+        if prefix != '':
+            index = prefix[:3] + '-'
+        elif suffix != '':
+            index = '-' + suffix[-3:]
+        else:
+            index = '-'
+
+        rtable.setdefault(index, []).append(rl)
+
+    return rtable
+
+# just ripped from compare_morphosyn.py, guess it'll be better to keep those scripts self-contained
+# note that liberal_tagcomp is mainly suitable for checking NKJP against SGJP; when checking
+# a resource that follows a more SGJP-like tagging convention, strict_tagcomp will be better
+def strict_tagcomp(tag1, tag2):
+    """Return True when tag1 fits tag2 position by position.
+
+    Both tags are colon-separated. They must have the same first field and the
+    same number of fields, and every field of tag1 must be one of the dot-separated
+    alternatives in the corresponding field of tag2.
+    """
+    fields1 = tag1.split(':')
+    fields2 = tag2.split(':')
+
+    same_pos = fields1[0] == fields2[0]
+    if not same_pos or len(fields1) != len(fields2):
+        return False
+
+    return all(value in alternatives.split('.')
+               for value, alternatives in zip(fields1, fields2))
+
+def liberal_tagcomp(tag1, tag2):
+    """Like strict_tagcomp, but with two relaxations applied field by field.
+
+    n1/n2/n3 in tag1 and n1/n2/n3/p2/p3 in tag2 are replaced by plain 'n' before
+    comparing, and a tag2 field whose first alternative is '_' accepts anything.
+    """
+    fields1 = tag1.split(':')
+    fields2 = tag2.split(':')
+
+    if fields1[0] != fields2[0] or len(fields1) != len(fields2):
+        return False
+
+    for value, pattern in zip(fields1, fields2):
+        value = re.sub(r'(n1|n2|n3)', 'n', value)
+        allowed = re.sub(r'(n1|n2|n3|p2|p3)', 'n', pattern).split('.')
+        if allowed[0] == '_': # underscore as a catchall
+            continue
+        if value not in allowed:
+            return False
+
+    return True
+
+def is_recognizable(entry, rules_table):
+    """Check whether entry, a triple (word_form, lemma, tags), is derivable by a rule.
+
+    rules_table is the dictionary produced by make_rules_table(). Candidate rules are
+    looked up by the outermost 3, 2, 1 and finally 0 characters of the word form; a
+    candidate fits when its prefix, suffix and tag (via liberal_tagcomp) match the
+    word form and stripping both affixes and appending the rule's stem ending gives
+    the lemma.
+
+    Returns the class of the first fitting rule (third column, usually the empty
+    string) or False when no rule fits. Because '' is a successful result, callers
+    have to compare the result with False explicitly.
+    """
+    form, lemma, tags = entry[0], entry[1], entry[2]
+
+    for chunk_size in range(3, -1, -1):
+        if len(form) < chunk_size:
+            continue
+
+        pref_ind = form[:chunk_size] + '-'
+        # Use an explicit start index: form[-0:] is the whole word, not ''.
+        suf_ind = '-' + form[len(form) - chunk_size:]
+
+        rule_candidates = list(rules_table.get(pref_ind, []))
+        if suf_ind != pref_ind: # both are '-' for chunk_size 0; do not list rules twice
+            rule_candidates += rules_table.get(suf_ind, [])
+
+        for rl in rule_candidates:
+            prefix, suffix, ending, rule_tag = rl[3], rl[4], rl[5], rl[6]
+            # The lookup above only finds potentially relevant rules; check the
+            # whole prefix and suffix, and the tag, before rebuilding the lemma.
+            if not (form.startswith(prefix) and form.endswith(suffix)
+                    and liberal_tagcomp(tags, rule_tag)):
+                continue
+            # Explicit end index: with an empty suffix form[a:-0] would be '', which
+            # used to accept any rule whose stem ending alone equalled the lemma.
+            stem = form[len(prefix):len(form) - len(suffix)]
+            if stem + ending == lemma:
+                return rl[2]
+
+    return False
+
+# Module-level setup: read the rules file (path relative to the current working
+# directory) and build the affix-indexed lookup table used by the main loop below.
+rlist = load_rules_file('../resources/SGJP/freq_rules.tab')
+rtable = make_rules_table(rlist)
+
+def esccurl(string) :
+    """Double every curly bracket so that str.format() treats it literally."""
+    doubled = {ord('{'): '{{', ord('}'): '}}'}
+    return string.translate(doubled)
+
+# Main pass: copy every 8-column line of the tagged frequency list to
+# freq_with_rules.tab, replacing the fifth column (data[4]) with a fresh
+# classification: COMPOS[-class], COMPOS-LWR[-class] or NCOMPOS.
+with open('../resources/NKJP1M/NKJP1M-tagged-frequency.tab') as inp:
+    with open('freq_with_rules.tab', 'w+') as out:
+        for line in inp:
+            line = line.strip()
+            data = line.split('\t')
+            if len(data) != 8: # column count of TAGGED frequency list
+                print('Skipped line in the list: '+line)
+                continue
+
+            # The following was added to work on partially done tagged frequency, to get rid of the
+            # previous COMPOS classification. Otherwise we'd want to use something like this:
+            # fmt = esccurl(line) + '\t{0}' # simple format string, applicable to raw frequency list
+            # previous COMPOS column is in data[4], so we skip it below
+            # (the kept columns are brace-escaped because fmt goes through str.format)
+            fmt = esccurl('\t'.join(data[0:4])) + '\t{0}\t' + esccurl('\t'.join(data[5:]))
+
+            # is_recognizable returns a rule class ('' for the unnamed class) or False,
+            # hence the explicit comparisons with '' and False below.
+            rl_class = is_recognizable((data[0], data[1], data[2]), rtable)
+            if rl_class == '':
+                print(fmt.format('COMPOS'), file=out)
+            elif rl_class != False:
+                print(fmt.format('COMPOS-'+rl_class), file=out)
+            else:
+                # Try again, with lowered lemma and word form.
+                rl_class_low = is_recognizable((data[0].lower(), data[1].lower(), data[2]),
+                        rtable)
+                if rl_class_low == '':
+                    print(fmt.format('COMPOS-LWR'), file=out)
+                elif rl_class_low != False:
+                    print(fmt.format('COMPOS-LWR-'+rl_class_low), file=out)
+                else:
+                    # no rule fits even after lowercasing
+                    print(fmt.format('NCOMPOS'), file=out)
+# Blame Szymon Rutkowski - szymon@szymonrutkowski.pl - Oct 2016.
+#
+# Given a frequency list and groundtruth dictionary, tag the entries on the frequency list with some
+# automatic tags (can be seen at the end of this file).
+#
+# Run from Python3, with -i (inspect option), eg. `python3 -i compare_morphosyn.py`.
+# Then invoke something like (with # representing Python prompt):
+# # sgjp = load_sgjp('../../NLP resources/sgjp-20160724.tab')
+# # nkjp = load_nkjp('../resources/NKJP1M/NKJP1M-frequency.tab')
+# # notmatching(nkjp, sgjp, liberal_tagcomp, 'raw_tagged_frequency.tab') # (may take a while)
+# # ^D # Ctrl-D when done
+# The last argument names the result file; liberal_tagcomp is the most sensible tag-comparing function.
+
+import functools
+import re
+import unicodedata
+
+def load_sgjp(fname):
+    """Load a tab-separated SGJP dictionary into a dict keyed by lemma.
+
+    Each line holds word_form, lemma[:subidentifier], tags and an optional fourth
+    notes column; lines with fewer than three columns are reported on stdout and
+    skipped. Underscores in the lemma are turned into spaces unless the word form
+    itself contains an underscore.
+
+    Returns {lemma: [[word_form, tags, notes, lemma_sub], ...]}, with entries in
+    file order.
+    """
+    sgjp = dict()
+    with open(fname) as inp:
+        for line in inp:
+            data = line.strip().split('\t')
+
+            if len(data) < 3:
+                print('Skipped line: ' + line.strip())
+                continue
+
+            word_form = data[0]
+
+            lemma_sub = ''
+            if data[1] == ':':
+                # The lemma is the colon character itself. It has to stay a string:
+                # the list stored here before broke .replace() and dict lookup.
+                lemma = ':'
+            else:
+                lemma_parts = data[1].split(":")
+                lemma = lemma_parts[0]
+                if len(lemma_parts) > 1:
+                    lemma_sub = lemma_parts[1] # lemma subidentifier
+            if word_form.find('_') == -1:
+                lemma = lemma.replace('_', ' ')
+
+            tags = data[2]
+
+            notes = ''
+            if len(data) == 4:
+                notes = data[3]
+
+            # Every entry has the same four fields (previously only the first entry
+            # of a lemma carried lemma_sub).
+            sgjp.setdefault(lemma, []).append([word_form, tags, notes, lemma_sub])
+    return sgjp
+
+
+def load_nkjp(fname):
+    """Load the NKJP frequency list as a list of 5-element lists of strings.
+
+    Lines are split on tabs; the first four columns are word_form, lemma, tags and
+    freq. Lines that do not have exactly 5 columns are reported on stdout by their
+    zero-based line index and left out of the result.
+    """
+    nkjp = []
+    with open(fname) as inp:
+        lines = inp.read().split('\n')
+    for (n, line) in enumerate(lines):
+        fields = line.split('\t') # word_form, lemma, tags, freq, ...
+        if len(fields) != 5:
+            print('Skipped line: ' + str(n))
+            # Build a new list instead of deleting in place: deleting while
+            # enumerating made the loop jump over the line after each bad one,
+            # leaving it as an unsplit string.
+            continue
+        nkjp.append(fields)
+    return nkjp
+
+def naive_tagcomp(tag1, tag2):
+    """Tags match only when they are equal."""
+    are_equal = tag1 == tag2
+    return are_equal
+
+def strict_tagcomp(tag1, tag2):
+    """Return True when every colon-separated field of tag1 is among the dot-separated
+    alternatives of the same field of tag2; the first fields must be identical and
+    the field counts equal."""
+    left = tag1.split(':')
+    right = tag2.split(':')
+
+    if left[0] != right[0]: # POS
+        return False
+    if len(left) != len(right):
+        return False
+
+    for position in range(len(left)):
+        if left[position] not in right[position].split('.'):
+            return False
+
+    return True
+
+def liberal_tagcomp(tag1, tag2):
+    """Field-by-field tag comparison with relaxed gender codes.
+
+    The first fields must be identical and the field counts equal. Before a field
+    is compared, n1/n2/n3 in tag1 and n1/n2/n3/p2/p3 in tag2 are rewritten to 'n';
+    a tag2 field whose first dot-separated alternative is '_' matches anything.
+    """
+    left = tag1.split(':')
+    right = tag2.split(':')
+
+    if left[0] != right[0]: # POS
+        return False
+    if len(left) != len(right):
+        return False
+
+    def field_matches(value, pattern):
+        value = re.sub(r'(n1|n2|n3)', 'n', value)
+        model = re.sub(r'(n1|n2|n3|p2|p3)', 'n', pattern).split('.')
+        return value in model or model[0] == '_' # underscore as a catchall
+
+    return all(field_matches(value, pattern) for value, pattern in zip(left, right))
+
+def compare_entries(nkjp_entry, sgjp_forms, tagcomp_func):
+    """Look for nkjp_entry among the SGJP forms of its lemma.
+
+    nkjp_entry is indexed as (word_form, lemma, tag, ...); sgjp_forms is a list of
+    entries indexed as (word_form, tag, ...); tagcomp_func(nkjp_tag, sgjp_tag) says
+    whether two tags match. One trailing colon is removed from the NKJP tag first,
+    with a notice printed on stdout.
+
+    Returns a tuple of four booleans (found, case1, case2, case3):
+      found - a form with a matching tag and identical spelling exists (the search
+              stops there);
+      case1 - a tag-matching form equals the lowercased word, for words spelled
+              Aaaa;
+      case2 - a tag-matching form equals the capitalized word (AAAA -> Aaaa);
+      case3 - a tag-matching form equals the lowercased word (AAAA -> aaaa, A -> a).
+    case2 and case3 are no longer set once case1 has been set.
+    """
+    found = False
+    case1 = False
+    case2 = False
+    case3 = False
+
+    # Everything that depends only on the NKJP entry is computed once, so the
+    # correction notice is printed once per entry rather than once per SGJP form.
+    nkjp_word = nkjp_entry[0]
+    nkjp_tag = re.sub(r':$', '', nkjp_entry[2])
+    if nkjp_tag !=  nkjp_entry[2]:
+        print("Corrected tag %s for %s %s" % (nkjp_entry[2], nkjp_entry[0], nkjp_entry[1]))
+    lowered = nkjp_word.lower()
+    capitalized = nkjp_word.capitalize()
+    is_capitalized_word = len(nkjp_word) > 1 and lowered.capitalize() == nkjp_word
+    has_uppercase = lowered != nkjp_word
+
+    for sgjp_form in sgjp_forms:
+        sgjp_word = sgjp_form[0]
+        sgjp_tag = sgjp_form[1]
+
+        if not tagcomp_func(nkjp_tag, sgjp_tag):
+            continue
+
+        if sgjp_word == nkjp_word: # word form & tag
+            found = True
+            break
+
+        # tag okay, try with other letter cases
+        if is_capitalized_word and sgjp_word == lowered: # Aaaa -> aaaa
+            case1 = True
+        if not case1 and has_uppercase:
+            if sgjp_word == capitalized: # AAAA -> Aaaa
+                case2 = True
+            elif sgjp_word == lowered: # AAAA -> aaaa, A -> a
+                case3 = True
+    return (found, case1, case2, case3)
+
+def tab_format(collection, label):
+    """Convert a collection used by notmatching() function to a string of tabbed entries.
+
+    Each entry (a sequence of strings) becomes one line: its fields joined with tabs,
+    followed by a tab, the label and a newline. An empty collection gives ''.
+    """
+    # Join once instead of growing the string entry by entry.
+    return ''.join('\t'.join(etr) + '\t' + label + '\n' for etr in collection)
+
+def nonalphab(string):
+    """Return True when string contains no character of a Unicode letter category
+    (a category code starting with 'L'); the empty string counts as non-alphabetic."""
+    return not any(unicodedata.category(char)[0] == 'L' for char in string)
+
+def notmatching(nkjp, sgjp, tagcomp_func, result_file):
+    notmatching = []
+    matching = []
+    case1_notmatching = [] # Aaaa -> aaaa
+    case2_notmatching = [] # AAAA -> Aaaa
+    case3_notmatching = [] # AAAA -> aaaa, A -> a
+    lower_matching = [] # matching with form and lemma converted to lowercase
+    symbols = []
+    notmatching_numeric = []
+
+    for (n, nkjp_entry) in enumerate(nkjp):
+
+        lemma = nkjp_entry[1].strip()
+        form = nkjp_entry[0].strip()
+        # Warn about stripped whitespaces.
+        if lemma != nkjp_entry[1]:
+            print("Stripped whitespaces in lemma: %s" % nkjp_entry[1])
+        if form != nkjp_entry[0]:
+            print("Stripped whitespaces in form: %s" % nkjp_entry[0])
+
+        # Abbreviations are automatically classified as symbols.
+        if nkjp_entry[2][:4] == 'brev':
+            symbols.append(nkjp_entry)
+            continue
+
+        sgjp_forms = []
+        lowered_lemma = False # indicates if lemma was converted to lowercase
+        if lemma in sgjp: # lemma matching
+            sgjp_forms = sgjp[lemma]
+        else:
+            if lemma.lower() in sgjp:
+                lowered_lemma = True
+                sgjp_forms = sgjp[lemma.lower()]
+            else:
+                # Continue when we can't find even lowered lemma in SGJP. 
+                if nonalphab(form) and nonalphab(lemma):
+                    symbols.append(nkjp_entry)
+                elif re.match(r"^[123456789]", form, flags=re.L) != None:
+                    notmatching_numeric.append(nkjp_entry)
+                    continue
+                else:
+                    notmatching.append(nkjp_entry)
+                continue
+
+        # The following is executed only if the lemma (maybe in lowercase) was found in SGJP.
+
+        # Go through the entry if it wasn't found in SGJP
+        found, case1, case2, case3 = 0, 1, 2, 3 # indices in boolean tuple below
+        case = compare_entries(nkjp_entry, sgjp_forms, tagcomp_func)
+
+        # one more desperate attempt at lowering the lemma, if nothing was found
+        if (not lowered_lemma) and not True in case:
+            if lemma.lower() in sgjp:
+                sgjp_forms = sgjp[lemma.lower()]
+                case = compare_entries(nkjp_entry, sgjp_forms, tagcomp_func)
+                if True in case:
+                    lowered_lemma = True
+                else: # revert for consistency 
+                    sgjp_forms = sgjp[lemma]
+
+        if lowered_lemma and (case[found] or case[case1] or case[case2] or case[case3]):
+            lower_matching.append(nkjp_entry)
+            continue
+
+        if case[found]:
+            matching.append(nkjp_entry)
+            continue
+
+        if nonalphab(form) and nonalphab(lemma):
+            symbols.append(nkjp_entry)
+            continue
+        if re.match(r"^[123456789]", form, flags=re.L) != None:
+            notmatching_numeric.append(nkjp_entry)
+            continue
+
+        if case[case1]:
+            case1_notmatching.append(nkjp_entry)
+            continue
+        if case[case2]:
+            case2_notmatching.append(nkjp_entry)
+            continue
+        if case[case3]:
+            case3_notmatching.append(nkjp_entry)
+            continue
+
+        # when everything failed:
+        notmatching.append(nkjp_entry)
+
+    collections = [nkjp, matching, case1_notmatching, case2_notmatching, case3_notmatching,
+                lower_matching, symbols, notmatching_numeric, notmatching]
+    # sort the entries in collections by frequency
+    collections = list(map((lambda coll: sorted(coll, reverse=True, key=(lambda etr: int(etr[3])))),
+                        collections))
+    freqs = list(map(lambda coll: functools.reduce((lambda x, y: x+y),
+        [int(etr[3]) for etr in coll]), # sum of sets' frequencies
+                    collections))
+    descs = ["Total:",
+            "Found:",
+            "Found when Aaa -> aaa (lemma):",
+            "Found when AAA -> Aaa (lemma):",
+            "Found when AAA -> aaa (lemma):",
+            "Found when word form and lemma are converted to lowercase:",
+            "Symbols:",
+            "Not found, numeric:",
+            "Not found, other:"]
+
+    for (i, _) in enumerate(collections):
+        info = (len(collections[i]), 100.0*(len(collections[i])/len(collections[0])),
+                 freqs[i], 100.0*(freqs[i]/freqs[0]))
+        print((descs[i]+" %d entries (%.2f%%), %d occurences (%.2f%%)") % info)
+    
+    # below we skip nkjp, which contains everything
+    labels = ['SGJP-EXACT\tNCH\tCORR', 'SGJP-LMM-UNCAPITAL\tNCH\tCORR',
+            'SGJP-LMM-CAPITAL\tNCH\tCORR', 'SGJP-LMM-LOWER\tNCH\tCORR',
+            'SGJP-BTH-LOWER\tNCH\tCORR', 'NON-SGJP\tSYMB\tCORR',
+            'NON-SGJP\tLATEK\tCORR', 'NON-SGJP\tCW\tCORR']
+    with open(result_file, 'w+') as out:
+        for (c, coll) in enumerate(collections[1:]):
+            print(tab_format(coll, labels[c]), file=out)
+# Blame Szymon Rutkowski - szymon@szymonrutkowski.pl - Nov 2016.
+# This file is intended to check the (partially tagged) NKJP1M frequency list against the list of
+# exceptions from morphological rules derived from SGJP.
+# If you want to use this, review the end of this file (filenames, column structure) and run with python3.
+
+import re
+
+# Copied from compare_morphosyn.py so that these scripts stay self-contained.
+# Note that liberal_tagcomp is mainly suitable for checking NKJP against SGJP; when checking
+# a resource that follows a more SGJP-like tagging convention, strict_tagcomp is the better choice.
+def strict_tagcomp(tag1, tag2):
+    tag1_items = tag1.split(':')
+    tag2_items = tag2.split(':')
+
+    if (tag1_items[0] != tag2_items[0] # POS
+            or len(tag1_items) != len(tag2_items)):
+        return False
+
+    for (i, item) in enumerate(tag1_items):
+        if not item in tag2_items[i].split('.'):
+            return False
+
+    return True
+
+def liberal_tagcomp(tag1, tag2):
+    tag1_items = tag1.split(':')
+    tag2_items = tag2.split(':')
+
+    if (tag1_items[0] != tag2_items[0] # POS
+            or len(tag1_items) != len(tag2_items)):
+        return False
+
+    for (i, item) in enumerate(tag1_items):
+        # remove tags n1, f1...
+        item = re.sub(r'(n1|n2|n3)', 'n', item)
+        model = re.sub(r'(n1|n2|n3|p2|p3)', 'n', tag2_items[i]).split('.')
+        if not item in model and model[0] != '_': # underscore as a catchall
+            return False
+
+    return True
+
+# the bulk of the following ripped from check_rule_compos.py
+def esccurl(string) :
+    "Escape the curly brackets in the string, for using it with the string formatter."
+    return string.replace('{', '{{').replace('}', '}}')
+
+alt_idx = dict() # indexed by data[0] - word form
+
+with open('../resources/SGJP/alt.tab') as alt_src:
+    for line in alt_src:
+            line = line.strip()
+            data = line.split('\t')
+            if len(data) != 3:
+                print('Skipped line in the alt list: '+line)
+                continue
+            # handle lemmas with subclassification after colon
+            if data[1].find(':') != -1 and data[1] != ':':
+                data[1] = data[1][: data[1].find(':')]
+            # each entry consists of 0 - list of lemmas, 1 - list of tags
+            if not data[0] in alt_idx:
+                alt_idx[data[0]] = [[data[1]], [data[2]]]
+            else:
+                alt_idx[data[0]][0].append(data[1])
+                alt_idx[data[0]][1].append(data[2])
+
+with open('../resources/NKJP1M/NKJP1M-tagged-frequency.tab') as inp:
+    with open('freq_with_alt.tab', 'w+') as out:
+        for line in inp:
+            line = line.strip()
+            data = line.split('\t')
+            if len(data) != 8: # column count of TAGGED frequency list
+                print('Skipped line in the list: '+line)
+                continue
+
+            # The following was added to work on partially done tagged frequency, to get rid of the
+            # previous COMPOS classification. Otherwise we'd want to use something like this:
+            # fmt = esccurl(line) + '\t{0}' # simple format string, applicable to raw frequency list
+            # previous COMPOS column is in data[4], so we skip it below
+            fmt = esccurl('\t'.join(data[0:4])) + '\t{0}\t' + esccurl('\t'.join(data[5:]))
+
+            matched = False
+            if data[0] in alt_idx:
+                tagcomps = list(map(lambda x: liberal_tagcomp(data[2], x), alt_idx[data[0]][1]))
+                tagnum = True in tagcomps and tagcomps.index(True)
+                # (make sure that if lemma is matching, it belongs to the matching tag)
+                if tagnum != -1 and tagnum != False and alt_idx[data[0]][0][tagnum] == data[1]:
+                    print(fmt.format('COMPOS-ALT'), file=out)
+                    matched = True
+            # try again with lowering word form and lemma:
+            if not matched and data[0].lower() in alt_idx:
+                tagcomps = list(map(lambda x: liberal_tagcomp(data[2], x), # data[2] - tag stays the same
+                                    alt_idx[data[0].lower()][1]))
+                tagnum = True in tagcomps and tagcomps.index(True)
+                if tagnum != -1 and tagnum != False and alt_idx[data[0].lower()][0][tagnum] == data[1].lower():
+                    print(fmt.format('COMPOS-LWR-ALT'), file=out)
+                    matched = True
+            if not matched:
+                print(line, file=out)
@@ -1078,3 +1078,222 @@
 		zetrz	star
 		źr	ziar
  
+@obce_funkcjonalnie_twarde_a
+	ac	ak	ac
+	aq	ak	aq
+	ay	ay	ay
+	c	c	c
+	c	k	c
+	ch	ch	ch
+	cq	k	cq
+	dok	dk	dok
+	du	du	du
+	dź	dź	dź
+	ec	ek	ec
+	ey	ey	ey
+	gh	dz	gh
+	gh	gh	gh
+	gi	gi	gi
+	gn	gn	gn
+	gy	gy	gy
+	ic	ik	ic
+	kie	k	kie
+	lj	lj	lj
+	ly	ly	ly
+	m	m	m
+	nc	nk	nc
+	oc	ok	oc
+	oy	oy	oy
+	que	k	que
+	ri	ri	ri
+	shu	shu	shu
+	st	sti	st
+	szu	szu	szu
+	ti	ti	ti
+	tu	tu	tu
+	use	uz	use
+	v	v	v
+	x	ksi	x
+	x	x	x
+	z	z	z
+
+@obce_funkcjonalnie_twarde_e
+	ac	ak	ac
+	ac	aki	ac
+	aq	ak	aq
+	aq	aki	aq
+	ay	ay	ay
+	c	c	c
+	c	ce	c
+	co	ki	c
+	cq	k	cq
+	cq	ki	cq
+	cques	ki	cques
+	dok	dk	dok
+	dok	dki	dok
+	dź	dź	dź
+	ec	ek	ec
+	ec	eki	ec
+	ey	ey	ey
+	gh	dz	gh
+	gh	gh	gh
+	gh	ghi	gh
+	gi	gi	gi
+	gn	gn	gn
+	gue	gi	gue
+	gues	gi	gues
+	gy	gy	gy
+	ic	iki	ic
+	je	j	je
+	ke	ki	ke
+	kie	k	kie
+	kie	ki	kie
+	lj	lj	lj
+	ly	ly	ly
+	nc	nk	nc
+	nc	nki	nc
+	oc	ok	oc
+	oc	oki	oc
+	ov	ov	ov
+	oy	oy	oy
+	que	k	que
+	que	ki	que
+	ques	ki	ques
+	ri	ri	ri
+	st	sti	st
+	ti	ti	ti
+	use	uz	use
+	x	ksi	x
+*	ng	n	ng
+
+@obce_funkcjonalnie_twarde_ie
+	ais	aisi	ais
+	bes	bi	bes
+	ce	si	ce
+	ct	kci	ct
+	de	dzi	de
+	dh	dz	dh
+	dh	dzi	dh
+	fe	fi	fe
+	h	dz	h
+	m	mi	m
+	me	mi	me
+	ne	ni	ne
+	nes	ni	nes
+	nh	ni	nh
+	ph	fi	ph
+	phe	fi	phe
+	re	rz	re
+	res	rz	res
+	rh	rz	rh
+	rs	rz	rs
+	se	si	se
+	sne	śni	sne
+	ste	ści	ste
+	stes	ści	stes
+	te	ci	te
+	tes	ci	tes
+	th	ci	th
+	th	si	th
+	the	ci	the
+	thes	ci	thes
+	use	uzi	use
+	v	vi	v
+	ve	vi	ve
+	x	ksi	x
+*	ng	ni	ng
+
+@obce_funkcjonalnie_twarde_iy
+	ai	ai	ai
+	bee	bee	bee
+	co	ki	c
+	cques	ki	cques
+	dhi	dhi	dhi
+	die	die	die
+	dy	dy	dy
+	dí	dí	dí
+	eu	eu	eu
+	ghi	ghi	ghi
+	gie	gie	gie
+	gue	gi	gue
+	gues	gi	gues
+	ji	ji	ji
+	ke	ki	ke
+	kie	kie	kie
+	ky	c	ky
+	ky	ki	ky
+	ky	ky	ky
+	lye	lye	lye
+	nii	ni	nii
+	nii	nii	nii
+	nyi	nyi	nyi
+	pi	pi	pi
+	pie	pie	pie
+	que	ki	que
+	ques	ki	ques
+	re	ry	re
+	rii	ri	rii
+	rii	rii	rii
+	ssy	ssy	ssy
+	szky	scy	szky
+	szky	ski	szky
+	szky	szky	szky
+	thy	thy	thy
+	tie	tie	tie
+	zo	zi	zo
+	ři	ři	ři
+*	ng	ngy	ng
+
+@obce_funkcjonalnie_miekkie_ii_wyglos
+	ay	ai	ay
+	ci	cyj	ci
+	czi	czyj	czi
+	oy	oi	oy
+*	ay	ai	ay
+*	oy	oi	oy
+
+@obce_ais
+	ais	ais	ais
+
+@obce_apostrof
+	bes	bes	bes
+	by	by	by
+	ce	ce	ce
+	cy	cy	cy
+	de	de	de
+	dy	dy	dy
+	es	es	es
+	fe	fe	fe
+	ge	ge	ge
+	ges	ges	ges
+	gues	gues	gues
+	ke	ke	ke
+	kes	kes	kes
+	ky	ky	ky
+	le	le	le
+	les	les	les
+	ly	ly	ly
+	ly	ly	ly
+	me	me	me
+	my	my	my
+	ne	ne	ne
+	nes	nes	nes
+	pe	pe	pe
+	phe	phe	phe
+	phy	phy	phy
+	ques	ques	ques
+	re	re	re
+	res	res	res
+	ry	ry	ry
+	se	se	se
+	ses	ses	ses
+	sy	sy	sy
+	te	te	te
+	tes	tes	tes
+	the	the	the
+	thes	thes	thes
+	thy	thy	thy
+	uy	uy	uy
+	ve	ve	ve
+	ze	ze	ze
+*	es	e	es
+adj-sup	adj:sg:nom.voc:n1.n2:sup	Ca
+adj-sup	adj:sg:nom.voc:m1.m2.m3:sup	Cb
+adj-sup	adj:sg:nom.voc:f:sup	Cc
+adj-sup	adj:sg:loc:m1.m2.m3.n1.n2:sup	Cd
+adj-sup	adj:sg:loc:f:sup	Ce
+adj-sup	adj:sg:inst:m1.m2.m3.n1.n2:sup	Cf
+adj-sup	adj:sg:inst:f:sup	Cg
+adj-sup	adj:sg:gen:m1.m2.m3.n1.n2:sup	Ch
+adj-sup	adj:sg:gen:f:sup	Ci
+adj-sup	adj:sg:dat:m1.m2.m3.n1.n2:sup	Cj
+adj-sup	adj:sg:dat:f:sup	Ck
+adj-sup	adj:sg:acc:n1.n2:sup	Cl
+adj-sup	adj:sg:acc:m3:sup	Cm
+adj-sup	adj:sg:acc:m1.m2:sup	Cn
+adj-sup	adj:sg:acc:f:sup	Co
+adj-sup	adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:sup	Cp
+adj-sup	adj:pl:nom.voc:m1.p1:sup	Cq
+adj-sup	adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:sup	Cr
+adj-sup	adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:sup	Cs
+adj-sup	adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:sup	Ct
+adj-sup	adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:sup	Cu
+adj-sup	adj:pl:acc:m2.m3.f.n1.n2.p2.p3:sup	Cv
+adj-sup	adj:pl:acc:m1.p1:sup	Cw
+verb-neg	ppas:sg:nom.voc:m1.m2.m3:perf:neg	Ua
+verb-neg	ppas:sg:nom.voc:m1.m2.m3:imperf:neg	Ua
+verb-neg	ppas:sg:nom.voc:m1.m2.m3:imperf.perf:neg	Ua
+verb-neg	ppas:sg:nom.voc:f:perf:neg	Ub
+verb-neg	ppas:sg:nom.voc:f:imperf:neg	Ub
+verb-neg	ppas:sg:nom.voc:f:imperf.perf:neg	Ub
+verb-neg	ppas:sg:nom.acc.voc:n1.n2:perf:neg	Uc
+verb-neg	ppas:sg:nom.acc.voc:n1.n2:imperf:neg	Uc
+verb-neg	ppas:sg:nom.acc.voc:n1.n2:imperf.perf:neg	Uc
+verb-neg	ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:neg	Ud
+verb-neg	ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg	Ud
+verb-neg	ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg	Ud
+verb-neg	ppas:sg:gen:m1.m2.m3.n1.n2:perf:neg	Ue
+verb-neg	ppas:sg:gen:m1.m2.m3.n1.n2:imperf:neg	Ue
+verb-neg	ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg	Ue
+verb-neg	ppas:sg:gen.dat.loc:f:perf:neg	Uf
+verb-neg	ppas:sg:gen.dat.loc:f:imperf:neg	Uf
+verb-neg	ppas:sg:gen.dat.loc:f:imperf.perf:neg	Uf
+verb-neg	ppas:sg:dat:m1.m2.m3.n1.n2:perf:neg	Ug
+verb-neg	ppas:sg:dat:m1.m2.m3.n1.n2:imperf:neg	Ug
+verb-neg	ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg	Ug
+verb-neg	ppas:sg:acc:m3:perf:neg	Uh
+verb-neg	ppas:sg:acc:m3:imperf:neg	Uh
+verb-neg	ppas:sg:acc:m3:imperf.perf:neg	Uh
+verb-neg	ppas:sg:acc:m1.m2:perf:neg	Ui
+verb-neg	ppas:sg:acc:m1.m2:imperf:neg	Ui
+verb-neg	ppas:sg:acc:m1.m2:imperf.perf:neg	Ui
+verb-neg	ppas:sg:acc.inst:f:perf:neg	Uj
+verb-neg	ppas:sg:acc.inst:f:imperf:neg	Uj
+verb-neg	ppas:sg:acc.inst:f:imperf.perf:neg	Uj
+verb-neg	ppas:pl:nom.voc:m1.p1:perf:neg	Uk
+verb-neg	ppas:pl:nom.voc:m1.p1:imperf:neg	Uk
+verb-neg	ppas:pl:nom.voc:m1.p1:imperf.perf:neg	Uk
+verb-neg	ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:neg	Ul
+verb-neg	ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg	Ul
+verb-neg	ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg	Ul
+verb-neg	ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg	Um
+verb-neg	ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg	Um
+verb-neg	ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg	Um
+verb-neg	ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg	Un
+verb-neg	ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg	Un
+verb-neg	ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg	Un
+verb-neg	ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:neg	Uo
+verb-neg	ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg	Uo
+verb-neg	ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg	Uo
+verb-neg	ppas:pl:acc:m1.p1:perf:neg	Up
+verb-neg	ppas:pl:acc:m1.p1:imperf:neg	Up
+verb-neg	ppas:pl:acc:m1.p1:imperf.perf:neg	Up
+verb-neg	pact:sg:nom.voc:m1.m2.m3:imperf:neg	Va
+verb-neg	pact:sg:nom.voc:m1.m2.m3:imperf.perf:neg	Va
+verb-neg	pact:sg:nom.voc:f:imperf:neg	Vb
+verb-neg	pact:sg:nom.voc:f:imperf.perf:neg	Vb
+verb-neg	pact:sg:nom.acc.voc:n1.n2:imperf:neg	Vc
+verb-neg	pact:sg:nom.acc.voc:n1.n2:imperf.perf:neg	Vc
+verb-neg	pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:neg	Vd
+verb-neg	pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:neg	Vd
+verb-neg	pact:sg:gen:m1.m2.m3.n1.n2:imperf:neg	Ve
+verb-neg	pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:neg	Ve
+verb-neg	pact:sg:gen.dat.loc:f:imperf:neg	Vf
+verb-neg	pact:sg:gen.dat.loc:f:imperf.perf:neg	Vf
+verb-neg	pact:sg:dat:m1.m2.m3.n1.n2:imperf:neg	Vg
+verb-neg	pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:neg	Vg
+verb-neg	pact:sg:acc:m3:imperf:neg	Vh
+verb-neg	pact:sg:acc:m3:imperf.perf:neg	Vh
+verb-neg	pact:sg:acc:m1.m2:imperf:neg	Vi
+verb-neg	pact:sg:acc:m1.m2:imperf.perf:neg	Vi
+verb-neg	pact:sg:acc.inst:f:imperf:neg	Vj
+verb-neg	pact:sg:acc.inst:f:imperf.perf:neg	Vj
+verb-neg	pact:pl:nom.voc:m1.p1:imperf:neg	Vk
+verb-neg	pact:pl:nom.voc:m1.p1:imperf.perf:neg	Vk
+verb-neg	pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:neg	Vl
+verb-neg	pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:neg	Vl
+verb-neg	pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg	Vm
+verb-neg	pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg	Vm
+verb-neg	pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg	Vn
+verb-neg	pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg	Vn
+verb-neg	pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:neg	Vo
+verb-neg	pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:neg	Vo
+verb-neg	pact:pl:acc:m1.p1:imperf:neg	Vp
+verb-neg	pact:pl:acc:m1.p1:imperf.perf:neg	Vp
+verb-neg	ger:sg:nom.acc:n2:perf:neg	Ta
+verb-neg	ger:sg:nom.acc:n2:imperf:neg	Ta
+verb-neg	ger:sg:nom.acc:n2:imperf.perf:neg	Ta
+verb-neg	ger:sg:inst:n2:perf:neg	Tb
+verb-neg	ger:sg:inst:n2:imperf:neg	Tb
+verb-neg	ger:sg:inst:n2:imperf.perf:neg	Tb
+verb-neg	ger:sg:gen:n2:perf:neg	Tc
+verb-neg	ger:sg:gen:n2:imperf:neg	Tc
+verb-neg	ger:sg:gen:n2:imperf.perf:neg	Tc
+verb-neg	ger:sg:dat.loc:n2:perf:neg	Td
+verb-neg	ger:sg:dat.loc:n2:imperf:neg	Td
+verb-neg	ger:sg:dat.loc:n2:imperf.perf:neg	Td
+verb-neg	ger:pl:nom.acc:n2:perf:neg	Te
+verb-neg	ger:pl:nom.acc:n2:imperf:neg	Te
+verb-neg	ger:pl:nom.acc:n2:imperf.perf:neg	Te
+verb-neg	ger:pl:loc:n2:perf:neg	Tf
+verb-neg	ger:pl:loc:n2:imperf:neg	Tf
+verb-neg	ger:pl:loc:n2:imperf.perf:neg	Tf
+verb-neg	ger:pl:inst:n2:perf:neg	Tg
+verb-neg	ger:pl:inst:n2:imperf:neg	Tg
+verb-neg	ger:pl:inst:n2:imperf.perf:neg	Tg
+verb-neg	ger:pl:gen:n2:perf:neg	Th
+verb-neg	ger:pl:gen:n2:imperf:neg	Th
+verb-neg	ger:pl:gen:n2:imperf.perf:neg	Th
+verb-neg	ger:pl:dat:n2:perf:neg	Ti
+verb-neg	ger:pl:dat:n2:imperf:neg	Ti
+verb-neg	ger:pl:dat:n2:imperf.perf:neg	Ti
+other	winien:sg:n1.n2:ter:imperf	W
+other	winien:sg:n1.n2:sec:imperf	W
+other	winien:sg:n1.n2:pri:imperf	W
+other	winien:sg:n1.n2:imperf	W
+other	winien:sg:m1.m2.m3:ter:imperf	W
+other	winien:sg:m1.m2.m3:sec:imperf	W
+other	winien:sg:m1.m2.m3:pri:imperf	W
+other	winien:sg:m1.m2.m3:imperf	W
+other	winien:sg:f:ter:imperf	W
+other	winien:sg:f:sec:imperf	W
+other	winien:sg:f:pri:imperf	W
+other	winien:sg:f:imperf	W
+other	winien:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf	W
+other	winien:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf	W
+other	winien:pl:m2.m3.f.n1.n2.p2.p3:imperf	W
+other	winien:pl:m1.p1:ter:imperf	W
+other	winien:pl:m1.p1:sec:imperf	W
+other	winien:pl:m1.p1:pri:imperf	W
+other	winien:pl:m1.p1:imperf	W
+noun	subst:sg:voc:n2	Gva
+noun	subst:sg:voc:n1	Gvb
+noun	subst:sg:voc:m3	Gvc
+noun	subst:sg:voc:m2	Gvd
+noun	subst:sg:voc:m1	Gve
+noun	subst:sg:voc:f	Gvf
+noun	subst:sg:nom:n2	Gna
+noun	subst:sg:nom:n1	Gnb
+noun	subst:sg:nom:m3	Gnc
+noun	subst:sg:nom:m2	Gnd
+noun	subst:sg:nom:m1	Gne
+noun	subst:sg:nom:f	Gnf
+noun	subst:sg:loc:n2	Gla
+noun	subst:sg:loc:n1	Glb
+noun	subst:sg:loc:m3	Glc
+noun	subst:sg:loc:m2	Gld
+noun	subst:sg:loc:m1	Gle
+noun	subst:sg:loc:f	Glf
+noun	subst:sg:inst:n2	Gia
+noun	subst:sg:inst:n1	Gib
+noun	subst:sg:inst:m3	Gic
+noun	subst:sg:inst:m2	Gid
+noun	subst:sg:inst:m1	Gie
+noun	subst:sg:inst:f	Gif
+noun	subst:sg:gen:n2	Gga
+noun	subst:sg:gen:n1	Ggb
+noun	subst:sg:gen:m3	Ggc
+noun	subst:sg:gen:m2	Ggd
+noun	subst:sg:gen:m1	Gge
+noun	subst:sg:gen:f	Ggf
+noun	subst:sg:dat:n2	Gda
+noun	subst:sg:dat:n1	Gdb
+noun	subst:sg:dat:m3	Gdc
+noun	subst:sg:dat:m2	Gdd
+noun	subst:sg:dat:m1	Gde
+noun	subst:sg:dat:f	Gdf
+noun	subst:sg:acc:n2	Gaa
+noun	subst:sg:acc:n1	Gab
+noun	subst:sg:acc:m3	Gac
+noun	subst:sg:acc:m2	Gad
+noun	subst:sg:acc:m1	Gae
+noun	subst:sg:acc:f	Gaf
+noun	subst:pl:voc:p3	Yvp
+noun	subst:pl:voc:p2	Yvq
+noun	subst:pl:voc:p1	Yvr
+noun	subst:pl:voc:n2	Yva
+noun	subst:pl:voc:n1	Yvb
+noun	subst:pl:voc:m3	Yvc
+noun	subst:pl:voc:m2	Yvd
+noun	subst:pl:voc:m1	Yve
+noun	subst:pl:voc:f	Yvf
+noun	subst:pl:nom:p3	Ynp
+noun	subst:pl:nom:p2	Ynq
+noun	subst:pl:nom:p1	Ynr
+noun	subst:pl:nom:n2	Yna
+noun	subst:pl:nom:n1	Ynb
+noun	subst:pl:nom:m3	Ync
+noun	subst:pl:nom:m2	Ynd
+noun	subst:pl:nom:m1	Yne
+noun	subst:pl:nom:f	Ynf
+noun	subst:pl:loc:p3	Ylp
+noun	subst:pl:loc:p2	Ylq
+noun	subst:pl:loc:p1	Ylr
+noun	subst:pl:loc:n2	Yla
+noun	subst:pl:loc:n1	Ylb
+noun	subst:pl:loc:m3	Ylc
+noun	subst:pl:loc:m2	Yld
+noun	subst:pl:loc:m1	Yle
+noun	subst:pl:loc:f	Ylf
+noun	subst:pl:inst:p3	Yip
+noun	subst:pl:inst:p2	Yiq
+noun	subst:pl:inst:p1	Yir
+noun	subst:pl:inst:n2	Yia
+noun	subst:pl:inst:n1	Yib
+noun	subst:pl:inst:m3	Yic
+noun	subst:pl:inst:m2	Yid
+noun	subst:pl:inst:m1	Yie
+noun	subst:pl:inst:f	Yif
+noun	subst:pl:gen:p3	Ygp
+noun	subst:pl:gen:p2	Ygq
+noun	subst:pl:gen:p1	Ygr
+noun	subst:pl:gen:n2	Yga
+noun	subst:pl:gen:n1	Ygb
+noun	subst:pl:gen:m3	Ygc
+noun	subst:pl:gen:m2	Ygd
+noun	subst:pl:gen:m1	Yge
+noun	subst:pl:gen:f	Ygf
+noun	subst:pl:dat:p3	Ydp
+noun	subst:pl:dat:p2	Ydq
+noun	subst:pl:dat:p1	Ydr
+noun	subst:pl:dat:n2	Yda
+noun	subst:pl:dat:n1	Ydb
+noun	subst:pl:dat:m3	Ydc
+noun	subst:pl:dat:m2	Ydd
+noun	subst:pl:dat:m1	Yde
+noun	subst:pl:dat:f	Ydf
+noun	subst:pl:acc:p3	Yap
+noun	subst:pl:acc:p2	Yaq
+noun	subst:pl:acc:p1	Yar
+noun	subst:pl:acc:n2	Yaa
+noun	subst:pl:acc:n1	Yab
+noun	subst:pl:acc:m3	Yac
+noun	subst:pl:acc:m2	Yad
+noun	subst:pl:acc:m1	Yae
+noun	subst:pl:acc:f	Yaf
+other	qub	W
+other	prep:nom	W
+other	prep:loc:wok	W
+other	prep:loc:nwok	W
+other	prep:loc	W
+other	prep:inst:wok	W
+other	prep:inst:nwok	W
+other	prep:inst	W
+other	prep:gen:wok	W
+other	prep:gen:nwok	W
+other	prep:gen	W
+other	prep:dat	W
+other	prep:acc:wok	W
+other	prep:acc:nwok	W
+other	prep:acc	W
+other	pred	W
+verb	praet:sg:n1.n2:ter:perf	Ja
+verb	praet:sg:n1.n2:ter:imperf.perf	Ja
+verb	praet:sg:n1.n2:ter:imperf	Ja
+verb	praet:sg:n1.n2:sec:perf	Jb
+verb	praet:sg:n1.n2:sec:imperf.perf	Jb
+verb	praet:sg:n1.n2:sec:imperf	Jb
+verb	praet:sg:n1.n2:pri:perf	Jc
+verb	praet:sg:n1.n2:pri:imperf.perf	Jc
+verb	praet:sg:n1.n2:pri:imperf	Jc
+verb	praet:sg:n1.n2:perf	Jd
+verb	praet:sg:n1.n2:imperf.perf	Jd
+verb	praet:sg:n1.n2:imperf	Jd
+verb	praet:sg:m1.m2.m3:ter:perf	Je
+verb	praet:sg:m1.m2.m3:ter:imperf.perf	Je
+verb	praet:sg:m1.m2.m3:ter:imperf	Je
+verb	praet:sg:m1.m2.m3:sec:perf	Jf
+verb	praet:sg:m1.m2.m3:sec:imperf.perf	Jf
+verb	praet:sg:m1.m2.m3:sec:imperf	Jf
+verb	praet:sg:m1.m2.m3:pri:perf	Jg
+verb	praet:sg:m1.m2.m3:pri:imperf.perf	Jg
+verb	praet:sg:m1.m2.m3:pri:imperf	Jg
+verb	praet:sg:m1.m2.m3:perf:nagl.agl	Jh
+verb	praet:sg:m1.m2.m3:imperf:nagl.agl	Jh
+verb	praet:sg:m1.m2.m3:imperf.perf	Jh
+verb	praet:sg:f:ter:perf	Ji
+verb	praet:sg:f:ter:imperf	Ji
+verb	praet:sg:f:sec:perf	Jj
+verb	praet:sg:f:sec:imperf	Jj
+verb	praet:sg:f:pri:perf	Jk
+verb	praet:sg:f:pri:imperf	Jk
+verb	praet:sg:f:perf	Jl
+verb	praet:sg:f:imperf	Jl
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:ter:perf	Jm
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf.perf	Jm
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf	Jm
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:sec:perf	Jn
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf.perf	Jn
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf	Jn
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:pri:perf	Jo
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf.perf	Jo
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf	Jo
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:perf	Jp
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:imperf.perf	Jp
+verb	praet:pl:m2.m3.f.n1.n2.p2.p3:imperf	Jp
+verb	praet:pl:m1.p1:ter:perf	Jq
+verb	praet:pl:m1.p1:ter:imperf	Jq
+verb	praet:pl:m1.p1:sec:perf	Jr
+verb	praet:pl:m1.p1:sec:imperf	Jr
+verb	praet:pl:m1.p1:pri:perf	Js
+verb	praet:pl:m1.p1:pri:imperf	Js
+verb	praet:pl:m1.p1:perf	Jt
+verb	praet:pl:m1.p1:imperf	Jt
+other	ppron3:sg:nom:n1.n2:ter:akc.nakc:praep.npraep	W
+other	ppron3:sg:nom:m1.m2.m3:ter:akc.nakc:praep.npraep	W
+other	ppron3:sg:nom:f:ter:akc.nakc:praep.npraep	W
+other	ppron3:sg:loc:n1.n2:ter:akc.nakc:praep.npraep	W
+other	ppron3:sg:loc:m1.m2.m3:ter:akc.nakc:praep.npraep	W
+other	ppron3:sg:loc:f:ter:akc.nakc:praep.npraep	W
+other	ppron3:sg:inst:n1.n2:ter:akc.nakc:praep.npraep	W
+other	ppron3:sg:inst:m1.m2.m3:ter:akc.nakc:praep.npraep	W
+other	ppron3:sg:inst:f:ter:akc.nakc:praep.npraep	W
+other	ppron3:sg:gen:n1.n2:ter:nakc:npraep	W
+other	ppron3:sg:gen:n1.n2:ter:akc:npraep	W
+other	ppron3:sg:gen:n1.n2:ter:akc.nakc:praep	W
+other	ppron3:sg:gen:m1.m2.m3:ter:nakc:praep	W
+other	ppron3:sg:gen:m1.m2.m3:ter:nakc:npraep	W
+other	ppron3:sg:gen:m1.m2.m3:ter:akc:praep	W
+other	ppron3:sg:gen:m1.m2.m3:ter:akc:npraep	W
+other	ppron3:sg:gen:f:ter:akc.nakc:praep	W
+other	ppron3:sg:gen:f:ter:akc.nakc:npraep	W
+other	ppron3:sg:dat:n1.n2:ter:nakc:npraep	W
+other	ppron3:sg:dat:n1.n2:ter:akc:npraep	W
+other	ppron3:sg:dat:n1.n2:ter:akc.nakc:praep	W
+other	ppron3:sg:dat:m1.m2.m3:ter:nakc:npraep	W
+other	ppron3:sg:dat:m1.m2.m3:ter:akc:npraep	W
+other	ppron3:sg:dat:m1.m2.m3:ter:akc.nakc:praep	W
+other	ppron3:sg:dat:f:ter:akc.nakc:praep	W
+other	ppron3:sg:dat:f:ter:akc.nakc:npraep	W
+other	ppron3:sg:acc:n1.n2:ter:akc.nakc:praep	W
+other	ppron3:sg:acc:n1.n2:ter:akc.nakc:npraep	W
+other	ppron3:sg:acc:m1.m2.m3:ter:nakc:praep	W
+other	ppron3:sg:acc:m1.m2.m3:ter:nakc:npraep	W
+other	ppron3:sg:acc:m1.m2.m3:ter:akc:praep	W
+other	ppron3:sg:acc:m1.m2.m3:ter:akc:npraep	W
+other	ppron3:sg:acc:f:ter:akc.nakc:praep	W
+other	ppron3:sg:acc:f:ter:akc.nakc:npraep	W
+other	ppron3:pl:nom:m2.m3.f.n1.n2.p2.p3:ter:akc.nakc:praep.npraep	W
+other	ppron3:pl:nom:m1.p1:ter:akc.nakc:praep.npraep	W
+other	ppron3:pl:loc:_:ter:akc.nakc:praep.npraep	W
+other	ppron3:pl:inst:_:ter:akc.nakc:praep.npraep	W
+other	ppron3:pl:gen:_:ter:akc.nakc:praep	W
+other	ppron3:pl:gen:_:ter:akc.nakc:npraep	W
+other	ppron3:pl:dat:_:ter:akc.nakc:praep	W
+other	ppron3:pl:dat:_:ter:akc.nakc:npraep	W
+other	ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:akc.nakc:praep	W
+other	ppron3:pl:acc:m2.m3.f.n1.n2.p2.p3:ter:akc.nakc:npraep	W
+other	ppron3:pl:acc:m1.p1:ter:akc.nakc:praep	W
+other	ppron3:pl:acc:m1.p1:ter:akc.nakc:npraep	W
+other	ppron12:sg:voc:m1.m2.m3.f.n1.n2:sec	W
+other	ppron12:sg:voc:m1.m2.m3.f.n1.n2:pri	W
+other	ppron12:sg:nom:m1.m2.m3.f.n1.n2:sec	W
+other	ppron12:sg:nom:m1.m2.m3.f.n1.n2:pri	W
+other	ppron12:sg:loc:m1.m2.m3.f.n1.n2:sec	W
+other	ppron12:sg:loc:m1.m2.m3.f.n1.n2:pri	W
+other	ppron12:sg:inst:m1.m2.m3.f.n1.n2:sec	W
+other	ppron12:sg:inst:m1.m2.m3.f.n1.n2:pri	W
+other	ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:nakc	W
+other	ppron12:sg:gen:m1.m2.m3.f.n1.n2:sec:akc	W
+other	ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:nakc	W
+other	ppron12:sg:gen:m1.m2.m3.f.n1.n2:pri:akc	W
+other	ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:nakc	W
+other	ppron12:sg:dat:m1.m2.m3.f.n1.n2:sec:akc	W
+other	ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:nakc	W
+other	ppron12:sg:dat:m1.m2.m3.f.n1.n2:pri:akc	W
+other	ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:nakc	W
+other	ppron12:sg:acc:m1.m2.m3.f.n1.n2:sec:akc	W
+other	ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:nakc	W
+other	ppron12:sg:acc:m1.m2.m3.f.n1.n2:pri:akc	W
+other	ppron12:pl:voc:_:sec	W
+other	ppron12:pl:voc:_:pri	W
+other	ppron12:pl:nom:_:sec	W
+other	ppron12:pl:nom:_:pri	W
+other	ppron12:pl:loc:_:sec	W
+other	ppron12:pl:loc:_:pri	W
+other	ppron12:pl:inst:_:sec	W
+other	ppron12:pl:inst:_:pri	W
+other	ppron12:pl:gen:_:sec	W
+other	ppron12:pl:gen:_:pri	W
+other	ppron12:pl:dat:_:sec	W
+other	ppron12:pl:dat:_:pri	W
+other	ppron12:pl:acc:_:sec	W
+other	ppron12:pl:acc:_:pri	W
+verb	ppas:sg:nom.voc:m1.m2.m3:perf:aff	Ra
+verb	ppas:sg:nom.voc:m1.m2.m3:imperf:aff	Ra
+verb	ppas:sg:nom.voc:m1.m2.m3:imperf.perf:aff	Ra
+verb	ppas:sg:nom.voc:f:perf:aff	Rb
+verb	ppas:sg:nom.voc:f:imperf:aff	Rb
+verb	ppas:sg:nom.voc:f:imperf.perf:aff	Rb
+verb	ppas:sg:nom.acc.voc:n1.n2:perf:aff	Rc
+verb	ppas:sg:nom.acc.voc:n1.n2:imperf:aff	Rc
+verb	ppas:sg:nom.acc.voc:n1.n2:imperf.perf:aff	Rc
+verb	ppas:sg:inst.loc:m1.m2.m3.n1.n2:perf:aff	Rd
+verb	ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff	Rd
+verb	ppas:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff	Rd
+verb	ppas:sg:gen:m1.m2.m3.n1.n2:perf:aff	Re
+verb	ppas:sg:gen:m1.m2.m3.n1.n2:imperf:aff	Re
+verb	ppas:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff	Re
+verb	ppas:sg:gen.dat.loc:f:perf:aff	Rf
+verb	ppas:sg:gen.dat.loc:f:imperf:aff	Rf
+verb	ppas:sg:gen.dat.loc:f:imperf.perf:aff	Rf
+verb	ppas:sg:dat:m1.m2.m3.n1.n2:perf:aff	Rg
+verb	ppas:sg:dat:m1.m2.m3.n1.n2:imperf:aff	Rg
+verb	ppas:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff	Rg
+verb	ppas:sg:acc:m3:perf:aff	Rh
+verb	ppas:sg:acc:m3:imperf:aff	Rh
+verb	ppas:sg:acc:m3:imperf.perf:aff	Rh
+verb	ppas:sg:acc:m1.m2:perf:aff	Ri
+verb	ppas:sg:acc:m1.m2:imperf:aff	Ri
+verb	ppas:sg:acc:m1.m2:imperf.perf:aff	Ri
+verb	ppas:sg:acc.inst:f:perf:aff	Rj
+verb	ppas:sg:acc.inst:f:imperf:aff	Rj
+verb	ppas:sg:acc.inst:f:imperf.perf:aff	Rj
+verb	ppas:pl:nom.voc:m1.p1:perf:aff	Rk
+verb	ppas:pl:nom.voc:m1.p1:imperf:aff	Rk
+verb	ppas:pl:nom.voc:m1.p1:imperf.perf:aff	Rk
+verb	ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:perf:aff	Rl
+verb	ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff	Rl
+verb	ppas:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff	Rl
+verb	ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff	Rm
+verb	ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff	Rm
+verb	ppas:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff	Rm
+verb	ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff	Rn
+verb	ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff	Rn
+verb	ppas:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff	Rn
+verb	ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:perf:aff	Ro
+verb	ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff	Ro
+verb	ppas:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff	Ro
+verb	ppas:pl:acc:m1.p1:perf:aff	Rp
+verb	ppas:pl:acc:m1.p1:imperf:aff	Rp
+verb	ppas:pl:acc:m1.p1:imperf.perf:aff	Rp
+verb	pcon:imperf	O
+verb	pant:perf	P
+verb	pact:sg:nom.voc:m1.m2.m3:imperf:aff	Qa
+verb	pact:sg:nom.voc:m1.m2.m3:imperf.perf:aff	Qa
+verb	pact:sg:nom.voc:f:imperf:aff	Qb
+verb	pact:sg:nom.voc:f:imperf.perf:aff	Qb
+verb	pact:sg:nom.acc.voc:n1.n2:imperf:aff	Qc
+verb	pact:sg:nom.acc.voc:n1.n2:imperf.perf:aff	Qc
+verb	pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf:aff	Qd
+verb	pact:sg:inst.loc:m1.m2.m3.n1.n2:imperf.perf:aff	Qd
+verb	pact:sg:gen:m1.m2.m3.n1.n2:imperf:aff	Qe
+verb	pact:sg:gen:m1.m2.m3.n1.n2:imperf.perf:aff	Qe
+verb	pact:sg:gen.dat.loc:f:imperf:aff	Qf
+verb	pact:sg:gen.dat.loc:f:imperf.perf:aff	Qf
+verb	pact:sg:dat:m1.m2.m3.n1.n2:imperf:aff	Qg
+verb	pact:sg:dat:m1.m2.m3.n1.n2:imperf.perf:aff	Qg
+verb	pact:sg:acc:m3:imperf:aff	Qh
+verb	pact:sg:acc:m3:imperf.perf:aff	Qh
+verb	pact:sg:acc:m1.m2:imperf:aff	Qi
+verb	pact:sg:acc:m1.m2:imperf.perf:aff	Qi
+verb	pact:sg:acc.inst:f:imperf:aff	Qj
+verb	pact:sg:acc.inst:f:imperf.perf:aff	Qj
+verb	pact:pl:nom.voc:m1.p1:imperf:aff	Qk
+verb	pact:pl:nom.voc:m1.p1:imperf.perf:aff	Qk
+verb	pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf:aff	Ql
+verb	pact:pl:nom.acc.voc:m2.m3.f.n1.n2.p2.p3:imperf.perf:aff	Ql
+verb	pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff	Qm
+verb	pact:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff	Qm
+verb	pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff	Qn
+verb	pact:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff	Qn
+verb	pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf:aff	Qo
+verb	pact:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:imperf.perf:aff	Qo
+verb	pact:pl:acc:m1.p1:imperf:aff	Qp
+verb	pact:pl:acc:m1.p1:imperf.perf:aff	Qp
+other	num:sg:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.n1.n2:rec	W
+other	num:sg:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.f.n1.n2:rec	W
+other	num:sg:nom.gen.dat.inst.acc.loc.voc:f:rec	W
+other	num:sg:nom.acc:m1.m2.m3.f.n1.n2:rec	W
+other	num:sg.pl:nom.acc:m1.m2.m3.f.n1.n2.p1.p2:rec	W
+other	num:pl:nom.voc:m1:rec	W
+other	num:pl:nom.voc:m1:congr	W
+other	num:pl:nom.gen.dat.inst.acc.loc.voc:m1.m2.m3.f.n1.n2.p1.p2:rec	W
+other	num:pl:nom.acc:m1.m2.m3.f.n1.n2.p1.p2:rec	W
+other	num:pl:nom.acc.voc:n1.p1.p2:rec	W
+other	num:pl:nom.acc.voc:m2.m3.n2:congr	W
+other	num:pl:nom.acc.voc:m2.m3.n2.f:congr	W
+other	num:pl:nom.acc.voc:m2.m3.f.n2:rec	W
+other	num:pl:nom.acc.voc:m2.m3.f.n1.n2.p1.p2:rec	W
+other	num:pl:nom.acc.voc:m1:rec	W
+other	num:pl:nom.acc.voc:f:congr	W
+other	num:pl:inst:n1.p1.p2:rec	W
+other	num:pl:inst:m1.m2.m3.n2:congr	W
+other	num:pl:inst:m1.m2.m3.n2.f:congr	W
+other	num:pl:inst:m1.m2.m3.f.n2:congr	W
+other	num:pl:inst:m1.m2.m3.f.n1.n2.p1.p2:congr	W
+other	num:pl:inst:f:congr	W
+other	num:pl:gen:n1.p1.p2:rec	W
+other	num:pl:gen.loc:m1.m2.m3.n2.f:congr	W
+other	num:pl:gen.loc:m1.m2.m3.f.n1.n2.p1.p2:congr	W
+other	num:pl:gen.dat.loc:m1.m2.m3.n2.f:congr	W
+other	num:pl:gen.dat.inst.loc:m1.m2.m3.f.n2:congr	W
+other	num:pl:gen.dat.inst.loc:m1.m2.m3.f.n1.n2.p1.p2:congr	W
+other	num:pl:dat:m1.m2.m3.n2.f:congr	W
+other	num:pl:dat.loc:n1.p1.p2:congr.rec	W
+other	num:pl:acc:m1:rec	W
+other	num:comp	W
+other	interj	W
+verb	inf:perf	I
+verb	inf:imperf.perf	I
+verb	inf:imperf	I
+verb	impt:sg:sec:perf	Ma
+verb	impt:sg:sec:imperf.perf	Ma
+verb	impt:sg:sec:imperf	Ma
+verb	impt:pl:sec:perf	Mb
+verb	impt:pl:sec:imperf.perf	Mb
+verb	impt:pl:sec:imperf	Mb
+verb	impt:pl:pri:perf	Mc
+verb	impt:pl:pri:imperf.perf	Mc
+verb	impt:pl:pri:imperf	Mc
+verb	imps:perf	N
+verb	imps:imperf.perf	N
+verb	imps:imperf	N
+verb	ger:sg:nom.acc:n2:perf:aff	Xa
+verb	ger:sg:nom.acc:n2:imperf:aff	Xa
+verb	ger:sg:nom.acc:n2:imperf.perf:aff	Xa
+verb	ger:sg:inst:n2:perf:aff	Xb
+verb	ger:sg:inst:n2:imperf:aff	Xb
+verb	ger:sg:inst:n2:imperf.perf:aff	Xb
+verb	ger:sg:gen:n2:perf:aff	Xc
+verb	ger:sg:gen:n2:imperf:aff	Xc
+verb	ger:sg:gen:n2:imperf.perf:aff	Xc
+verb	ger:sg:dat.loc:n2:perf:aff	Xd
+verb	ger:sg:dat.loc:n2:imperf:aff	Xd
+verb	ger:sg:dat.loc:n2:imperf.perf:aff	Xd
+verb	ger:pl:nom.acc:n2:perf:aff	Xe
+verb	ger:pl:nom.acc:n2:imperf:aff	Xe
+verb	ger:pl:nom.acc:n2:imperf.perf:aff	Xe
+verb	ger:pl:loc:n2:perf:aff	Xf
+verb	ger:pl:loc:n2:imperf:aff	Xf
+verb	ger:pl:loc:n2:imperf.perf:aff	Xf
+verb	ger:pl:inst:n2:perf:aff	Xg
+verb	ger:pl:inst:n2:imperf:aff	Xg
+verb	ger:pl:inst:n2:imperf.perf:aff	Xg
+verb	ger:pl:gen:n2:perf:aff	Xh
+verb	ger:pl:gen:n2:imperf:aff	Xh
+verb	ger:pl:gen:n2:imperf.perf:aff	Xh
+verb	ger:pl:dat:n2:perf:aff	Xi
+verb	ger:pl:dat:n2:imperf:aff	Xi
+verb	ger:pl:dat:n2:imperf.perf:aff	Xi
+verb	fin:sg:ter:perf	La
+verb	fin:sg:ter:imperf.perf	La
+verb	fin:sg:ter:imperf	La
+verb	fin:sg:sec:perf	Lb
+verb	fin:sg:sec:imperf.perf	Lb
+verb	fin:sg:sec:imperf	Lb
+verb	fin:sg:pri:perf	Lc
+verb	fin:sg:pri:imperf.perf	Lc
+verb	fin:sg:pri:imperf	Lc
+verb	fin:pl:ter:perf	Ld
+verb	fin:pl:ter:imperf.perf	Ld
+verb	fin:pl:ter:imperf	Ld
+verb	fin:pl:sec:perf	Le
+verb	fin:pl:sec:imperf.perf	Le
+verb	fin:pl:sec:imperf	Le
+verb	fin:pl:pri:perf	Lf
+verb	fin:pl:pri:imperf.perf	Lf
+verb	fin:pl:pri:imperf	Lf
+noun	depr:pl:voc:m2	Hv
+noun	depr:pl:nom:m2	Hn
+other	conj	W
+verb	cond:sg:n1.n2:ter:perf	Ka
+verb	cond:sg:n1.n2:ter:imperf.perf	Ka
+verb	cond:sg:n1.n2:ter:imperf	Ka
+verb	cond:sg:n1.n2:sec:perf	Kb
+verb	cond:sg:n1.n2:sec:imperf.perf	Kb
+verb	cond:sg:n1.n2:sec:imperf	Kb
+verb	cond:sg:n1.n2:pri:perf	Kc
+verb	cond:sg:n1.n2:pri:imperf.perf	Kc
+verb	cond:sg:n1.n2:pri:imperf	Kc
+verb	cond:sg:n1.n2:perf	Kd
+verb	cond:sg:n1.n2:imperf.perf	Kd
+verb	cond:sg:n1.n2:imperf	Kd
+verb	cond:sg:m1.m2.m3:ter:perf	Ke
+verb	cond:sg:m1.m2.m3:ter:imperf.perf	Ke
+verb	cond:sg:m1.m2.m3:ter:imperf	Ke
+verb	cond:sg:m1.m2.m3:sec:perf	Kf
+verb	cond:sg:m1.m2.m3:sec:imperf.perf	Kf
+verb	cond:sg:m1.m2.m3:sec:imperf	Kf
+verb	cond:sg:m1.m2.m3:pri:perf	Kg
+verb	cond:sg:m1.m2.m3:pri:imperf.perf	Kg
+verb	cond:sg:m1.m2.m3:pri:imperf	Kg
+verb	cond:sg:f:ter:perf	Kh
+verb	cond:sg:f:ter:imperf.perf	Kh
+verb	cond:sg:f:ter:imperf	Kh
+verb	cond:sg:f:sec:perf	Ki
+verb	cond:sg:f:sec:imperf.perf	Ki
+verb	cond:sg:f:sec:imperf	Ki
+verb	cond:sg:f:pri:perf	Kj
+verb	cond:sg:f:pri:imperf.perf	Kj
+verb	cond:sg:f:pri:imperf	Kj
+verb	cond:pl:m2.m3.f.n1.n2.p2.p3:ter:perf	Kk
+verb	cond:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf.perf	Kk
+verb	cond:pl:m2.m3.f.n1.n2.p2.p3:ter:imperf	Kk
+verb	cond:pl:m2.m3.f.n1.n2.p2.p3:sec:perf	Kl
+verb	cond:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf.perf	Kl
+verb	cond:pl:m2.m3.f.n1.n2.p2.p3:sec:imperf	Kl
+verb	cond:pl:m2.m3.f.n1.n2.p2.p3:pri:perf	Km
+verb	cond:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf.perf	Km
+verb	cond:pl:m2.m3.f.n1.n2.p2.p3:pri:imperf	Km
+verb	cond:pl:m1.p1:ter:perf	Kn
+verb	cond:pl:m1.p1:ter:imperf.perf	Kn
+verb	cond:pl:m1.p1:ter:imperf	Kn
+verb	cond:pl:m1.p1:sec:perf	Ko
+verb	cond:pl:m1.p1:sec:imperf.perf	Ko
+verb	cond:pl:m1.p1:sec:imperf	Ko
+verb	cond:pl:m1.p1:pri:perf	Kp
+verb	cond:pl:m1.p1:pri:imperf.perf	Kp
+verb	cond:pl:m1.p1:pri:imperf	Kp
+other	comp	W
+other	burk	W
+other	brev:pun	W
+other	brev:npun	W
+other	bedzie:sg:ter:imperf	W
+other	bedzie:sg:sec:imperf	W
+other	bedzie:sg:pri:imperf	W
+other	bedzie:pl:ter:imperf	W
+other	bedzie:pl:sec:imperf	W
+other	bedzie:pl:pri:imperf	W
+other	aglt:sg:sec:imperf:wok	W
+other	aglt:sg:sec:imperf:nwok	W
+other	aglt:sg:pri:imperf:wok	W
+other	aglt:sg:pri:imperf:nwok	W
+other	aglt:pl:sec:imperf:wok	W
+other	aglt:pl:sec:imperf:nwok	W
+other	aglt:pl:pri:imperf:wok	W
+other	aglt:pl:pri:imperf:nwok	W
+adv-sup	adv:sup	W
+adv	adv:pos	W
+adv-com	adv:com	W
+adv	adv	W
+adj	adjp	F
+adj	adjc	E
+adj	adja	D
+adj	adj:sg:nom.voc:n1.n2:pos	Aa
+adj-com	adj:sg:nom.voc:n1.n2:com	Ba
+adj	adj:sg:nom.voc:m1.m2.m3:pos	Ab
+adj-com	adj:sg:nom.voc:m1.m2.m3:com	Bb
+adj	adj:sg:nom.voc:f:pos	Ac
+adj-com	adj:sg:nom.voc:f:com	Bc
+adj	adj:sg:loc:m1.m2.m3.n1.n2:pos	Ad
+adj-com	adj:sg:loc:m1.m2.m3.n1.n2:com	Bd
+adj	adj:sg:loc:f:pos	Ae
+adj-com	adj:sg:loc:f:com	Be
+adj	adj:sg:inst:m1.m2.m3.n1.n2:pos	Af
+adj-com	adj:sg:inst:m1.m2.m3.n1.n2:com	Bf
+adj	adj:sg:inst:f:pos	Ag
+adj-com	adj:sg:inst:f:com	Bg
+adj	adj:sg:gen:m1.m2.m3.n1.n2:pos	Ah
+adj-com	adj:sg:gen:m1.m2.m3.n1.n2:com	Bh
+adj	adj:sg:gen:f:pos	Ai
+adj-com	adj:sg:gen:f:com	Bi
+adj	adj:sg:dat:m1.m2.m3.n1.n2:pos	Aj
+adj-com	adj:sg:dat:m1.m2.m3.n1.n2:com	Bj
+adj	adj:sg:dat:f:pos	Ak
+adj-com	adj:sg:dat:f:com	Bk
+adj	adj:sg:acc:n1.n2:pos	Al
+adj-com	adj:sg:acc:n1.n2:com	Bl
+adj	adj:sg:acc:m3:pos	Am
+adj-com	adj:sg:acc:m3:com	Bm
+adj	adj:sg:acc:m1.m2:pos	An
+adj-com	adj:sg:acc:m1.m2:com	Bn
+adj	adj:sg:acc:f:pos	Ao
+adj-com	adj:sg:acc:f:com	Bo
+adj	adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos	Ap
+adj-com	adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:com	Bp
+adj	adj:pl:nom.voc:m1.p1:pos	Aq
+adj-com	adj:pl:nom.voc:m1.p1:com	Bq
+adj	adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:pos	Ar
+adj-com	adj:pl:loc:m1.m2.m3.f.n1.n2.p1.p2.p3:com	Br
+adj	adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:pos	As
+adj-com	adj:pl:inst:m1.m2.m3.f.n1.n2.p1.p2.p3:com	Bs
+adj	adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:pos	At
+adj-com	adj:pl:gen:m1.m2.m3.f.n1.n2.p1.p2.p3:com	Bt
+adj	adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:pos	Au
+adj-com	adj:pl:dat:m1.m2.m3.f.n1.n2.p1.p2.p3:com	Bu
+adj	adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos	Av
+adj-com	adj:pl:acc:m2.m3.f.n1.n2.p2.p3:com	Bv
+adj	adj:pl:acc:m1.p1:pos	Aw
+adj-com	adj:pl:acc:m1.p1:com	Bw
+verb	pacta	XYZ
@@ -32,6 +32,7 @@
 @NOUN-LEMMA-CAP
 	kapitaliki_wyglos		lemma=ε
 	kapitaliki_wyglos	A	lemma=A
+# lemma=końcówka_lematu 
  
 @VERB-LEMMA-PATAL
 	funkcjonalnie_miekkie_iy	ć	lemma=ć
@@ -57,3 +58,14 @@
 	dowolne	yć	lemma=yć
 	inf_e	eć	lemma=eć
  
+@NOUN-LEMMA-FOREIGN
+	obce_funkcjonalnie_twarde_a	a	lemma=a
+*	obce_funkcjonalnie_twarde_a	ah	lemma=ah
+	obce_funkcjonalnie_twarde_e		lemma=ε
+	obce_funkcjonalnie_twarde_ie		lemma=ε
+	obce_funkcjonalnie_twarde_ie	a	lemma=a
+	obce_funkcjonalnie_twarde_iy		lemma=y
+	obce_funkcjonalnie_miekkie_ii_wyglos	a	lemma=a
+	obce_funkcjonalnie_miekkie_ii_wyglos		lemma=ε
+	obce_ais		lemma=ais
+	obce_apostrof		lemma='
@@ -312,3 +312,67 @@
 	verb_j	e	group=e cat=verb
 	funkcjonalnie_twarde_i	e	group=ie cat=verb
  
+@NOUN-FLEX-FOREIGN
+	obce_funkcjonalnie_twarde_iy		flex=y2 con cat=noun
+	obce_funkcjonalnie_twarde_iy	ch	flex=ych con cat=noun
+	obce_funkcjonalnie_twarde_iy	m	flex=ym con cat=noun
+	obce_funkcjonalnie_twarde_iy	mi	flex=ymi con cat=noun
+*	obce_funkcjonalnie_twarde_iy	es	flex=s con cat=noun
+*	obce_funkcjonalnie_twarde_iy	s	flex=s con cat=noun
+	obce_funkcjonalnie_twarde_e	e	flex=e1 con cat=noun
+	obce_funkcjonalnie_twarde_ie	e	flex=ie con cat=noun
+	obce_funkcjonalnie_twarde_e	ego	flex=ego con cat=noun
+	obce_funkcjonalnie_twarde_e	ej	flex=ej con cat=noun
+	obce_funkcjonalnie_twarde_e	em	flex=em2 con cat=noun
+	obce_funkcjonalnie_twarde_e	emu	flex=emu con cat=noun
+	obce_funkcjonalnie_twarde_a	a	flex=a1 con cat=noun
+	obce_funkcjonalnie_twarde_a	ach	flex=ach con cat=noun
+	obce_funkcjonalnie_twarde_a	ami	flex=ami1 con cat=noun
+	obce_funkcjonalnie_twarde_a	ą	flex=ą con cat=noun
+	obce_funkcjonalnie_twarde_a	ę	flex=ę con cat=noun
+	obce_funkcjonalnie_twarde_a	o	flex=o2 con cat=noun
+	obce_funkcjonalnie_twarde_a	om	flex=om con cat=noun
+	obce_funkcjonalnie_twarde_a	ów	flex=ów con cat=noun
+	obce_funkcjonalnie_twarde_a	owie	flex=owie con cat=noun
+	obce_funkcjonalnie_twarde_a	u	flex=u1 con cat=noun
+	obce_funkcjonalnie_twarde_a	i	flex=i2 con cat=noun
+	obce_funkcjonalnie_twarde_a	y	flex=i2 con cat=noun
+	obce_funkcjonalnie_miekkie_ii_wyglos		flex=ε3 con cat=noun
+
+@NOUN-FLEX-APOSTROF
+	obce_ais	’go	flex=ego con cat=noun
+	obce_ais	’mu	flex=emu con cat=noun
+	obce_ais	’m	flex=em con cat=noun
+	obce_ais	'go	flex=ego con cat=noun
+	obce_ais	'mu	flex=emu con cat=noun
+	obce_ais	'm	flex=em con cat=noun
+	obce_ais		flex=ε con cat=noun
+	obce_apostrof	’a	flex='a con cat=noun
+	obce_apostrof	’u	flex='u con cat=noun
+	obce_apostrof	’owi	flex='owi con cat=noun
+	obce_apostrof	’em	flex='em con cat=noun
+	obce_apostrof	’ie	flex='ie con cat=noun
+	obce_apostrof	’u	flex='u con cat=noun
+	obce_apostrof	’y	flex='y con cat=noun
+	obce_apostrof	’owie	flex='owie con cat=noun
+	obce_apostrof	’e	flex='e con cat=noun
+	obce_apostrof	’ów	flex='ów con cat=noun
+	obce_apostrof	’i	flex='i con cat=noun
+	obce_apostrof	’om	flex='om con cat=noun
+	obce_apostrof	’ami	flex='ami con cat=noun
+	obce_apostrof	’ach	flex='ach con cat=noun
+	obce_apostrof	'a	flex='a con cat=noun
+	obce_apostrof	'u	flex='u con cat=noun
+	obce_apostrof	'owi	flex='owi con cat=noun
+	obce_apostrof	'em	flex='em con cat=noun
+	obce_apostrof	'ie	flex='ie con cat=noun
+	obce_apostrof	'u	flex='u con cat=noun
+	obce_apostrof	'y	flex='y con cat=noun
+	obce_apostrof	'owie	flex='owie con cat=noun
+	obce_apostrof	'e	flex='e con cat=noun
+	obce_apostrof	'ów	flex='ów con cat=noun
+	obce_apostrof	'i	flex='i con cat=noun
+	obce_apostrof	'om	flex='om con cat=noun
+	obce_apostrof	'ami	flex='ami con cat=noun
+	obce_apostrof	'ach	flex='ach con cat=noun
+	obce_apostrof		flex=ε con cat=noun
@@ -27,3 +27,5 @@ PREF-ε	FIN-FLEX-J	VERB-FLEX2-J	VERB-GROUP-J-NĄ	VERB-LEMMA-NĄ
 PREF-NIE	GER-FLEX	VERB-FLEX2-J	VERB-GROUP-J-NĄ	VERB-LEMMA-NĄ
 PREF-NIE	PACT-FLEX	VERB-FLEX2-J	VERB-GROUP-J-NĄ	VERB-LEMMA-NĄ
 PREF-NIE	PPAS-FLEX	VERB-FLEX2-J	VERB-GROUP-J-NĄ	VERB-LEMMA-NĄ
+NOUN-FLEX-FOREIGN	NOUN-LEMMA-FOREIGN
+NOUN-FLEX-APOSTROF	NOUN-LEMMA-FOREIGN
+Plik freqListInterps.ml wykorzystuje plik data/interps_general.tab, skąd usunięte zostały niektóre wpisy,
+głównie dotyczące adj (powodujące dwuznaczności przy scalaniu).
+
+Pozostawione interpretacje:
+-siebie, numcol (nieobecne w SGJP) pozostają jak są
+-ppron12, ppron3 pozostają jak są
+(tzn. nie ma znalezionych odpowiedników, są pomijane)
+
+W przypadku praet, imps, imp, fin, inf, ger, pact, ppas wybrana została interpretacja najbardziej podobna
+do oryginalnej, licząc od końca, co rozwiązuje problemy imperf.perf, neg.aff
+
+Przekształcenia:
+-qub:wok, qub:nwok -> qub
+
+Co do num spoza SGJP:
+-jeżeli forma składa się wyłącznie z cyfr arabskich i rzymskich, wybierana jest najdłuższa interpretacja
+Zapewne w rzeczywistości powinny wtedy obejmować wszystkie możliwe tagi.
+-w przeciwnym wypadku wybierana jest najkrótsza (najwęższa) interpretacja
@@ -6,89 +6,162 @@
 \usepackage[polish]{babel}
 % \usepackage{tikz}
 % \usetikzlibrary{conceptgraph}
+\usepackage{amsthm}
  
 \parindent 0pt
 \parskip 4pt
  
-% \newcommand{\tensor}{\otimes}
-% \newcommand{\forward}{\operatorname{/}}
-% \newcommand{\backward}{\operatorname{\backslash}}
-% \newcommand{\both}{\mid}
-% \newcommand{\plus}{\oplus}
-% \newcommand{\zero}{0}
-% \newcommand{\one}{1}
-% \newcommand{\letin}[2]{{\bf let}\;#1\;{\bf in}\;#2}
-% \newcommand{\caseof}[2]{{\bf case}\;#1\;{\bf of}\;#2}
-% \newcommand{\emp}{{\bf emp}}
-% \newcommand{\inl}{{\bf inl}}
-% \newcommand{\inr}{{\bf inr}}
-% \newcommand{\coord}[1]{{#1}^\star}
-% \newcommand{\map}[2]{{\bf map}\;#1\;#2}
-% \newcommand{\concat}[1]{{\bf concat}\;#1}
-% \newcommand{\makeset}[1]{{\bf makeset}\;#1}
-% \newcommand{\maketerm}[1]{{\bf maketerm}\;#1}
-% \newcommand{\addlist}[2]{{\bf add}\;#1\;#2}
-% \newcommand{\ana}[1]{{\bf ana}(#1)}
-% \newcommand{\One}{\bullet}
-
-
-\title{Model probabilistyczny guessera dla języka polskiego}
+\newcommand{\form}{{\it form}}
+\newcommand{\lemma}{{\it lemma}}
+\newcommand{\cat}{{\it cat}}
+\newcommand{\interp}{{\it interp}}
+\newcommand{\fsuf}{{\it fsuf}}
+\newcommand{\lsuf}{{\it lsuf}}
+
+\newtheorem{task}{Zadanie}
+\newtheorem{answer}{Odpowiedź}
+
+\title{Model probabilistyczny fleksji języka polskiego}
 \author{Wojciech Jaworski}
 %\date{}
  
 \begin{document}
 \maketitle
  
-Zakładamy, że język jest rozkładem probabilistycznym na czwórkach (form,lemma,cat,interp),
+Zakładamy, że język jest rozkładem probabilistycznym na czwórkach (\form,\lemma,\cat,\interp),
 czyli, że wystąpienia kolejnych słów w tekście są od siebie niezależne.
-Interpretacja interp jest zbiorem tagów zgodnym a tagsetem SGJP.
-Kategoria $cat \in \{ noun, adj, adv, verb, other \}$
+Interpretacja \interp{} jest zbiorem tagów zgodnym z tagsetem SGJP.
+Kategoria $\cat \in \{ {\rm noun}, {\rm adj}, {\rm adv}, {\rm verb}, {\rm other} \}$.
 Zakładamy też, że język jest poprawny, tzn. nie ma literówek, ani błędów gramatycznych.
  
 Dysponujemy następującymi danymi: 
 \begin{itemize}
 \item słownikiem gramatycznym S, czyli zbiorem czwórek, o których wiemy, że należą do języka;
-\item zbiorem reguł, czyli zbiorem czwórek (fsuf,lsuf,cat,interp)
+\item zbiorem reguł, czyli zbiorem czwórek (\fsuf,\lsuf,\cat,\interp)
 \item zbiorem wyjątków, czyli zbiorem czwórek, o których wiemy, że należą do języka, które nie są opisywane przez reguły
 \item otagowaną listą frekwencyjną.
 \end{itemize}
-Reguła przyłożona do formy ucina fsuf i przykleja lsuf.
+Reguła przyłożona do formy ucina \fsuf{} i przykleja \lsuf.
+
+Lista frekwencyjna wytworzona jest na podstawie NKJP1M. Usunięte zostały z niej symbole 
+(formy, do których odczytania nie wystarczy znajomość reguł wymowy, takie jak liczby zapisane cyframi, oznaczenia godzin i lat,
+znaki interpunkcyjne, skróty, emotikony). Usunięte zostały również formy odmienialne z użyciem myślnika i apostrofu 
+(np. odmienione akronimy i nazwiska obce, formy takie jak ,,12-latek''). 
+Interpretacje na liście frekwencyjnej zostały skonwertowane do postaci takiej jaka występuje w SGJP, 
+łączącej interpretacje form identycznych. Na przykład interpretacje adj:pl:nom:m1:pos, adj:pl:voc:m1:pos, adj:pl:nom:p1:pos i adj:pl:voc:p1:pos 
+zostały złączone w adj:pl:nom.voc:m1.p1:pos, a frekwencje form zsumowane.
+
+Celem jest aproksymacja wartości P(\lemma,\cat,\interp|\form).
+
+%Jakość aproksymacji mierzymy licząc jak często wśród $k$ najbardziej prawdopodobnych trójek $\lemma,\cat,\interp$ 
+%wskazanych przez model dla zadanej formy znajduje się trójka poprawna. Wyniki dla poszczególnych form agregujemy 
+%za pomocą średniej ważonej po ich częstościach.
+
+%Pytanie 0: Ile wynosi powyższa miara liczona z użyciem p-stw wziętych z listy frekwencyjnej? (To jest ograniczenie górne dla modelu)
+
+%Pytanie 0': Ile wynosi powyższa miara liczona z użyciem częstości wziętych ze zbioru reguł? (To jest ograniczenie dolne dla modelu)
+
+Pierwszym kryterium jest przynależność formy do słownika S. 
+Jeśli forma należy do S zakładamy, że jedno z haseł S zawierające tę formę
+poprawnie opisuje jej lemat, kategorię i interpretację.
+
+\begin{task}
+Jakie jest prawdopodobieństwo trafienia na formę, której lemat, kategoria i interpretacja należy do słownika, czyli
+\[P((\form,\lemma,\cat,\interp) \in S)\]
+Jakie jest prawdopodobieństwo trafienia na formę, która należy do słownika, ale jej lemat, kategoria lub interpretacja nie należy do słownika, czyli
+\[P((\form,\lemma,\cat,\interp) \not\in S \wedge \form \in S)\]
+\end{task}
+
+\begin{answer}
+Prawdopodobieństwo natrafienia na formę należącą do słownika wynosi 95,67\%, zaś natrafienia na formę należącą do SGJP bez odpowiedniej
+interpretacji -- 3,92\% (lista tych form znajduje się w pliku traps.txt).
+\end{answer}
+
+W przypadku form należących do słownika różnorodność interpretacji będzie niewielka, 
+natomiast istotne będzie prawdopodobieństwo wystąpienia danego lematu.
+Zaś w przypadku form nie należących do słownika prawdopodobieństwo wystąpienia lematu
+będzie zawsze małe.
  
-Celem jest aproksymacja wartości P(lemma,cat,interp|form).
+Dzielimy teraz listę frekwencyjną na część należącą do S i nie należącą do S. 
+Od tej pory budujemy model osobno dla każdej z części.
  
-Pytanie 1: $P((form,lemma,cat,interp) \in S)$
+W przypadku części należącej do S zauważamy, że \[P(\lemma,\cat,\interp|\form)=P(\form|\lemma,\cat,\interp)\frac{P(\lemma,\cat,\interp)}{P(\form)}\]
  
-Pytanie 2: $P((form,lemma,cat,interp) \not\in S \wedge form \in S)$
+Zakładamy, że \interp{} jest niezależne od \lemma, pod warunkiem określonego \cat
+\[P(\lemma,\cat,\interp)=P(\lemma,\cat)P(\interp|\lemma,\cat)=P(\lemma,\cat)P(\interp|\cat)\]
  
-Załóżmy, że reguły i wyjątki mają postać taką, że do danej formy można zaaplikować tylko jedną z nich 
-(dla żadnej reguły sufix nie jest podciągiem innego sufixu). Wtedy
-\[P(lemma,cat,interp|form)\approx P(rule|form)=P(rule|fsuf)\]
-(W powyższym drzewie sufixowym w każdym węźle mamy dowiązania do sufixów o jeden znak dłuższych oraz kategorię pozostałe traktową łącznie
+$P(\form)$, $P(\lemma,\cat)$ i $P(\interp|\cat)$ szacujemy na podstawie listy frekwencyjnej,
+w przypadku pierwszych dwu stosując wygładzanie. Wyliczenie $P(\form)$ zawiera uogólniona lista frekwencyjna
+(ścieżka {\tt resources/NKJP1M/NKJP1M-generalized-frequency.tab} w repozytorium ENIAM), $P(\lemma,\cat)$ -- plik
+ {\tt prob\_lemmacat.txt}, zaś $P(\interp|\cat)$ -- {\tt prob\_itp\_givencat.txt} (oba zawarte w katalogu {\tt morphology/doc}).
  
-Pytanie 3: Czy faktycznie zachodzi powyższa zależność? Jak zmierzyć podobieństwo?
+$P(\form|\lemma,\cat,\interp)$ wynosi 0, gdy w S nie ma krotki postaci (\form,\lemma,\cat,\interp);
+1, gdy jest dokładnie jedna krotka z (\lemma,\cat,\interp). Gdy jest ich więcej oznacza to, że
+lemat ma przynajmniej dwa warianty odmiany. Są to przypadki rzadkie. Przypisujemy każdej z możliwości
+prawdopodobieństwo 1.
  
-Problem tu jest taki, że lista frekwencyjna jest zbyt mała by precyzyjnie określić p-stwo ok. 40000 reguł
+\begin{task}
+Przejrzeć SGJP i znaleźć wszystkie przykłady, w których dla ustalonego lematu, kategorii i interpretacji
+jest więcej niż jedna forma. Znaleźć wystąpienia tych krotek na liście frekwencyjnej.
+\end{task}
  
-\[P(rule|fsuf)=P(lsuf,cat,interp|fsuf)=P(fsuf|lsuf,cat,interp)\frac{P(lsuf,cat,interp)}{P(fsuf)}\]
+\begin{answer}
+Lista takich form znajduje się w pliku multi\_forms.txt.
+\end{answer}
  
-$P(fsuf)$ jest prawdopodobieństwem tego, że do języka należy słowo o zadanym sufixie. 
+Teraz zanalizujemy drugą część listy frekwencyjnej. 
+Załóżmy, że reguły mają postać taką, że sufiks żadnej reguły nie jest podciągiem sufiksu innej z nich.
+Sufiksy reguł tworzą drzewo, które w każdym węźle ma dowiązania do sufiksów o jeden znak dłuższych oraz kategorię ,,pozostałe'' traktowaną łącznie.
+Przyjmujemy następujące założenie modelowe:
+\[P(\lemma,\cat,\interp|\form)\approx P(rule|\form)=P(rule|\fsuf)\]
+Wynika ono z tego, że mając nieznaną formę musimy oprzeć się na ogólnych regułach 
+odmiany i nie możemy korzystać z tego, że ma ona jakieś konkretne brzmienie.
+Korzystamy tutaj tylko z reguł oznaczonych jako produktywne.
+
+Problem tu jest taki, że lista frekwencyjna jest zbyt mała by precyzyjnie określić p-stwo ok. 40000 reguł.
+Dlatego znowu stosujemy zabieg z prawdopodobieństwem warunkowym.
+
+\[P(rule|\fsuf)=P(\lsuf,\cat,\interp|\fsuf)=P(\fsuf|\lsuf,\cat,\interp)\frac{P(\lsuf,\cat,\interp)}{P(\fsuf)}\]
+
+$P(\fsuf)$ jest prawdopodobieństwem tego, że do języka należy słowo o zadanym sufiksie. 
 Można je oszacować za pomocą listy frekwencyjnej.
  
-Zakładamy, że interp jest niezależne od lsuf, pod warunkiem określonego cat
-$P(lsuf,cat,interp)=P(lsuf,cat)P(interp|lsuf,cat)=P(lsuf,cat)P(interp|cat)$ 
+Zakładamy, że \interp{} jest niezależne od \lsuf, pod warunkiem określonego \cat
+\[P(\lsuf,\cat,\interp)=P(\lsuf,\cat)P(\interp|\lsuf,\cat)=P(\lsuf,\cat)P(\interp|\cat)\]
  
-$P(lsuf,cat)$ i $P(interp|cat)$ można oszacować na podstawie listy frekwencyjnej.
+$P(\lsuf,\cat)$ i $P(\interp|\cat)$ można oszacować na podstawie listy frekwencyjnej.
  
-$P(fsuf|lsuf,cat,interp)$ wynosi 0, gdy nie ma reguły postaci (fsuf,lsuf,cat,interp);
-1, gdy jest dokładnie jedna reguła z (lsuf,cat,interp), a gdy jest ich więcej trzeba
-oszacować z listy frekwencyjnej. 
+\begin{task}
+Oszacować $P(\fsuf)$ i $P(\lsuf,\cat)$ na podstawie listy frekwencyjnej.
+Sprawdzić dla jakich sufiksów próbka jest mała albo nie ma jej wcale. 
+\end{task}
  
-Pytanie 4: Czy powyższe przybliżenie jest poprawne, jak często jest więcej niż jedna reguła i ile wynoszą wówczas p-stwa?
+% w razie gdyby był problem można próbować dzielić sufiksy na części i założyć niezależność tych części
+
+$P(\fsuf|\lsuf,\cat,\interp)$ wynosi 0, gdy nie ma reguły postaci (\fsuf,\lsuf,\cat,\interp);
+1, gdy jest dokładnie jedna reguła z (\lsuf,\cat,\interp). Ustawiamy produktywność reguł tak 
+by nie pojawiało się więcej pasujących reguł. 
+
+\begin{task}
+Określić produktywność reguł i sprawdzić, czy nie ma niejednoznacznych dopasowań.
+\end{task}
  
-Pytanie 5: Co zrobić z niejednoznacznymi interpretacjami?
+\begin{task}
+Określić jakość modelu.
+\end{task}
  
-Zadania poboczne: wytworzenie otagowanej listy frekwencyjnej, wytworzenie zbioru reguł, wskazanie, które reguły opisują sytuacje wyjątkowe.
+\begin{answer}
+Wyliczona jakość modelu (stopień pokrycia listy frekwencyjnej przez co najmniej 95\% najbardziej prawdopodobnych interpretacji wg modelu) wyniosła 79,90\%.
+\end{answer}
+
+%czasowniki produktywne to te z lematem ać ować ywać, ić, yć, (nąć)
+
+Pytanie 4: Czy powyższe przybliżenie jest poprawne, jak często jest więcej niż jedna reguła i ile wynoszą wówczas p-stwa?
+
+Zadania poboczne: wytworzenie otagowanej listy frekwencyjnej, wytworzenie (uzupełnienie) zbioru reguł na podstawie SGJP i listy frekwencyjnej, wskazanie, które reguły opisują sytuacje wyjątkowe.
  
 Zadanie na przyszłość: reguły słowotwórstwa i ich interpretacja semantyczna.
  
-\end{document}
 \ No newline at end of file
+Do powyższego modelu trzeba jeszcze dodać prefiksy nie i naj.
+
+\end{document}