Commit 5bff1aaed9336292eb84f0a18bfb5bd725bc2567
1 parent
accee0d9
biblioteka eniam-lexSemantics-1.0
Showing 15 changed files with 240 additions and 884 deletions
integration/eniam-integration-1.0.tar.bz2
0 → 100644
No preview for this file type
integration/makefile
... | ... | @@ -15,8 +15,8 @@ install: all |
15 | 15 | cp eniam-integration.cmxa eniam-integration.a eniam-integration.cma $(INSTALLDIR) |
16 | 16 | cp ENIAM_CONLL.cmi ENIAMpreIntegration.cmi $(INSTALLDIR) |
17 | 17 | cp ENIAM_CONLL.cmx ENIAMpreIntegration.cmx $(INSTALLDIR) |
18 | - mkdir -p /usr/share/eniam/integration | |
19 | - cp resources/* /usr/share/eniam/integration | |
18 | + # mkdir -p /usr/share/eniam/integration | |
19 | + # cp resources/* /usr/share/eniam/integration | |
20 | 20 | |
21 | 21 | eniam-integration.cma: $(SOURCES) |
22 | 22 | ocamlc -linkall -a -o eniam-integration.cma $(OCAMLFLAGS) $^ |
... | ... |
lexSemantics/ENIAMlexSemantics.ml
... | ... | @@ -18,7 +18,9 @@ |
18 | 18 | *) |
19 | 19 | |
20 | 20 | open ENIAMtokenizerTypes |
21 | +open ENIAMsubsyntaxTypes | |
21 | 22 | open ENIAMlexSemanticsTypes |
23 | +open ENIAMwalTypes | |
22 | 24 | open Xstd |
23 | 25 | |
24 | 26 | let string_of_lex_sems tokens lex_sems = |
... | ... | @@ -55,13 +57,12 @@ let find_proper_names tokens i t = |
55 | 57 | match t.token with |
56 | 58 | Lemma(lemma,pos,interp) -> |
57 | 59 | if StringMap.mem proper_names lemma then |
58 | - let t = {t with token=Proper(lemma,pos,interp,StringMap.find proper_names lemma); | |
59 | - attrs=remove t.attrs "notvalidated proper"} in | |
60 | - ExtArray.set tokens i t else | |
60 | + {t with token=Proper(lemma,pos,interp,StringMap.find proper_names lemma); | |
61 | + attrs=remove t.attrs "notvalidated proper"} else | |
61 | 62 | if Xlist.mem t.attrs "notvalidated proper" then |
62 | - let t = {t with token=Proper(lemma,pos,interp,[])} in | |
63 | - ExtArray.set tokens i t | |
64 | - | _ -> () | |
63 | + {t with token=Proper(lemma,pos,interp,[])} | |
64 | + else t | |
65 | + | _ -> t | |
65 | 66 | |
66 | 67 | let find_senses t = (* FIXME: sensy zawierające 'się' *) |
67 | 68 | match t.token with |
... | ... | @@ -69,36 +70,67 @@ let find_senses t = (* FIXME: sensy zawierające 'się' *) |
69 | 70 | | Proper(_,_,_,senses) -> ENIAMplWordnet.find_proper_senses senses |
70 | 71 | | _ -> [] |
71 | 72 | |
72 | - | |
73 | -let assign tokens text = | |
74 | - let lex_sems = ExtArray.make (ExtArray.size tokens) empty_lex_sem in | |
75 | - let _ = ExtArray.add lex_sems empty_lex_sem in | |
76 | - Int.iter 1 (ExtArray.size tokens - 1) (fun i -> | |
77 | - let token = ExtArray.get tokens i in | |
78 | - find_proper_names tokens i token; | |
79 | - let senses = find_senses token in | |
80 | - let lex_sem = {empty_lex_sem with senses=senses} in | |
81 | - let j = ExtArray.add lex_sems lex_sem in | |
82 | - if j <> i then failwith "assign_semantic_valence"); | |
83 | - lex_sems | |
84 | - | |
85 | -(* | |
86 | -(* print_endline "a14"; *) | |
87 | - let paths = assign_valence paths in | |
88 | -(* print_endline "a15"; *) | |
89 | -(* print_endline "a16"; *) | |
90 | - let paths = disambiguate_senses paths in | |
91 | - let paths = assign_simplified_valence paths in | |
92 | - let paths = PreSemantics.assign_semantics paths in | |
93 | -(* print_endline "a16"; *) | |
94 | - | |
95 | - | |
96 | - | |
97 | - | |
98 | - | |
99 | -let assign_valence paths = | |
100 | - let lexemes = Xlist.fold paths StringMap.empty (fun lexemes t -> | |
101 | - match t.token with | |
73 | +let rec find a l i = | |
74 | + if a.(i) = max_int then ( | |
75 | + a.(i) <- i; | |
76 | + i) else | |
77 | + if a.(i) = i then ( | |
78 | + Xlist.iter l (fun j -> a.(j) <- i); | |
79 | + i) else | |
80 | + find a (i :: l) a.(i) | |
81 | + | |
82 | +let union a i j = | |
83 | + if i = j then i else | |
84 | + let x = min i j in | |
85 | + let y = max i j in | |
86 | + a.(y) <- x; | |
87 | + x | |
88 | + | |
89 | +let rec split_tokens_into_groups_sentence a = function | |
90 | + RawSentence s -> () | |
91 | + | StructSentence([],_) -> () | |
92 | + | StructSentence((id,_,_) :: paths,_) -> | |
93 | + ignore (Xlist.fold paths (find a [] id) (fun m (id,_,_) -> | |
94 | + union a m (find a [] id))) | |
95 | + | DepSentence(paths) -> | |
96 | + if Array.length paths = 0 then () else | |
97 | + let id,_,_ = paths.(0) in | |
98 | + ignore (Int.fold 1 (Array.length paths - 1) (find a [] id) (fun m i -> | |
99 | + let id,_,_ = paths.(i) in | |
100 | + union a m (find a [] id))) | |
101 | + | QuotedSentences sentences -> | |
102 | + Xlist.iter sentences (fun p -> | |
103 | + split_tokens_into_groups_sentence a p.psentence) | |
104 | + | AltSentence l -> Xlist.iter l (fun (mode,sentence) -> | |
105 | + split_tokens_into_groups_sentence a sentence) | |
106 | + | |
107 | +let rec split_tokens_into_groups_paragraph a = function | |
108 | + RawParagraph s -> () | |
109 | + | StructParagraph sentences -> | |
110 | + Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.psentence) | |
111 | + | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> | |
112 | + split_tokens_into_groups_paragraph a paragraph) | |
113 | + | |
114 | +let rec split_tokens_into_groups_text a = function | |
115 | + RawText s -> () | |
116 | + | StructText paragraphs -> | |
117 | + Xlist.iter paragraphs (split_tokens_into_groups_paragraph a) | |
118 | + | AltText l -> Xlist.iter l (fun (mode,text) -> | |
119 | + split_tokens_into_groups_text a text) | |
120 | + | |
121 | +let split_tokens_into_groups size text = | |
122 | + let a = Array.make size max_int in | |
123 | + split_tokens_into_groups_text a text; | |
124 | + Int.iter 1 (Array.length a - 1) (fun i -> | |
125 | + if a.(i) <> max_int then a.(i) <- a.(a.(i))); | |
126 | + let map = Int.fold 1 (Array.length a - 1) IntMap.empty (fun map i -> | |
127 | + if a.(i) = max_int then map else | |
128 | + IntMap.add_inc map a.(i) [i] (fun l -> i :: l)) in | |
129 | + IntMap.fold map [] (fun l _ v -> v :: l) | |
130 | + | |
131 | +let assign_valence tokens lex_sems group = | |
132 | + let lexemes = Xlist.fold group StringMap.empty (fun lexemes id -> | |
133 | + match (ExtArray.get tokens id).token with | |
102 | 134 | Lemma(lemma,pos,_) -> |
103 | 135 | StringMap.add_inc lexemes lemma (StringSet.singleton pos) (fun set -> StringSet.add set pos) |
104 | 136 | | Proper(lemma,pos,_,_) -> |
... | ... | @@ -108,104 +140,51 @@ let assign_valence paths = |
108 | 140 | | _ -> pos (*failwith ("assign_valence: Proper " ^ pos ^ " " ^ lemma)*) in |
109 | 141 | StringMap.add_inc lexemes lemma (StringSet.singleton pos) (fun set -> StringSet.add set pos) (* nazwy własne mają przypisywaną domyślną walencję rzeczowników *) |
110 | 142 | | _ -> lexemes) in |
111 | - let valence = WalFrames.find_frames lexemes in | |
112 | - List.rev (Xlist.rev_map paths (fun t -> | |
113 | - match t.token with | |
114 | - Lemma(lemma,pos,_) -> {t with valence=try Xlist.rev_map (StringMap.find (StringMap.find valence lemma) pos) (fun frame -> 0,frame) with Not_found -> []} | |
115 | - | Proper(lemma,pos,interp,_) -> {t with valence=(try Xlist.rev_map (StringMap.find (StringMap.find valence lemma) | |
116 | - (if pos = "subst" || pos = "depr" then "p" ^ pos else pos)) (fun frame -> 0,frame) with Not_found -> [](*failwith ("assign_valence: Proper(" ^ lemma ^ "," ^ pos ^ ")")*)); | |
117 | - token=Lemma(lemma,pos,interp)} | |
118 | - | _ -> t)) | |
119 | - | |
120 | -(**********************************************************************************) | |
121 | - | |
122 | -(* let prepare_indexes (paths,_) = | |
123 | - let set = Xlist.fold paths IntSet.empty (fun set t -> | |
124 | - IntSet.add (IntSet.add set t.beg) t.next) in | |
125 | - let map,last = Xlist.fold (Xlist.sort (IntSet.to_list set) compare) (IntMap.empty,0) (fun (map,n) x -> | |
126 | - IntMap.add map x n, n+1) in | |
127 | - List.rev (Xlist.rev_map paths (fun t -> | |
128 | - {t with lnode=IntMap.find map t.beg; rnode=IntMap.find map t.next})), last - 1 *) | |
143 | + let valence = ENIAMwalenty.find_frames lexemes in | |
144 | + Xlist.iter group (fun id -> | |
145 | + match (ExtArray.get tokens id).token with | |
146 | + Lemma(lemma,pos,_) -> | |
147 | + ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with | |
148 | + valence=try Xlist.rev_map (StringMap.find (StringMap.find valence lemma) pos) (fun frame -> 0,frame) with Not_found -> []} | |
149 | + | Proper(lemma,pos,interp,_) -> | |
150 | + ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with | |
151 | + valence=(try Xlist.rev_map (StringMap.find (StringMap.find valence lemma) | |
152 | + (if pos = "subst" || pos = "depr" then "p" ^ pos else pos)) (fun frame -> 0,frame) with Not_found -> [](*failwith ("assign_valence: Proper(" ^ lemma ^ "," ^ pos ^ ")")*))}; | |
153 | + ExtArray.set tokens id {(ExtArray.get tokens id) with token=Lemma(lemma,pos,interp)} | |
154 | + | _ -> ()) | |
129 | 155 | |
130 | 156 | let get_prefs_schema prefs schema = |
131 | 157 | Xlist.fold schema prefs (fun prefs t -> |
132 | - Xlist.fold t.WalTypes.sel_prefs prefs StringSet.add) | |
158 | + Xlist.fold t.sel_prefs prefs StringSet.add) | |
133 | 159 | |
134 | 160 | let map_prefs_schema senses schema = |
135 | 161 | Xlist.map schema (fun t -> |
136 | - if Xlist.mem t.WalTypes.morfs (WalTypes.Phrase WalTypes.Pro) || Xlist.mem t.WalTypes.morfs (WalTypes.Phrase WalTypes.ProNG) then t else | |
137 | - {t with WalTypes.sel_prefs = Xlist.fold t.WalTypes.sel_prefs [] (fun l s -> | |
162 | + if Xlist.mem t.morfs (Phrase Pro) || Xlist.mem t.morfs (Phrase ProNG) then t else | |
163 | + {t with sel_prefs = Xlist.fold t.sel_prefs [] (fun l s -> | |
138 | 164 | if StringSet.mem senses s then s :: l else l)}) |
139 | 165 | |
140 | -let disambiguate_senses paths = | |
141 | - let prefs = Xlist.fold paths (StringSet.singleton "ALL") (fun prefs t -> | |
142 | - Xlist.fold t.valence prefs (fun prefs -> function | |
143 | - _,WalTypes.Frame(_,schema) -> get_prefs_schema prefs schema | |
144 | - | _,WalTypes.LexFrame(_,_,_,schema) -> get_prefs_schema prefs schema | |
145 | - | _,WalTypes.ComprepFrame(_,_,_,schema) -> get_prefs_schema prefs schema)) in | |
146 | - let hipero = Xlist.fold paths (StringSet.singleton "ALL") (fun hipero t -> | |
147 | - Xlist.fold t.senses hipero (fun hipero (_,l,_) -> | |
166 | +let disambiguate_senses lex_sems group = | |
167 | + let prefs = Xlist.fold group (StringSet.singleton "ALL") (fun prefs id -> | |
168 | + Xlist.fold (ExtArray.get lex_sems id).valence prefs (fun prefs -> function | |
169 | + _,Frame(_,schema) -> get_prefs_schema prefs schema | |
170 | + | _,LexFrame(_,_,_,schema) -> get_prefs_schema prefs schema | |
171 | + | _,ComprepFrame(_,_,_,schema) -> get_prefs_schema prefs schema)) in | |
172 | + let hipero = Xlist.fold group (StringSet.singleton "ALL") (fun hipero id -> | |
173 | + Xlist.fold (ExtArray.get lex_sems id).senses hipero (fun hipero (_,l,_) -> | |
148 | 174 | Xlist.fold l hipero StringSet.add)) in |
149 | 175 | let senses = StringSet.intersection prefs hipero in |
150 | 176 | let is_zero = StringSet.mem hipero "0" in |
151 | 177 | let senses = if is_zero then StringSet.add senses "0" else senses in |
152 | - Xlist.map paths (fun t -> | |
153 | - {t with valence = if is_zero then t.valence else | |
178 | + Xlist.iter group (fun id -> | |
179 | + let t = ExtArray.get lex_sems id in | |
180 | + ExtArray.set lex_sems id {t with valence = if is_zero then t.valence else | |
154 | 181 | Xlist.map t.valence (function |
155 | - n,WalTypes.Frame(a,schema) -> n,WalTypes.Frame(a,map_prefs_schema senses schema) | |
156 | - | n,WalTypes.LexFrame(s,p,r,schema) -> n,WalTypes.LexFrame(s,p,r,map_prefs_schema senses schema) | |
157 | - | n,WalTypes.ComprepFrame(s,p,r,schema) -> n,WalTypes.ComprepFrame(s,p,r,map_prefs_schema senses schema)); | |
182 | + n,Frame(a,schema) -> n,Frame(a,map_prefs_schema senses schema) | |
183 | + | n,LexFrame(s,p,r,schema) -> n,LexFrame(s,p,r,map_prefs_schema senses schema) | |
184 | + | n,ComprepFrame(s,p,r,schema) -> n,ComprepFrame(s,p,r,map_prefs_schema senses schema)); | |
158 | 185 | senses = Xlist.map t.senses (fun (s,l,w) -> |
159 | 186 | s, List.rev (Xlist.fold l [] (fun l s -> if StringSet.mem senses s then s :: l else l)),w)}) |
160 | 187 | |
161 | -(*let single_sense (paths,last) = | |
162 | - List.rev (Xlist.rev_map paths (fun t -> | |
163 | - let sense = | |
164 | - if t.senses = [] then [] else | |
165 | - [Xlist.fold t.senses ("",[],-.max_float) (fun (max_meaning,max_hipero,max_weight) (meaning,hipero,weight) -> | |
166 | - if max_weight >= weight then max_meaning,max_hipero,max_weight else meaning,hipero,weight)] in | |
167 | - {t with senses=sense})), last*) | |
168 | - | |
169 | -open WalTypes | |
170 | - | |
171 | -(*let single_schema schemata = | |
172 | - let map = Xlist.fold schemata StringMap.empty (fun map schema -> | |
173 | - let t = WalStringOf.schema (List.sort compare (Xlist.fold schema [] (fun l s -> | |
174 | - if s.gf <> ARG && s.gf <> ADJUNCT then {s with role=""; role_attr=""; sel_prefs=[]} :: l else | |
175 | - if s.cr <> [] || s.ce <> [] then {s with role=""; role_attr=""; sel_prefs=[]} :: l else l))) in | |
176 | - StringMap.add_inc map t [schema] (fun l -> schema :: l)) in | |
177 | - StringMap.fold map [] (fun l _ schemata -> | |
178 | - let map = Xlist.fold schemata StringMap.empty (fun map schema -> | |
179 | - Xlist.fold schema map (fun map s -> | |
180 | - let t = WalStringOf.schema [{s with role=""; role_attr=""; sel_prefs=[]}] in | |
181 | - StringMap.add_inc map t [s] (fun l -> s :: l))) in | |
182 | - let schema = StringMap.fold map [] (fun schema _ l -> | |
183 | - let s = List.hd l in | |
184 | - {s with sel_prefs=Xlist.fold s.sel_prefs [] (fun l t -> if t = "0" || t = "T" then t :: l else l)} :: schema) in | |
185 | - schema :: l)*) | |
186 | - | |
187 | -let remove_meaning = function | |
188 | - DefaultAtrs(m,r,o,neg,p,a) -> DefaultAtrs([],r,o,neg,p,a) | |
189 | - | EmptyAtrs m -> EmptyAtrs [] | |
190 | - | NounAtrs(m,nsyn,s(*,typ*)) -> NounAtrs([],nsyn,s(*,typ*)) | |
191 | - | AdjAtrs(m,c,adjsyn(*,adjsem,typ*)) -> AdjAtrs([],c,adjsyn(*,adjsem,typ*)) | |
192 | - | PersAtrs(m,le,neg,mo,t,au,a) -> PersAtrs([],le,neg,mo,t,au,a) | |
193 | - | GerAtrs(m,le,neg,a) -> GerAtrs([],le,neg,a) | |
194 | - | NonPersAtrs(m,le,role,role_attr,neg,a) -> NonPersAtrs([],le,role,role_attr,neg,a) | |
195 | - | _ -> failwith "remove_meaning" | |
196 | - | |
197 | - | |
198 | -(*let single_frame (paths,last) = | |
199 | - List.rev (Xlist.rev_map paths (fun t -> | |
200 | - let lex_frames,frames = Xlist.fold t.valence ([],StringMap.empty) (fun (lex_frames,frames) -> function | |
201 | - Frame(attrs,schema) -> | |
202 | - let attrs = remove_meaning attrs in | |
203 | - lex_frames, StringMap.add_inc frames (WalStringOf.frame_atrs attrs) (attrs,[schema]) (fun (_,l) -> attrs, schema :: l) | |
204 | - | frame -> frame :: lex_frames, frames) in | |
205 | - let frames = StringMap.fold frames lex_frames (fun frames _ (attrs,schemata) -> | |
206 | - Xlist.fold (single_schema schemata) frames (fun frames frame -> Frame(attrs,frame) :: frames)) in | |
207 | - {t with valence=frames})), last *) | |
208 | - | |
209 | 188 | let simplify_position_verb l = function (* FIXME: dodać czyszczenie E Pro *) |
210 | 189 | Phrase(NP(Case "dat")) -> l |
211 | 190 | | Phrase(NP(Case "inst")) -> l |
... | ... | @@ -283,33 +262,34 @@ let simplify_schemata pos schemata = |
283 | 262 | if s.gf <> ARG && s.gf <> ADJUNCT then s :: l else |
284 | 263 | (* if s.cr <> [] || s.ce <> [] then s :: l else *) |
285 | 264 | simplify_position pos l s)) in |
286 | - StringMap.add_inc schemata (WalStringOf.schema schema) (schema,[frame]) (fun (_,frames) -> schema, frame :: frames)) in | |
265 | + StringMap.add_inc schemata (ENIAMwalStringOf.schema schema) (schema,[frame]) (fun (_,frames) -> schema, frame :: frames)) in | |
287 | 266 | StringMap.fold schemata [] (fun l _ s -> s :: l) |
288 | 267 | |
289 | 268 | (* FIXME: problem ComprepNP i PrepNCP *) |
290 | 269 | (* FIXME: problem gdy ten sam token występuje w kilku ścieżkach *) |
291 | 270 | let generate_verb_prep_adjuncts preps = |
292 | - Xlist.map preps (fun (lemma,case) -> WalFrames.verb_prep_adjunct_schema_field lemma case) | |
271 | + Xlist.map preps (fun (lemma,case) -> ENIAMwalFrames.verb_prep_adjunct_schema_field lemma case) | |
293 | 272 | |
294 | 273 | let generate_verb_comprep_adjuncts compreps = |
295 | - Xlist.map compreps (fun lemma -> WalFrames.verb_comprep_adjunct_schema_field lemma) | |
274 | + Xlist.map compreps (fun lemma -> ENIAMwalFrames.verb_comprep_adjunct_schema_field lemma) | |
296 | 275 | |
297 | 276 | let generate_verb_compar_adjuncts compars = |
298 | - Xlist.map compars (fun lemma -> WalFrames.verb_compar_adjunct_schema_field lemma) | |
277 | + Xlist.map compars (fun lemma -> ENIAMwalFrames.verb_compar_adjunct_schema_field lemma) | |
299 | 278 | |
300 | 279 | let generate_noun_prep_adjuncts preps = |
301 | - WalFrames.noun_prep_adjunct_schema_field preps | |
280 | + ENIAMwalFrames.noun_prep_adjunct_schema_field preps | |
302 | 281 | |
303 | 282 | let generate_noun_compar_adjuncts compars = |
304 | - WalFrames.noun_compar_adjunct_schema_field compars | |
283 | + ENIAMwalFrames.noun_compar_adjunct_schema_field compars | |
305 | 284 | |
306 | 285 | let generate_adj_compar_adjuncts compars = |
307 | - WalFrames.noun_compar_adjunct_schema_field compars | |
286 | + ENIAMwalFrames.noun_compar_adjunct_schema_field compars | |
308 | 287 | |
309 | 288 | let compars = StringSet.of_list ["jak";"jako";"niż";"niczym";"niby";"co"] |
310 | 289 | |
311 | -let generate_prep_adjunct_tokens paths = | |
312 | - let map = Xlist.fold paths StringMap.empty (fun map t -> | |
290 | +let generate_prep_adjunct_tokens tokens group = | |
291 | + let map = Xlist.fold group StringMap.empty (fun map id -> | |
292 | + let t = ExtArray.get tokens id in | |
313 | 293 | match t.token with |
314 | 294 | Lemma(lemma,"prep",interp) -> |
315 | 295 | let map = if lemma = "po" then StringMap.add map "po:postp" ("po","postp") else map in |
... | ... | @@ -321,17 +301,19 @@ let generate_prep_adjunct_tokens paths = |
321 | 301 | | _ -> map) in |
322 | 302 | StringMap.fold map [] (fun l _ v -> v :: l) |
323 | 303 | |
324 | -let generate_comprep_adjunct_tokens paths = | |
325 | - let lemmas = Xlist.fold paths StringSet.empty (fun lemmas t -> | |
304 | +let generate_comprep_adjunct_tokens tokens group = | |
305 | + let lemmas = Xlist.fold group StringSet.empty (fun lemmas id -> | |
306 | + let t = ExtArray.get tokens id in | |
326 | 307 | match t.token with |
327 | 308 | Lemma(lemma,_,_) -> StringSet.add lemmas lemma |
328 | 309 | | _ -> lemmas) in |
329 | - StringMap.fold WalFrames.comprep_reqs [] (fun compreps comprep reqs -> | |
310 | + StringMap.fold ENIAMwalFrames.comprep_reqs [] (fun compreps comprep reqs -> | |
330 | 311 | let b = Xlist.fold reqs true (fun b s -> b && StringSet.mem lemmas s) in |
331 | 312 | if b then comprep :: compreps else compreps) |
332 | 313 | |
333 | -let generate_compar_adjunct_tokens paths = | |
334 | - let set = Xlist.fold paths StringSet.empty (fun set t -> | |
314 | +let generate_compar_adjunct_tokens tokens group = | |
315 | + let set = Xlist.fold group StringSet.empty (fun set id -> | |
316 | + let t = ExtArray.get tokens id in | |
335 | 317 | match t.token with |
336 | 318 | Lemma(lemma,"prep",interp) -> |
337 | 319 | if not (StringSet.mem compars lemma) then set else |
... | ... | @@ -343,29 +325,40 @@ let is_measure = function |
343 | 325 | NounAtrs(_,_,Common "measure") -> true |
344 | 326 | | _ -> false |
345 | 327 | |
346 | -let assign_simplified_valence paths = | |
347 | - let preps = generate_prep_adjunct_tokens paths in | |
348 | - let compreps = generate_comprep_adjunct_tokens paths in | |
349 | - let compars = generate_compar_adjunct_tokens paths in | |
328 | +let remove_meaning = function | |
329 | + DefaultAtrs(m,r,o,neg,p,a) -> DefaultAtrs([],r,o,neg,p,a) | |
330 | + | EmptyAtrs m -> EmptyAtrs [] | |
331 | + | NounAtrs(m,nsyn,s(*,typ*)) -> NounAtrs([],nsyn,s(*,typ*)) | |
332 | + | AdjAtrs(m,c,adjsyn(*,adjsem,typ*)) -> AdjAtrs([],c,adjsyn(*,adjsem,typ*)) | |
333 | + | PersAtrs(m,le,neg,mo,t,au,a) -> PersAtrs([],le,neg,mo,t,au,a) | |
334 | + | GerAtrs(m,le,neg,a) -> GerAtrs([],le,neg,a) | |
335 | + | NonPersAtrs(m,le,role,role_attr,neg,a) -> NonPersAtrs([],le,role,role_attr,neg,a) | |
336 | + | _ -> failwith "remove_meaning" | |
337 | + | |
338 | +let assign_simplified_valence tokens lex_sems group = | |
339 | + let preps = generate_prep_adjunct_tokens tokens group in | |
340 | + let compreps = generate_comprep_adjunct_tokens tokens group in | |
341 | + let compars = generate_compar_adjunct_tokens tokens group in | |
350 | 342 | let verb_prep_adjuncts = generate_verb_prep_adjuncts preps in |
351 | 343 | let verb_comprep_adjuncts = generate_verb_comprep_adjuncts compreps in |
352 | 344 | let verb_compar_adjuncts = generate_verb_compar_adjuncts compars in |
353 | 345 | let noun_prep_adjuncts = generate_noun_prep_adjuncts preps compreps in |
354 | 346 | let noun_compar_adjuncts = generate_noun_compar_adjuncts compars in |
355 | 347 | let adj_compar_adjuncts = generate_adj_compar_adjuncts compars in |
356 | - let verb_adjuncts = WalFrames.verb_adjuncts_simp @ verb_prep_adjuncts @ verb_comprep_adjuncts @ verb_compar_adjuncts in | |
357 | - let noun_adjuncts = WalFrames.noun_adjuncts_simp @ [noun_prep_adjuncts] @ [noun_compar_adjuncts] in | |
358 | - let noun_measure_adjuncts = WalFrames.noun_measure_adjuncts_simp @ [noun_prep_adjuncts] @ [noun_compar_adjuncts] in | |
359 | - let adj_adjuncts = WalFrames.adj_adjuncts_simp @ [adj_compar_adjuncts] in | |
360 | - let adv_adjuncts = WalFrames.adv_adjuncts_simp @ [adj_compar_adjuncts] in | |
361 | - List.rev (Xlist.rev_map paths (fun t -> | |
362 | - let pos = match t.token with | |
363 | - Lemma(_,pos,_) -> WalFrames.simplify_pos pos | |
348 | + let verb_adjuncts = ENIAMwalFrames.verb_adjuncts_simp @ verb_prep_adjuncts @ verb_comprep_adjuncts @ verb_compar_adjuncts in | |
349 | + let noun_adjuncts = ENIAMwalFrames.noun_adjuncts_simp @ [noun_prep_adjuncts] @ [noun_compar_adjuncts] in | |
350 | + let noun_measure_adjuncts = ENIAMwalFrames.noun_measure_adjuncts_simp @ [noun_prep_adjuncts] @ [noun_compar_adjuncts] in | |
351 | + let adj_adjuncts = ENIAMwalFrames.adj_adjuncts_simp @ [adj_compar_adjuncts] in | |
352 | + let adv_adjuncts = ENIAMwalFrames.adv_adjuncts_simp @ [adj_compar_adjuncts] in | |
353 | + Xlist.iter group (fun id -> | |
354 | + let t = ExtArray.get lex_sems id in | |
355 | + let pos = match (ExtArray.get tokens id).token with | |
356 | + Lemma(_,pos,_) -> ENIAMwalFrames.simplify_pos pos | |
364 | 357 | | _ -> "" in |
365 | 358 | let lex_frames,frames = Xlist.fold t.valence ([],StringMap.empty) (fun (lex_frames,frames) -> function |
366 | 359 | _,(Frame(attrs,schema) as frame) -> |
367 | 360 | let attrs = remove_meaning attrs in |
368 | - lex_frames, StringMap.add_inc frames (WalStringOf.frame_atrs attrs) (attrs,[schema,frame]) (fun (_,l) -> attrs, (schema,frame) :: l) | |
361 | + lex_frames, StringMap.add_inc frames (ENIAMwalStringOf.frame_atrs attrs) (attrs,[schema,frame]) (fun (_,l) -> attrs, (schema,frame) :: l) | |
369 | 362 | | _,frame -> frame :: lex_frames, frames) in |
370 | 363 | let simp_frames,full_frames,n = Xlist.fold lex_frames ([],[],1) (fun (simp_frames,full_frames,n) frame -> |
371 | 364 | (n,frame) :: simp_frames, (n,frame) :: full_frames, n+1) in |
... | ... | @@ -380,93 +373,23 @@ let assign_simplified_valence paths = |
380 | 373 | (n,Frame(attrs,schema)) :: simp_frames, |
381 | 374 | Xlist.fold frames full_frames (fun full_frames frame -> (n,frame) :: full_frames), |
382 | 375 | n+1)) in |
383 | - {t with simple_valence=simp_frames; valence=full_frames})) | |
384 | - | |
385 | -(* FIXME: dodać do walencji preferencje selekcyjne nadrzędników symboli: dzień, godzina, rysunek itp. *) | |
386 | -(* FIXME: sprawdzić czy walencja nazw własnych jest dobrze zrobiona. *) | |
387 | - | |
388 | -(* let first_id = 1 (* id=0 jest zarezerwowane dla pro; FIXME: czy to jest jeszcze aktualne? *) | |
389 | - | |
390 | -let add_ids (paths,last) next_id = | |
391 | - let paths,next_id = Xlist.fold ((*List.rev*) paths) ([],next_id) (fun (paths,id) t -> | |
392 | - {t with id=id} :: paths, id+1) in | |
393 | - (paths,last),next_id *) | |
394 | - | |
395 | - | |
396 | - | |
397 | -let parse query = | |
398 | -(* print_endline "a1"; *) | |
399 | - let l = Xunicode.classified_chars_of_utf8_string query in | |
400 | -(* print_endline "a2"; *) | |
401 | - let l = PreTokenizer.tokenize l in | |
402 | -(* print_endline "a3"; *) | |
403 | - let l = PrePatterns.normalize_tokens [] l in | |
404 | -(* print_endline "a4"; *) | |
405 | - let l = PrePatterns.find_replacement_patterns l in | |
406 | -(* print_endline "a5"; *) | |
407 | - let l = PrePatterns.remove_spaces [] l in | |
408 | - let l = PrePatterns.find_abr_patterns PreAcronyms.abr_patterns l in | |
409 | - let l = PrePatterns.normalize_tokens [] l in | |
410 | -(* print_endline "a6"; *) | |
411 | - let paths = PrePaths.translate_into_paths l in | |
412 | -(* print_endline "a7"; *) | |
413 | - let paths = PrePaths.lemmatize paths in | |
414 | -(* print_endline "a8"; *) | |
415 | - let paths,_ = PreMWE.process paths in | |
416 | -(* print_endline "a12"; *) | |
417 | - let paths = find_proper_names paths in | |
418 | -(* print_endline "a13"; *) | |
419 | - let paths = modify_weights paths in | |
420 | - let paths = translate_digs paths in | |
421 | - let paths = assign_senses paths in | |
422 | -(* print_endline "a14"; *) | |
423 | - let paths = assign_valence paths in | |
424 | -(* print_endline "a15"; *) | |
425 | - let paths = combine_interps paths in | |
426 | -(* print_endline "a16"; *) | |
427 | - let paths = disambiguate_senses paths in | |
428 | - let paths = assign_simplified_valence paths in | |
429 | - let paths = PreSemantics.assign_semantics paths in | |
430 | -(* print_endline "a16"; *) | |
431 | - let paths = select_tokens paths in | |
432 | -(* print_endline "a17"; *) | |
433 | -(* let paths = if !single_sense_flag then single_sense paths else paths in | |
434 | - let paths = if !single_frame_flag then single_frame paths else paths in*) | |
435 | - (*let paths, next_id = add_ids paths next_id in | |
436 | - let paths = prepare_indexes paths in*) | |
437 | -(* print_endline "a18"; *) | |
438 | - paths(*, next_id*) | |
439 | -(* print_endline (PrePaths.to_string paths); *) | |
440 | -(* let paths = | |
441 | - if PrePaths.no_possible_path (PrePaths.map paths PreLemmatization.remove_postags) then | |
442 | - PrePaths.map paths process_ign | |
443 | - else paths in | |
444 | - let paths = PrePaths.map paths PreLemmatization.remove_postags in | |
445 | - let paths = PreCaseShift.manage_lower_upper_case paths in (* FIXME: niepotrzebnie powiększa pierwszy token (przymiotniki partykuły itp.) *) | |
446 | - let paths = PreLemmatization.combine_interps paths in | |
447 | -(* print_endline (PrePaths.to_string paths); *)*) | |
448 | - | |
449 | -let parse_conll tokens dep_paths = (* FIXME: sprawdzić, czy zachowana jest kolejność elementów paths !!! *) | |
450 | - let paths = List.rev (Int.fold 1 (Array.length dep_paths - 1) [] (fun paths conll_id -> | |
451 | - let id,_,_ = dep_paths.(conll_id) in | |
452 | - ExtArray.get tokens id :: paths)) in | |
453 | - (* print_endline "a12"; *) | |
454 | - let paths = find_proper_names paths in | |
455 | - (* print_endline "a13"; *) | |
456 | - let paths = modify_weights paths in | |
457 | - let paths = PreWordnet.assign_senses paths in | |
458 | - (* print_endline "a14"; *) | |
459 | - (* let paths = combine_interps paths in (* FIXME: to powinno też działać dla Proper *) *) | |
460 | - (* print_endline "a15"; *) | |
461 | - let paths = assign_valence paths in | |
462 | - (* print_endline "a16"; *) | |
463 | - let paths = disambiguate_senses paths in | |
464 | - let paths = assign_simplified_valence paths in | |
465 | - let paths = PreSemantics.assign_semantics paths in | |
466 | - (* print_endline "a16"; *) | |
467 | - let _ = Xlist.fold paths 1 (fun conll_id t -> | |
468 | - let id,_,_ = dep_paths.(conll_id) in | |
469 | - ExtArray.set tokens id t; | |
470 | - conll_id + 1) in | |
471 | - () | |
472 | -*) | |
376 | + ExtArray.set lex_sems id {t with simple_valence=simp_frames; valence=full_frames}) | |
377 | + | |
378 | +let assign tokens text = | |
379 | + let lex_sems = ExtArray.make (ExtArray.size tokens) empty_lex_sem in | |
380 | + let _ = ExtArray.add lex_sems empty_lex_sem in | |
381 | + Int.iter 1 (ExtArray.size tokens - 1) (fun i -> | |
382 | + let token = ExtArray.get tokens i in | |
383 | + let token = find_proper_names tokens i token in | |
384 | + ExtArray.set tokens i token; | |
385 | + let senses = find_senses token in | |
386 | + let lex_sem = {empty_lex_sem with senses=senses} in | |
387 | + let _ = ExtArray.add lex_sems lex_sem in | |
388 | + ()); | |
389 | + let groups = split_tokens_into_groups (ExtArray.size tokens) text in | |
390 | + (* Xlist.iter groups (fun group -> print_endline (String.concat " " (Xlist.map group string_of_int))); *) | |
391 | + Xlist.iter groups (fun group -> assign_valence tokens lex_sems group); | |
392 | + Xlist.iter groups (fun group -> disambiguate_senses lex_sems group); | |
393 | + Xlist.iter groups (fun group -> assign_simplified_valence tokens lex_sems group); | |
394 | + Xlist.iter groups (fun group -> ENIAMlexSemanticsData.assign_semantics tokens lex_sems group); | |
395 | + lex_sems | |
... | ... |
lexSemantics/ENIAMlexSemanticsData.ml
... | ... | @@ -20,7 +20,7 @@ |
20 | 20 | open ENIAMtokenizerTypes |
21 | 21 | open ENIAMlexSemanticsTypes |
22 | 22 | open Xstd |
23 | -(* | |
23 | + | |
24 | 24 | let subst_inst_roles = Xlist.fold [ |
25 | 25 | "wiosna", "Time",""; |
26 | 26 | "lato", "Time",""; |
... | ... | @@ -365,7 +365,7 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
365 | 365 | ] StringMap.empty (fun map (lemma,case,role,role_attr,hipero,sel_prefs) -> |
366 | 366 | let hipero = Xlist.fold hipero StringSet.empty ENIAMplWordnet.get_hipero_rec in |
367 | 367 | let map2 = try StringMap.find map lemma with Not_found -> StringMap.empty in |
368 | - let map2 = StringMap.add_inc map2 case [role,role_attr,hipero,sel_prefs] (fun l -> (role,role_attr,hipero,sel_prefs) :: l) in | |
368 | + let map2 = StringMap.add_inc map2 case [case,role,role_attr,hipero,sel_prefs] (fun l -> (case,role,role_attr,hipero,sel_prefs) :: l) in | |
369 | 369 | StringMap.add map lemma map2) |
370 | 370 | (* "przeciwko","dat","Dat"; |
371 | 371 | "przeciw","dat","Dat"; |
... | ... | @@ -377,10 +377,10 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
377 | 377 | let assign_prep_semantics lemma cases t = |
378 | 378 | try |
379 | 379 | let map = StringMap.find prep_roles lemma in |
380 | - Xlist.map cases (fun case -> | |
381 | - {t with semantics=(try PrepSemantics (StringMap.find map case) with Not_found -> Normal); | |
382 | - token=Lemma(lemma,"prep",[[[case]]])}) | |
383 | - with Not_found -> [t] | |
380 | + let l = List.flatten (Xlist.map cases (fun case -> | |
381 | + try StringMap.find map case with Not_found -> [])) in | |
382 | + if l = [] then Normal else PrepSemantics l | |
383 | + with Not_found -> Normal | |
384 | 384 | |
385 | 385 | let subst_special_lexemes = Xlist.fold [ |
386 | 386 | "jutro", ["indexical"];(*"dzień"*) |
... | ... | @@ -532,21 +532,24 @@ let pron_lexemes = Xlist.fold [ |
532 | 532 | ] StringMap.empty (fun map (k,v,w) -> StringMap.add map k (SpecialMod(v,w)))*) |
533 | 533 | |
534 | 534 | (* UWAGA: przy przetwarzaniu danych zdezambiguowanych ta procedura nie zmienia liczby tokenów *) |
535 | -let assign_semantics paths = | |
536 | - List.rev (List.flatten (Xlist.rev_map paths (fun t -> | |
537 | - match t.token with | |
538 | - Lemma(lemma,"subst",_) -> [{t with lroles=(try StringMap.find subst_inst_roles lemma with Not_found -> "",""); semantics=try StringMap.find subst_special_lexemes lemma with Not_found -> Normal}] | |
539 | - | Lemma(lemma,"depr",_) -> [{t with semantics=try StringMap.find subst_special_lexemes lemma with Not_found -> Normal}] | |
540 | - | Lemma(lemma,"adj",_) -> [{t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal}] | |
541 | - | Lemma(lemma,"adjc",_) -> [{t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal}] | |
542 | - | Lemma(lemma,"adjp",_) -> [{t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal}] | |
543 | - | Lemma(lemma,"adv",_) -> [{t with lroles=(try StringMap.find adv_roles lemma with Not_found -> "",""); semantics=try StringMap.find adv_special_lexemes lemma with Not_found -> Normal}] | |
544 | - | Lemma(lemma,"qub",_) -> [{t with lroles=(try StringMap.find qub_roles lemma with Not_found -> "",""); semantics=try StringMap.find qub_special_lexemes lemma with Not_found -> Normal}] | |
535 | +let assign_semantics tokens lex_sems group = | |
536 | + Xlist.iter group (fun id -> | |
537 | + let token = (ExtArray.get tokens id).token in | |
538 | + let t = ExtArray.get lex_sems id in | |
539 | + let t = match token with | |
540 | + Lemma(lemma,"subst",_) -> {t with lroles=(try StringMap.find subst_inst_roles lemma with Not_found -> "",""); semantics=try StringMap.find subst_special_lexemes lemma with Not_found -> Normal} | |
541 | + | Lemma(lemma,"depr",_) -> {t with semantics=try StringMap.find subst_special_lexemes lemma with Not_found -> Normal} | |
542 | + | Lemma(lemma,"adj",_) -> {t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal} | |
543 | + | Lemma(lemma,"adjc",_) -> {t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal} | |
544 | + | Lemma(lemma,"adjp",_) -> {t with lroles=(try StringMap.find adj_roles lemma with Not_found -> "",""); semantics=try StringMap.find adj_special_lexemes lemma with Not_found -> Normal} | |
545 | + | Lemma(lemma,"adv",_) -> {t with lroles=(try StringMap.find adv_roles lemma with Not_found -> "",""); semantics=try StringMap.find adv_special_lexemes lemma with Not_found -> Normal} | |
546 | + | Lemma(lemma,"qub",_) -> {t with lroles=(try StringMap.find qub_roles lemma with Not_found -> "",""); semantics=try StringMap.find qub_special_lexemes lemma with Not_found -> Normal} | |
545 | 547 | (* | Lemma(lemma,"num",_) -> [{t with semantics=try StringMap.find num_lexemes lemma with Not_found -> Normal}] *) |
546 | - | Lemma(lemma,"ppron12",_) -> [{t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal}] | |
547 | - | Lemma(lemma,"ppron3",_) -> [{t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal}] | |
548 | - | Lemma(lemma,"siebie",_) -> [{t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal}] | |
549 | - | Lemma(lemma,"prep",l) -> Xlist.fold l [] (fun l -> function cases :: _ -> assign_prep_semantics lemma cases t @ l | [] -> l) | |
550 | - | _ -> [t] | |
551 | - ))) | |
552 | -*) | |
548 | + | Lemma(lemma,"ppron12",_) -> {t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal} | |
549 | + | Lemma(lemma,"ppron3",_) -> {t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal} | |
550 | + | Lemma(lemma,"siebie",_) -> {t with semantics=try StringMap.find pron_lexemes lemma with Not_found -> Normal} | |
551 | + | Lemma(lemma,"prep",l) -> | |
552 | + let cases = Xlist.fold l StringSet.empty (fun set -> function cases :: _ -> Xlist.fold cases set StringSet.add | _ -> set) in | |
553 | + {t with semantics=assign_prep_semantics lemma (StringSet.to_list cases) t} | |
554 | + | _ -> t in | |
555 | + ExtArray.set lex_sems id t) | |
... | ... |
lexSemantics/ENIAMlexSemanticsTypes.ml
... | ... | @@ -33,7 +33,7 @@ type semantics = |
33 | 33 | | Special of string list |
34 | 34 | (* | SpecialNoun of type_arg list * type_term |
35 | 35 | | SpecialMod of string * (type_arg list * type_term)*) |
36 | - | PrepSemantics of (string * string * StringSet.t * string list) list (* role,role_attr,hipero,sel_prefs *) | |
36 | + | PrepSemantics of (string * string * string * StringSet.t * string list) list (* case,role,role_attr,hipero,sel_prefs *) | |
37 | 37 | |
38 | 38 | type lex_sem = { |
39 | 39 | e: labels; |
... | ... |
lexSemantics/eniam-lexSemantics-1.0.tar.bz2
0 → 100644
No preview for this file type
lexSemantics/makefile
... | ... | @@ -3,7 +3,7 @@ OCAMLOPT=ocamlopt |
3 | 3 | OCAMLDEP=ocamldep |
4 | 4 | INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam |
5 | 5 | OCAMLFLAGS=$(INCLUDES) -g |
6 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa eniam-walenty.cmxa #eniam-lexSemantics.cmxa | |
6 | +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa eniam-walenty.cmxa eniam-lexSemantics.cmxa | |
7 | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | |
9 | 9 | SOURCES= ENIAMlexSemanticsTypes.ml ENIAMlexSemanticsData.ml ENIAMlexSemantics.ml |
... | ... | @@ -27,7 +27,7 @@ eniam-lexSemantics.cmxa: $(SOURCES) |
27 | 27 | ocamlopt -linkall -a -o eniam-lexSemantics.cmxa $(INCLUDES) $^ |
28 | 28 | |
29 | 29 | test: test.ml |
30 | - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) $(SOURCES) test.ml | |
30 | + $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml | |
31 | 31 | |
32 | 32 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
33 | 33 | |
... | ... |
lexSemantics/test.ml
... | ... | @@ -21,6 +21,11 @@ |
21 | 21 | let test_strings = [ |
22 | 22 | "Szpak frunie zimą."; |
23 | 23 | "Kot miauczy w październiku."; |
24 | + "Np. Ala."; | |
25 | + "Kot np. miauczy."; | |
26 | + "Szpak frunie. Kot miauczy."; | |
27 | + "Szpak powiedział: „Frunę. Kiszę.”"; | |
28 | + "W XX w. Warszawa."; | |
24 | 29 | (* "a gdybym miałem"; |
25 | 30 | "A Gdy Miałem"; |
26 | 31 | "GDY MIAŁEM"; |
... | ... |
pre/makefile
1 | 1 | OCAMLC=ocamlc |
2 | 2 | OCAMLOPT=ocamlopt |
3 | 3 | OCAMLDEP=ocamldep |
4 | -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam -I ../morphology -I ../parser -I ../corpora | |
4 | +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam | |
5 | 5 | OCAMLFLAGS=$(INCLUDES) -g |
6 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-plWordnet.cmxa eniam-walenty.cmxa | |
6 | +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa eniam-walenty.cmxa eniam-integration.cmxa eniam-lexSemantics.cmxa | |
7 | 7 | INSTALLDIR=`ocamlc -where` |
8 | 8 | |
9 | -WAL= paths.ml preTypes.ml | |
10 | -PRE= preSemantics.ml ../corpora/CONLL.ml preProcessing.ml | |
9 | +WAL= paths.ml | |
10 | +PRE= preProcessing.ml | |
11 | 11 | |
12 | 12 | all: |
13 | 13 | $(OCAMLOPT) -o pre $(OCAMLOPTFLAGS) $(WAL) $(PRE) |
... | ... | @@ -33,4 +33,4 @@ all: |
33 | 33 | $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< |
34 | 34 | |
35 | 35 | clean: |
36 | - rm -f *~ *.cm[oix] *.o pre concraft_test | |
36 | + rm -f *~ *.cm[oix] *.o pre | |
... | ... |
pre/preProcessing.ml
... | ... | @@ -17,355 +17,7 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -open Xstd | |
21 | -open PreTypes | |
22 | - | |
23 | -(* uwagi: | |
24 | -jak przetwarzać num:comp | |
25 | -czy rzeczownik niepoliczalny w liczbie mnogiej jest nadal niepoliczalny np. "Wody szumią." | |
26 | -trzeba zrobić słownik mwe, i nazw własnych | |
27 | -trzeba zweryfikować słownik niepoliczalnych | |
28 | -przetwarzanie liczebników złożonych np dwadzieścia jeden, jedna druga | |
29 | -*) | |
30 | - | |
31 | - | |
32 | - | |
33 | -(**********************************************************************************) | |
34 | - | |
35 | -let proper_names = | |
36 | - let l = Str.split_delim (Str.regexp "\n") (File.load_file Paths.proper_names_filename) in | |
37 | - let l2 = Str.split_delim (Str.regexp "\n") (File.load_file Paths.proper_names_filename2) in | |
38 | - Xlist.fold (l2 @ l) StringMap.empty (fun proper line -> | |
39 | - if String.length line = 0 then proper else | |
40 | - if String.get line 0 = '#' then proper else | |
41 | - match Str.split_delim (Str.regexp "\t") line with | |
42 | - [lemma; types] -> | |
43 | - let types = Str.split (Str.regexp "|") types in | |
44 | - StringMap.add_inc proper lemma types (fun types2 -> types @ types2) | |
45 | - | _ -> failwith ("proper_names: " ^ line)) | |
46 | - | |
47 | -let remove l s = | |
48 | - Xlist.fold l [] (fun l t -> | |
49 | - if s = t then l else t :: l) | |
50 | - | |
51 | -let find_proper_names paths = | |
52 | - List.rev (Xlist.rev_map paths (fun t -> | |
53 | - match t.token with | |
54 | - Lemma(lemma,pos,interp) -> | |
55 | - if StringMap.mem proper_names lemma then | |
56 | - {t with token=Proper(lemma,pos,interp,StringMap.find proper_names lemma); | |
57 | - attrs=remove t.attrs "notvalidated proper"} | |
58 | - else | |
59 | - if Xlist.mem t.attrs "notvalidated proper" then | |
60 | - {t with token=Proper(lemma,pos,interp,[])} | |
61 | - else t | |
62 | - | _ -> t)) | |
63 | - | |
64 | - | |
65 | -let assign_valence paths = | |
66 | - let lexemes = Xlist.fold paths StringMap.empty (fun lexemes t -> | |
67 | - match t.token with | |
68 | - Lemma(lemma,pos,_) -> | |
69 | - StringMap.add_inc lexemes lemma (StringSet.singleton pos) (fun set -> StringSet.add set pos) | |
70 | - | Proper(lemma,pos,_,_) -> | |
71 | - let pos = match pos with | |
72 | - "subst" -> "psubst" | |
73 | - | "depr" -> "pdepr" | |
74 | - | _ -> pos (*failwith ("assign_valence: Proper " ^ pos ^ " " ^ lemma)*) in | |
75 | - StringMap.add_inc lexemes lemma (StringSet.singleton pos) (fun set -> StringSet.add set pos) (* nazwy własne mają przypisywaną domyślną walencję rzeczowników *) | |
76 | - | _ -> lexemes) in | |
77 | - let valence = WalFrames.find_frames lexemes in | |
78 | - List.rev (Xlist.rev_map paths (fun t -> | |
79 | - match t.token with | |
80 | - Lemma(lemma,pos,_) -> {t with valence=try Xlist.rev_map (StringMap.find (StringMap.find valence lemma) pos) (fun frame -> 0,frame) with Not_found -> []} | |
81 | - | Proper(lemma,pos,interp,_) -> {t with valence=(try Xlist.rev_map (StringMap.find (StringMap.find valence lemma) | |
82 | - (if pos = "subst" || pos = "depr" then "p" ^ pos else pos)) (fun frame -> 0,frame) with Not_found -> [](*failwith ("assign_valence: Proper(" ^ lemma ^ "," ^ pos ^ ")")*)); | |
83 | - token=Lemma(lemma,pos,interp)} | |
84 | - | _ -> t)) | |
85 | - | |
86 | -(**********************************************************************************) | |
87 | - | |
88 | -(* let prepare_indexes (paths,_) = | |
89 | - let set = Xlist.fold paths IntSet.empty (fun set t -> | |
90 | - IntSet.add (IntSet.add set t.beg) t.next) in | |
91 | - let map,last = Xlist.fold (Xlist.sort (IntSet.to_list set) compare) (IntMap.empty,0) (fun (map,n) x -> | |
92 | - IntMap.add map x n, n+1) in | |
93 | - List.rev (Xlist.rev_map paths (fun t -> | |
94 | - {t with lnode=IntMap.find map t.beg; rnode=IntMap.find map t.next})), last - 1 *) | |
95 | - | |
96 | -let get_prefs_schema prefs schema = | |
97 | - Xlist.fold schema prefs (fun prefs t -> | |
98 | - Xlist.fold t.WalTypes.sel_prefs prefs StringSet.add) | |
99 | - | |
100 | -let map_prefs_schema senses schema = | |
101 | - Xlist.map schema (fun t -> | |
102 | - if Xlist.mem t.WalTypes.morfs (WalTypes.Phrase WalTypes.Pro) || Xlist.mem t.WalTypes.morfs (WalTypes.Phrase WalTypes.ProNG) then t else | |
103 | - {t with WalTypes.sel_prefs = Xlist.fold t.WalTypes.sel_prefs [] (fun l s -> | |
104 | - if StringSet.mem senses s then s :: l else l)}) | |
105 | - | |
106 | -let disambiguate_senses paths = | |
107 | - let prefs = Xlist.fold paths (StringSet.singleton "ALL") (fun prefs t -> | |
108 | - Xlist.fold t.valence prefs (fun prefs -> function | |
109 | - _,WalTypes.Frame(_,schema) -> get_prefs_schema prefs schema | |
110 | - | _,WalTypes.LexFrame(_,_,_,schema) -> get_prefs_schema prefs schema | |
111 | - | _,WalTypes.ComprepFrame(_,_,_,schema) -> get_prefs_schema prefs schema)) in | |
112 | - let hipero = Xlist.fold paths (StringSet.singleton "ALL") (fun hipero t -> | |
113 | - Xlist.fold t.senses hipero (fun hipero (_,l,_) -> | |
114 | - Xlist.fold l hipero StringSet.add)) in | |
115 | - let senses = StringSet.intersection prefs hipero in | |
116 | - let is_zero = StringSet.mem hipero "0" in | |
117 | - let senses = if is_zero then StringSet.add senses "0" else senses in | |
118 | - Xlist.map paths (fun t -> | |
119 | - {t with valence = if is_zero then t.valence else | |
120 | - Xlist.map t.valence (function | |
121 | - n,WalTypes.Frame(a,schema) -> n,WalTypes.Frame(a,map_prefs_schema senses schema) | |
122 | - | n,WalTypes.LexFrame(s,p,r,schema) -> n,WalTypes.LexFrame(s,p,r,map_prefs_schema senses schema) | |
123 | - | n,WalTypes.ComprepFrame(s,p,r,schema) -> n,WalTypes.ComprepFrame(s,p,r,map_prefs_schema senses schema)); | |
124 | - senses = Xlist.map t.senses (fun (s,l,w) -> | |
125 | - s, List.rev (Xlist.fold l [] (fun l s -> if StringSet.mem senses s then s :: l else l)),w)}) | |
126 | - | |
127 | -(*let single_sense (paths,last) = | |
128 | - List.rev (Xlist.rev_map paths (fun t -> | |
129 | - let sense = | |
130 | - if t.senses = [] then [] else | |
131 | - [Xlist.fold t.senses ("",[],-.max_float) (fun (max_meaning,max_hipero,max_weight) (meaning,hipero,weight) -> | |
132 | - if max_weight >= weight then max_meaning,max_hipero,max_weight else meaning,hipero,weight)] in | |
133 | - {t with senses=sense})), last*) | |
134 | - | |
135 | -open WalTypes | |
136 | - | |
137 | -(*let single_schema schemata = | |
138 | - let map = Xlist.fold schemata StringMap.empty (fun map schema -> | |
139 | - let t = WalStringOf.schema (List.sort compare (Xlist.fold schema [] (fun l s -> | |
140 | - if s.gf <> ARG && s.gf <> ADJUNCT then {s with role=""; role_attr=""; sel_prefs=[]} :: l else | |
141 | - if s.cr <> [] || s.ce <> [] then {s with role=""; role_attr=""; sel_prefs=[]} :: l else l))) in | |
142 | - StringMap.add_inc map t [schema] (fun l -> schema :: l)) in | |
143 | - StringMap.fold map [] (fun l _ schemata -> | |
144 | - let map = Xlist.fold schemata StringMap.empty (fun map schema -> | |
145 | - Xlist.fold schema map (fun map s -> | |
146 | - let t = WalStringOf.schema [{s with role=""; role_attr=""; sel_prefs=[]}] in | |
147 | - StringMap.add_inc map t [s] (fun l -> s :: l))) in | |
148 | - let schema = StringMap.fold map [] (fun schema _ l -> | |
149 | - let s = List.hd l in | |
150 | - {s with sel_prefs=Xlist.fold s.sel_prefs [] (fun l t -> if t = "0" || t = "T" then t :: l else l)} :: schema) in | |
151 | - schema :: l)*) | |
152 | - | |
153 | -let remove_meaning = function | |
154 | - DefaultAtrs(m,r,o,neg,p,a) -> DefaultAtrs([],r,o,neg,p,a) | |
155 | - | EmptyAtrs m -> EmptyAtrs [] | |
156 | - | NounAtrs(m,nsyn,s(*,typ*)) -> NounAtrs([],nsyn,s(*,typ*)) | |
157 | - | AdjAtrs(m,c,adjsyn(*,adjsem,typ*)) -> AdjAtrs([],c,adjsyn(*,adjsem,typ*)) | |
158 | - | PersAtrs(m,le,neg,mo,t,au,a) -> PersAtrs([],le,neg,mo,t,au,a) | |
159 | - | GerAtrs(m,le,neg,a) -> GerAtrs([],le,neg,a) | |
160 | - | NonPersAtrs(m,le,role,role_attr,neg,a) -> NonPersAtrs([],le,role,role_attr,neg,a) | |
161 | - | _ -> failwith "remove_meaning" | |
162 | - | |
163 | - | |
164 | -(*let single_frame (paths,last) = | |
165 | - List.rev (Xlist.rev_map paths (fun t -> | |
166 | - let lex_frames,frames = Xlist.fold t.valence ([],StringMap.empty) (fun (lex_frames,frames) -> function | |
167 | - Frame(attrs,schema) -> | |
168 | - let attrs = remove_meaning attrs in | |
169 | - lex_frames, StringMap.add_inc frames (WalStringOf.frame_atrs attrs) (attrs,[schema]) (fun (_,l) -> attrs, schema :: l) | |
170 | - | frame -> frame :: lex_frames, frames) in | |
171 | - let frames = StringMap.fold frames lex_frames (fun frames _ (attrs,schemata) -> | |
172 | - Xlist.fold (single_schema schemata) frames (fun frames frame -> Frame(attrs,frame) :: frames)) in | |
173 | - {t with valence=frames})), last *) | |
174 | - | |
175 | -let simplify_position_verb l = function (* FIXME: dodać czyszczenie E Pro *) | |
176 | - Phrase(NP(Case "dat")) -> l | |
177 | - | Phrase(NP(Case "inst")) -> l | |
178 | - | Phrase(PrepNP _) -> l | |
179 | - | Phrase(PrepAdjP _) -> l | |
180 | - | Phrase(NumP (Case "dat")) -> l | |
181 | - | Phrase(NumP (Case "inst")) -> l | |
182 | - | Phrase(PrepNumP _) -> l | |
183 | - | Phrase(ComprepNP _) -> l | |
184 | - | Phrase(ComparNP _) -> l | |
185 | - | Phrase(ComparPP _) -> l | |
186 | - | Phrase(IP) -> l | |
187 | - | Phrase(CP _) -> l | |
188 | - | Phrase(NCP(Case "dat",_,_)) -> l | |
189 | - | Phrase(NCP(Case "inst",_,_)) -> l | |
190 | - | Phrase(PrepNCP _) -> l | |
191 | -(* | Phrase(PadvP) -> l *) | |
192 | - | Phrase(AdvP) -> l | |
193 | - | Phrase(PrepP) -> l | |
194 | - | Phrase(Or) -> l | |
195 | - | Phrase(Qub) -> l | |
196 | - | Phrase(Adja) -> l | |
197 | - | Phrase(Inclusion) -> l | |
198 | - | Phrase Pro -> Phrase Null :: l | |
199 | - | t -> t :: l | |
200 | - | |
201 | -let simplify_position_noun l = function | |
202 | - Phrase(NP(Case "gen")) -> l | |
203 | - | Phrase(NP(Case "nom")) -> l | |
204 | - | Phrase(NP(CaseAgr)) -> l | |
205 | - | Phrase(PrepNP _) -> l | |
206 | - | Phrase(AdjP AllAgr) -> l | |
207 | - | Phrase(NumP (Case "gen")) -> l | |
208 | - | Phrase(NumP (Case "nom")) -> l | |
209 | - | Phrase(NumP (CaseAgr)) -> l | |
210 | - | Phrase(PrepNumP _) -> l | |
211 | - | Phrase(ComprepNP _) -> l | |
212 | - | Phrase(ComparNP _) -> l | |
213 | - | Phrase(ComparPP _) -> l | |
214 | - | Phrase(IP) -> l | |
215 | - | Phrase(NCP(Case "gen",_,_)) -> l | |
216 | - | Phrase(PrepNCP _) -> l | |
217 | - | Phrase(PrepP) -> l | |
218 | - | Phrase(Qub) -> l | |
219 | - | Phrase(Adja) -> l | |
220 | - | Phrase(Inclusion) -> l | |
221 | - | Phrase Pro -> Phrase Null :: l | |
222 | - | t -> t :: l | |
223 | - | |
224 | -let simplify_position_adj l = function | |
225 | - Phrase(AdvP) -> l | |
226 | - | t -> t :: l | |
227 | - | |
228 | -let simplify_position_adv l = function | |
229 | - Phrase(AdvP) -> l | |
230 | - | t -> t :: l | |
231 | - | |
232 | - | |
233 | -let simplify_position pos l s = | |
234 | - let morfs = match pos with | |
235 | - "verb" -> List.rev (Xlist.fold s.morfs [] simplify_position_verb) | |
236 | - | "noun" -> List.rev (Xlist.fold s.morfs [] simplify_position_noun) | |
237 | - | "adj" -> List.rev (Xlist.fold s.morfs [] simplify_position_adj) | |
238 | - | "adv" -> List.rev (Xlist.fold s.morfs [] simplify_position_adv) | |
239 | - | _ -> s.morfs in | |
240 | - match morfs with | |
241 | - [] -> l | |
242 | - | [Phrase Null] -> l | |
243 | - | _ -> {s with morfs=morfs} :: l | |
244 | - | |
245 | -let simplify_schemata pos schemata = | |
246 | - let schemata = Xlist.fold schemata StringMap.empty (fun schemata (schema,frame) -> | |
247 | - let schema = List.sort compare (Xlist.fold schema [] (fun l s -> | |
248 | - let s = {s with role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; morfs=List.sort compare s.morfs} in | |
249 | - if s.gf <> ARG && s.gf <> ADJUNCT then s :: l else | |
250 | -(* if s.cr <> [] || s.ce <> [] then s :: l else *) | |
251 | - simplify_position pos l s)) in | |
252 | - StringMap.add_inc schemata (WalStringOf.schema schema) (schema,[frame]) (fun (_,frames) -> schema, frame :: frames)) in | |
253 | - StringMap.fold schemata [] (fun l _ s -> s :: l) | |
254 | - | |
255 | -(* FIXME: problem ComprepNP i PrepNCP *) | |
256 | -(* FIXME: problem gdy ten sam token występuje w kilku ścieżkach *) | |
257 | -let generate_verb_prep_adjuncts preps = | |
258 | - Xlist.map preps (fun (lemma,case) -> WalFrames.verb_prep_adjunct_schema_field lemma case) | |
259 | - | |
260 | -let generate_verb_comprep_adjuncts compreps = | |
261 | - Xlist.map compreps (fun lemma -> WalFrames.verb_comprep_adjunct_schema_field lemma) | |
262 | - | |
263 | -let generate_verb_compar_adjuncts compars = | |
264 | - Xlist.map compars (fun lemma -> WalFrames.verb_compar_adjunct_schema_field lemma) | |
265 | - | |
266 | -let generate_noun_prep_adjuncts preps = | |
267 | - WalFrames.noun_prep_adjunct_schema_field preps | |
268 | - | |
269 | -let generate_noun_compar_adjuncts compars = | |
270 | - WalFrames.noun_compar_adjunct_schema_field compars | |
271 | - | |
272 | -let generate_adj_compar_adjuncts compars = | |
273 | - WalFrames.noun_compar_adjunct_schema_field compars | |
274 | - | |
275 | -let compars = StringSet.of_list ["jak";"jako";"niż";"niczym";"niby";"co"] | |
276 | - | |
277 | -let generate_prep_adjunct_tokens paths = | |
278 | - let map = Xlist.fold paths StringMap.empty (fun map t -> | |
279 | - match t.token with | |
280 | - Lemma(lemma,"prep",interp) -> | |
281 | - let map = if lemma = "po" then StringMap.add map "po:postp" ("po","postp") else map in | |
282 | - if StringSet.mem compars lemma then map else | |
283 | - Xlist.fold interp map (fun map -> function | |
284 | - [cases] -> Xlist.fold cases map (fun map case -> StringMap.add map (lemma ^ ":" ^ case) (lemma,case)) | |
285 | - | [cases;_] -> Xlist.fold cases map (fun map case -> StringMap.add map (lemma ^ ":" ^ case) (lemma,case)) | |
286 | - | _ -> map) | |
287 | - | _ -> map) in | |
288 | - StringMap.fold map [] (fun l _ v -> v :: l) | |
289 | - | |
290 | -let generate_comprep_adjunct_tokens paths = | |
291 | - let lemmas = Xlist.fold paths StringSet.empty (fun lemmas t -> | |
292 | - match t.token with | |
293 | - Lemma(lemma,_,_) -> StringSet.add lemmas lemma | |
294 | - | _ -> lemmas) in | |
295 | - StringMap.fold WalFrames.comprep_reqs [] (fun compreps comprep reqs -> | |
296 | - let b = Xlist.fold reqs true (fun b s -> b && StringSet.mem lemmas s) in | |
297 | - if b then comprep :: compreps else compreps) | |
298 | - | |
299 | -let generate_compar_adjunct_tokens paths = | |
300 | - let set = Xlist.fold paths StringSet.empty (fun set t -> | |
301 | - match t.token with | |
302 | - Lemma(lemma,"prep",interp) -> | |
303 | - if not (StringSet.mem compars lemma) then set else | |
304 | - StringSet.add set lemma | |
305 | - | _ -> set) in | |
306 | - StringSet.to_list set | |
307 | - | |
308 | -let is_measure = function | |
309 | - NounAtrs(_,_,Common "measure") -> true | |
310 | - | _ -> false | |
311 | - | |
312 | -let assign_simplified_valence paths = | |
313 | - let preps = generate_prep_adjunct_tokens paths in | |
314 | - let compreps = generate_comprep_adjunct_tokens paths in | |
315 | - let compars = generate_compar_adjunct_tokens paths in | |
316 | - let verb_prep_adjuncts = generate_verb_prep_adjuncts preps in | |
317 | - let verb_comprep_adjuncts = generate_verb_comprep_adjuncts compreps in | |
318 | - let verb_compar_adjuncts = generate_verb_compar_adjuncts compars in | |
319 | - let noun_prep_adjuncts = generate_noun_prep_adjuncts preps compreps in | |
320 | - let noun_compar_adjuncts = generate_noun_compar_adjuncts compars in | |
321 | - let adj_compar_adjuncts = generate_adj_compar_adjuncts compars in | |
322 | - let verb_adjuncts = WalFrames.verb_adjuncts_simp @ verb_prep_adjuncts @ verb_comprep_adjuncts @ verb_compar_adjuncts in | |
323 | - let noun_adjuncts = WalFrames.noun_adjuncts_simp @ [noun_prep_adjuncts] @ [noun_compar_adjuncts] in | |
324 | - let noun_measure_adjuncts = WalFrames.noun_measure_adjuncts_simp @ [noun_prep_adjuncts] @ [noun_compar_adjuncts] in | |
325 | - let adj_adjuncts = WalFrames.adj_adjuncts_simp @ [adj_compar_adjuncts] in | |
326 | - let adv_adjuncts = WalFrames.adv_adjuncts_simp @ [adj_compar_adjuncts] in | |
327 | - List.rev (Xlist.rev_map paths (fun t -> | |
328 | - let pos = match t.token with | |
329 | - Lemma(_,pos,_) -> WalFrames.simplify_pos pos | |
330 | - | _ -> "" in | |
331 | - let lex_frames,frames = Xlist.fold t.valence ([],StringMap.empty) (fun (lex_frames,frames) -> function | |
332 | - _,(Frame(attrs,schema) as frame) -> | |
333 | - let attrs = remove_meaning attrs in | |
334 | - lex_frames, StringMap.add_inc frames (WalStringOf.frame_atrs attrs) (attrs,[schema,frame]) (fun (_,l) -> attrs, (schema,frame) :: l) | |
335 | - | _,frame -> frame :: lex_frames, frames) in | |
336 | - let simp_frames,full_frames,n = Xlist.fold lex_frames ([],[],1) (fun (simp_frames,full_frames,n) frame -> | |
337 | - (n,frame) :: simp_frames, (n,frame) :: full_frames, n+1) in | |
338 | - let simp_frames,full_frames,_ = StringMap.fold frames (simp_frames,full_frames,n) (fun (simp_frames,full_frames,n) _ (attrs,schemata) -> | |
339 | - Xlist.fold (simplify_schemata pos schemata) (simp_frames,full_frames,n) (fun (simp_frames,full_frames,n) (schema,frames) -> | |
340 | - let schema = match pos with | |
341 | - "verb" -> schema @ verb_adjuncts | |
342 | - | "noun" -> schema @ (if is_measure attrs then noun_measure_adjuncts else noun_adjuncts) | |
343 | - | "adj" -> schema @ adj_adjuncts | |
344 | - | "adv" -> schema @ adv_adjuncts | |
345 | - | _ -> schema in | |
346 | - (n,Frame(attrs,schema)) :: simp_frames, | |
347 | - Xlist.fold frames full_frames (fun full_frames frame -> (n,frame) :: full_frames), | |
348 | - n+1)) in | |
349 | - {t with simple_valence=simp_frames; valence=full_frames})) | |
350 | - | |
351 | -(* FIXME: dodać do walencji preferencje selekcyjne nadrzędników symboli: dzień, godzina, rysunek itp. *) | |
352 | -(* FIXME: sprawdzić czy walencja nazw własnych jest dobrze zrobiona. *) | |
353 | - | |
354 | -(* let first_id = 1 (* id=0 jest zarezerwowane dla pro; FIXME: czy to jest jeszcze aktualne? *) | |
355 | - | |
356 | -let add_ids (paths,last) next_id = | |
357 | - let paths,next_id = Xlist.fold ((*List.rev*) paths) ([],next_id) (fun (paths,id) t -> | |
358 | - {t with id=id} :: paths, id+1) in | |
359 | - (paths,last),next_id *) | |
360 | - | |
361 | -let assign_senses paths = (* FIXME: sensy zawierające 'się' *) | |
362 | - List.rev (Xlist.rev_map paths (fun t -> | |
363 | - match t.token with | |
364 | - Lemma(lemma,pos,_) -> {t with senses=ENIAMplWordnet.find_senses lemma pos} | |
365 | - | Proper(_,_,_,senses) -> {t with senses=ENIAMplWordnet.find_proper_senses senses} | |
366 | - | _ -> t)) | |
367 | - | |
368 | - | |
20 | +(* | |
369 | 21 | let parse query = |
370 | 22 | (* print_endline "a1"; *) |
371 | 23 | let l = Xunicode.classified_chars_of_utf8_string query in |
... | ... | @@ -442,107 +94,12 @@ let parse_conll tokens dep_paths = (* FIXME: sprawdzić, czy zachowana jest kole |
442 | 94 | conll_id + 1) in |
443 | 95 | () |
444 | 96 | |
445 | -(* | |
446 | -UWAGA: Aby korzytać z concrafta trzeba najpierw postawić serwer wpisując z linii poleceń: | |
447 | -concraft-pl server --inmodel ../concraft/nkjp-model-0.2.gz | |
448 | -*) | |
449 | - | |
450 | -let read_whole_channel c = | |
451 | - let r = ref [] in | |
452 | - try | |
453 | - while true do | |
454 | - r := (input_line c) :: !r | |
455 | - done; | |
456 | - !r | |
457 | - with End_of_file -> List.rev (!r) | |
458 | - | |
459 | -let rec process_concraft_result orth lemma interp others rev = function | |
460 | - [] -> List.rev ((orth,(lemma,interp) :: others) :: rev) | |
461 | - | "" :: l -> process_concraft_result orth lemma interp others rev l | |
462 | - | line :: l -> | |
463 | - (match Xstring.split_delim "\t" line with | |
464 | - [orth2;s] when s = "none" || s = "space" -> | |
465 | - if orth = "" then process_concraft_result orth2 lemma interp others rev l | |
466 | - else process_concraft_result orth2 "" "" [] ((orth,(lemma,interp) :: others) :: rev) l | |
467 | - | ["";lemma2;interp2] -> process_concraft_result orth lemma interp ((lemma2,interp2) :: others) rev l | |
468 | - | ["";lemma;interp;"disamb"] -> process_concraft_result orth lemma interp others rev l | |
469 | - | _ -> failwith ("process_concraft_result: " ^ line)) | |
470 | - | |
471 | -let concraft_parse s = | |
472 | - let concraft_in, concraft_out, concraft_err = | |
473 | - Unix.open_process_full ("echo \"" ^ s ^ "\" | concraft-pl client") | |
474 | - [|"PATH=" ^ Sys.getenv "PATH"; "LANG=en_GB.UTF-8"|] in | |
475 | - let err_msg = String.concat "\n" (read_whole_channel concraft_err) in | |
476 | - let result = read_whole_channel concraft_in in | |
477 | - if err_msg <> "" then failwith err_msg else | |
478 | - process_concraft_result "" "" "" [] [] result | |
479 | - | |
480 | -(*let rec load_concraft_sentence white orth rev ic = | |
481 | - (* print_endline "load_concraft_sentence 1"; *) | |
482 | - (* print_endline ("concraft error message: " ^ input_line concraft_err); *) | |
483 | - let s = input_line ic in | |
484 | - (* print_endline ("load_concraft_sentence: " ^ s); *) | |
485 | - if s = "" then List.rev rev else | |
486 | - match Xstring.split_delim "\t" s with | |
487 | - [""; lemma; interp; "disamb"] -> load_concraft_sentence "" "" ((white,orth,lemma,interp) :: rev) ic | |
488 | - | [""; lemma; interp] -> load_concraft_sentence white orth rev ic | |
489 | - | [orth; white] -> load_concraft_sentence white orth rev ic | |
490 | - | _ -> failwith ("load_concraft_sentence: " ^ s)*) | |
491 | - | |
492 | -let make_token (orth,l) = | |
493 | - if l = [] then failwith "make_token 1" else | |
494 | - let lemma,interp = List.hd l in | |
495 | - let cat,interp = match Xstring.split ":" interp with | |
496 | - cat :: l -> cat, [Xlist.map l (fun tag -> [tag])] | |
497 | - | _ -> failwith ("make_token 2: " ^ orth ^ " " ^ lemma ^ " " ^ interp) in | |
498 | - {empty_token with orth = orth; token = Lemma(lemma,cat,interp)} | |
499 | - | |
500 | -let parse_mate tokens pbeg s = | |
501 | - (* print_endline ("parse_mate: " ^ s); *) | |
502 | - (* Printf.fprintf concraft_out "%s\n\n%!" s; | |
503 | - let l = load_concraft_sentence "" "" [] concraft_in in *) | |
504 | - let l = concraft_parse s in | |
505 | - let l = Xlist.map l make_token in | |
506 | - let l = {empty_token with token = Interp "<conll_root>"} :: l in | |
507 | - let l = Xlist.map l (fun t -> ExtArray.add tokens t,-1,"") in | |
508 | - let _ = CONLL.establish_for_token pbeg s tokens (List.tl l) in | |
509 | - let dep_paths = Array.of_list l in | |
510 | - parse_conll tokens dep_paths; | |
511 | - dep_paths | |
512 | - | |
513 | -let rec parse_mate_sentence tokens mode pbeg = function | |
514 | - RawSentence s -> if mode <> Mate || not Paths.config.Paths.concraft_enabled then RawSentence s else DepSentence (parse_mate tokens pbeg s) | |
515 | - | StructSentence(paths,last) -> StructSentence(paths,last) | |
516 | - | DepSentence(paths) -> DepSentence(paths) | |
517 | - | QuotedSentences sentences -> | |
518 | - QuotedSentences(Xlist.map sentences (fun p -> | |
519 | - {pid=p.PreTypes.pid; pbeg=p.PreTypes.pbeg; plen=p.PreTypes.plen; pnext=p.PreTypes.pnext; pfile_prefix=p.PreTypes.pfile_prefix; | |
520 | - psentence=parse_mate_sentence tokens mode pbeg p.PreTypes.psentence})) | |
521 | - | AltSentence l -> AltSentence(Xlist.map l (fun (mode,sentence) -> | |
522 | - mode, parse_mate_sentence tokens mode pbeg sentence)) | |
523 | - | |
524 | -let parse_mate_sentences tokens sentences = | |
525 | - Xlist.map sentences (fun p -> | |
526 | - {p with psentence=parse_mate_sentence tokens Struct p.pbeg p.psentence}) | |
527 | - | |
528 | 97 | let parse_text = function |
529 | 98 | RawText query -> |
530 | - (* print_endline ("parse_text: " ^ query); *) | |
531 | - let tokens = ExtArray.make 100 empty_token in | |
532 | - let _ = ExtArray.add tokens empty_token in (* id=0 jest zarezerwowane dla pro; FIXME: czy to jest jeszcze aktualne? *) | |
533 | - let paragraphs = Xstring.split "\n\\|\r" query in | |
534 | - let paragraphs = List.rev (Xlist.fold paragraphs [] (fun l -> function "" -> l | s -> s :: l)) in | |
535 | - let n = if Xlist.size paragraphs = 1 then 0 else 1 in | |
536 | - let paragraphs,_ = Xlist.fold paragraphs ([],n) (fun (paragraphs,n) paragraph -> | |
537 | - let paths = parse paragraph in | |
538 | - (* print_endline "parse_text 1"; *) | |
539 | - let pid = if n = 0 then "" else string_of_int n ^ "_" in | |
540 | - let sentences = PreSentences.split_into_sentences pid paragraph tokens paths in | |
541 | - (* print_endline "parse_text 2"; *) | |
542 | - let sentences = parse_mate_sentences tokens sentences in | |
543 | - (* print_endline "parse_text 3"; *) | |
544 | - (AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) :: paragraphs, n+1) in | |
545 | - AltText[Raw,RawText query; Struct,StructText(List.rev paragraphs, tokens)] | |
99 | + let text,tokens = ENIAMsubsyntax.parse_text query in | |
100 | + let text = ENIAMpreIntegration.parse_text ENIAMsubsyntaxTypes.Struct tokens text in | |
101 | + let lex_sems = ENIAMlexSemantics.assign tokens text in | |
102 | + text,tokens,lex_sems | |
546 | 103 | | AltText[Raw,RawText query;CONLL,StructText([ |
547 | 104 | StructParagraph[{psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence dep_paths]} as p]],tokens)] -> |
548 | 105 | parse_conll tokens dep_paths; |
... | ... | @@ -553,28 +110,54 @@ let parse_text = function |
553 | 110 | @ if Paths.config.Paths.mate_parser_enabled then [Mate, DepSentence m_dep_paths] else [])}] in |
554 | 111 | AltText[Raw,RawText query; Struct, StructText([ |
555 | 112 | AltParagraph[Raw,RawParagraph query; ENIAM, StructParagraph sentences; CONLL, conll]],tokens)] |
113 | + | _ -> failwith "parse_text: not implemented"*) | |
114 | + | |
115 | +open ENIAMsubsyntaxTypes | |
116 | + | |
117 | +let parse_text = function | |
118 | + RawText query,_ -> | |
119 | + let text,tokens = ENIAMsubsyntax.parse_text query in | |
120 | + let text = ENIAMpreIntegration.parse_text ENIAMsubsyntaxTypes.Struct tokens text in | |
121 | + let lex_sems = ENIAMlexSemantics.assign tokens text in | |
122 | + text,tokens,lex_sems | |
123 | + | AltText[Raw,RawText query;CONLL,StructText[ | |
124 | + StructParagraph[{psentence = AltSentence[Raw, RawSentence text; CONLL, DepSentence dep_paths]} as p]]],tokens -> | |
125 | + let m_dep_paths = Array.map (fun (id,_,_) -> id,-1,"") dep_paths in | |
126 | + let conll = StructParagraph[{p with psentence = AltSentence([Raw, RawSentence text; CONLL, DepSentence dep_paths] | |
127 | + @ if Paths.config.Paths.mate_parser_enabled then [Mate, DepSentence m_dep_paths] else [])}] in | |
128 | + let paths = ENIAMsubsyntax.parse query in | |
129 | + let sentences = ENIAMsentences.split_into_sentences "" query tokens paths in | |
130 | + let text = AltText[Raw,RawText query; Struct, StructText([ | |
131 | + AltParagraph[Raw,RawParagraph query; ENIAM, StructParagraph sentences; CONLL, conll]])] in | |
132 | + let lex_sems = ENIAMlexSemantics.assign tokens text in | |
133 | + text,tokens,lex_sems | |
556 | 134 | | _ -> failwith "parse_text: not implemented" |
557 | 135 | |
558 | 136 | let rec main_loop in_chan out_chan = |
559 | 137 | (* print_endline "main_loop 1"; *) |
560 | - let query = (Marshal.from_channel in_chan : text) in | |
138 | + let query = (Marshal.from_channel in_chan : text * ENIAMtokenizerTypes.token_record ExtArray.t) in | |
561 | 139 | (* print_endline "main_loop 2"; *) |
562 | - if query = RawText "" then () else ( | |
140 | + if fst query = RawText "" then () else ( | |
563 | 141 | (try |
564 | 142 | (* let time0 = Sys.time () in *) |
565 | 143 | let utime0 = Unix.gettimeofday () in |
566 | 144 | (* print_endline "main_loop 3a"; *) |
567 | - let text = parse_text query in | |
145 | + let text,tokens,lex_sems = parse_text query in | |
568 | 146 | (* print_endline "main_loop 4a"; *) |
569 | 147 | (* let time2 = Sys.time () in *) |
570 | 148 | let utime2 = Unix.gettimeofday () in |
571 | 149 | (* Printf.printf "time=%f utime=%f\n%!" (time2 -. time0) (utime2 -. utime0); *) |
572 | - Marshal.to_channel out_chan (text(*paths,last,next_id*),"",utime2 -. utime0) []; | |
150 | + Marshal.to_channel out_chan (text,tokens,lex_sems,"",utime2 -. utime0) []; | |
573 | 151 | (* print_endline "main_loop 5"; *) |
574 | 152 | () |
575 | 153 | with e -> ( |
576 | 154 | (* print_endline "main_loop 7"; *) |
577 | - Marshal.to_channel out_chan (RawText ""(*[],0*),Printexc.to_string e,0.) [])); | |
155 | + Marshal.to_channel out_chan ( | |
156 | + RawText "", | |
157 | + ExtArray.make 1 ENIAMtokenizerTypes.empty_token, | |
158 | + ExtArray.make 1 ENIAMlexSemanticsTypes.empty_lex_sem, | |
159 | + Printexc.to_string e, | |
160 | + 0.) [])); | |
578 | 161 | (* print_endline "main_loop 6"; *) |
579 | 162 | flush out_chan; |
580 | 163 | main_loop in_chan out_chan) |
... | ... |
pre/preTypes.ml deleted
1 | -(* | |
2 | - * ENIAM: Categorial Syntactic-Semantic Parser for Polish | |
3 | - * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This program is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This program is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | -open Xstd | |
21 | - | |
22 | -(*let single_sense_flag = ref false(*true*) | |
23 | -let single_frame_flag = ref false(*true*)*) | |
24 | - | |
25 | -(*type pos_record = {interp: string list list list; attrs: string list; proper: string list; senses: string list} | |
26 | - | |
27 | -type dict = {lemmas: pos_record StringMap.t StringMap.t; dbeg: int; dlen: int}*) | |
28 | - | |
29 | -(* type selector = Orth of string | Pos of string (*| All *) *) | |
30 | - | |
31 | -(* Długość pojedynczego znaku w tekście *) | |
32 | -let factor = 100 | |
33 | - | |
34 | -type labels = { | |
35 | - number: string; | |
36 | - case: string; | |
37 | - gender: string; | |
38 | - person: string; | |
39 | - aspect: string; | |
40 | - } | |
41 | - | |
42 | - | |
43 | -(*type type_arg = | |
44 | - TArg of string | |
45 | - | TWith of type_arg list | |
46 | - | |
47 | -and type_term = | |
48 | - TConst of string * type_arg list | |
49 | - | TMod of type_term * type_term | |
50 | - | TName of string | |
51 | - | TVariant of type_term * type_term*) | |
52 | - | |
53 | - | |
54 | -type semantics = | |
55 | - Normal | |
56 | - | Special of string list | |
57 | -(* | SpecialNoun of type_arg list * type_term | |
58 | - | SpecialMod of string * (type_arg list * type_term)*) | |
59 | - | PrepSemantics of (string * string * StringSet.t * string list) list (* role,role_attr,hipero,sel_prefs *) | |
60 | - | |
61 | -type token = | |
62 | - SmallLetter of string (* orth *) | |
63 | - | CapLetter of string * string (* orth * lowercase *) | |
64 | - | AllSmall of string (* orth *) | |
65 | - | AllCap of string * string * string (* orth * lowercase * all lowercase *) | |
66 | - | FirstCap of string * string * string * string (* orth * all lowercase * first letter uppercase * first letter lowercase *) | |
67 | - | SomeCap of string (* orth *) | |
68 | - | RomanDig of string * string (* value * cat *) | |
69 | - | Interp of string (* orth *) | |
70 | - | Symbol of string (* orth *) | |
71 | - | Dig of string * string (* value * cat *) | |
72 | - | Other2 of string (* orth *) | |
73 | - | Lemma of string * string * string list list list (* lemma * cat * interp *) | |
74 | - | Proper of string * string * string list list list * string list (* lemma * cat * interp * senses *) | |
75 | -(* | Sense of string * string * string list list list * (string * string * string list) list (* lemma * cat * interp * senses *) *) | |
76 | - | Compound of string * token list (* sense * components *) | |
77 | - | Tokens of string * int list (*cat * token id list *) | |
78 | - | |
79 | -(* Tekst reprezentuję jako zbiór obiektów typu token_record zawierających | |
80 | - informacje o poszczególnych tokenach *) | |
81 | -and token_record = { | |
82 | - orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token *) | |
83 | - corr_orth: string; (* sekwencja znaków pierwotnego tekstu składająca się na token z poprawionymi błędami *) | |
84 | - beg: int; (* pozycja początkowa tokenu względem początku akapitu *) | |
85 | - len: int; (* długość tokenu *) | |
86 | - next: int; (* pozycja początkowa następnego tokenu względem początku akapitu *) | |
87 | - token: token; (* treść tokenu *) | |
88 | - attrs: string list; (* dodatkowe atrybuty *) | |
89 | - weight: float; | |
90 | - e: labels; | |
91 | - valence: (int * ENIAMwalTypes.frame) list; | |
92 | - simple_valence: (int * ENIAMwalTypes.frame) list; | |
93 | - senses: (string * string list * float) list; | |
94 | - lroles: string * string; | |
95 | - semantics: semantics; | |
96 | - } | |
97 | - | |
98 | -(* Tokeny umieszczone są w strukturze danych umożliwiającej efektywne wyszukiwanie ich sekwencji, | |
99 | - struktura danych sama z siebie nie wnosi informacji *) | |
100 | -type tokens = | |
101 | - | Token of token_record | |
102 | - | Variant of tokens list | |
103 | - | Seq of tokens list | |
104 | - | |
105 | -type pat = L | CL | D of string | C of string | S of string | RD of string | O of string | |
106 | - | |
107 | -let empty_labels = { | |
108 | - number=""; | |
109 | - case=""; | |
110 | - gender=""; | |
111 | - person=""; | |
112 | - aspect=""; | |
113 | - } | |
114 | - | |
115 | -let empty_token = { | |
116 | - orth="";corr_orth="";beg=0;len=0;next=0; token=Symbol ""; weight=0.; e=empty_labels; | |
117 | - attrs=[]; valence=[]; simple_valence=[]; senses=[]; | |
118 | - lroles="",""; semantics=Normal} | |
119 | - | |
120 | -type mode = | |
121 | - Raw | Struct | CONLL | ENIAM | Mate | Swigra | POLFIE | |
122 | - | |
123 | -(* warstwy nkjp1m do analizy: | |
124 | -header | |
125 | -text | |
126 | -ann_segmentation | |
127 | -ann_morphosyntax | |
128 | -ann_named | |
129 | -*) | |
130 | - | |
131 | -(* zdania wydobyte na zewnątrz *) | |
132 | -(* struktura ponadzdaniowa przetwarzana przed strukturą zależnościową *) | |
133 | -(* istnieje ryzyko eksplozji interpretacji *) | |
134 | -type sentence = | |
135 | - RawSentence of string | |
136 | - (* | CONLL of conll list *) | |
137 | - | StructSentence of (int * int * int) list * int (* (id * lnode * rnode) list * last *) | |
138 | - | DepSentence of (int * int * string) array (* (id * super * label) conll_id *) | |
139 | - | QuotedSentences of paragraph_record list | |
140 | - (* | NKJP1M of nkjp1m list *) | |
141 | - (* | Skladnica of skladnica_tree *) | |
142 | - | AltSentence of (mode * sentence) list (* string = etykieta np raw, nkjp, krzaki *) | |
143 | - | |
144 | -and paragraph_record = {pid: string; pbeg: int; plen: int; pnext: int; psentence: sentence; pfile_prefix: string} (* beg i len liczone po znakach unicode ( * 100 ???) *) | |
145 | - | |
146 | -and paragraph = | |
147 | - RawParagraph of string | |
148 | - | StructParagraph of paragraph_record list (* zdania *) | |
149 | - | AltParagraph of (mode * paragraph) list | |
150 | - | |
151 | -type text = | |
152 | - RawText of string | |
153 | - | StructText of paragraph list * token_record ExtArray.t (* akapity * tokeny *) | |
154 | - | AltText of (mode * text) list |
subsyntax/ENIAMsentences.ml
... | ... | @@ -181,10 +181,7 @@ let rec extract_sentences_rec tokens id = |
181 | 181 | Tokens("sentence",ids) -> |
182 | 182 | let paths,last = make_paths tokens ids in |
183 | 183 | [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next; pfile_prefix=""; |
184 | - psentence=AltSentence([Raw,RawSentence t.orth; ENIAM,StructSentence(paths,last)] (*@ | |
185 | - (if Paths.config.Paths.mate_parser_enabled then [Mate,RawSentence t.orth] else []) @ | |
186 | - (if Paths.config.Paths.swigra_enabled then [Swigra,RawSentence t.orth] else [])*)(* @ | |
187 | - (if Paths.config.Paths.polfie_enabled then [POLFIE,RawSentence t.orth] else []) *))}] | |
184 | + psentence=AltSentence([Raw,RawSentence t.orth; ENIAM,StructSentence(paths,last)])}] | |
188 | 185 | | Tokens("quoted_sentences",ids) -> |
189 | 186 | [{pid=string_of_int id; pbeg=t.beg; plen=t.len; pnext=t.next; pfile_prefix=""; |
190 | 187 | psentence=AltSentence[Raw,RawSentence t.orth; |
... | ... |
subsyntax/ENIAMsubsyntax.ml
... | ... | @@ -234,8 +234,5 @@ let parse_text query = |
234 | 234 | (* print_endline "parse_text 1"; *) |
235 | 235 | let pid = if n = 0 then "" else string_of_int n ^ "_" in |
236 | 236 | let sentences = ENIAMsentences.split_into_sentences pid paragraph tokens paths in |
237 | - (* print_endline "parse_text 2"; *) | |
238 | - (* let sentences = parse_mate_sentences tokens sentences in *) | |
239 | - (* print_endline "parse_text 3"; *) | |
240 | 237 | (AltParagraph[Raw,RawParagraph paragraph; Struct,StructParagraph sentences]) :: paragraphs, n+1) in |
241 | 238 | AltText[Raw,RawText query; Struct,StructText(List.rev paragraphs)], tokens |
... | ... |
subsyntax/TODO
... | ... | @@ -2,5 +2,5 @@ |
2 | 2 | - dodać zasoby MWE |
3 | 3 | - rozpoznawanie MWE ze Słowosieci |
4 | 4 | |
5 | -- przenieść odwołanie do Świgry i Mate z ENIAMsentences | |
6 | -- przenieść odwołanie do parse_mate_sentences z ENIAMsubsyntax | |
5 | +- jak przetwarzać num:comp | |
6 | +- przetwarzanie liczebników złożonych np dwadzieścia jeden, jedna druga | |
... | ... |
walenty/TODO
... | ... | @@ -4,3 +4,5 @@ a jedynie instalator, który dostaje Walentego i go przetwarza. |
4 | 4 | - dowiązanie symboliczne do Walentego - udokumentować. |
5 | 5 | - uporządkować położenia słowników pojemników itp. |
6 | 6 | - uporządkować położenie fixed.tab |
7 | +- czy rzeczownik niepoliczalny w liczbie mnogiej jest nadal niepoliczalny np. "Wody szumią." | |
8 | +- trzeba zweryfikować słownik niepoliczalnych | |
... | ... |