Commit b8d9e115cb4ee2506c5457b6497a0b4ebb50baf3

Authored by Wojciech Jaworski
1 parent 27138426

upraszczanie schematów walencyjnych

lexSemantics/ENIAMadjuncts.ml 0 → 100644
  1 +(*
  2 + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
  3 + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
  4 + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
  5 + *
  6 + * This library is free software: you can redistribute it and/or modify
  7 + * it under the terms of the GNU Lesser General Public License as published by
  8 + * the Free Software Foundation, either version 3 of the License, or
  9 + * (at your option) any later version.
  10 + *
  11 + * This library is distributed in the hope that it will be useful,
  12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14 + * GNU Lesser General Public License for more details.
  15 + *
  16 + * You should have received a copy of the GNU Lesser General Public License
  17 + * along with this program. If not, see <http://www.gnu.org/licenses/>.
  18 + *)
  19 +
  20 +open ENIAMwalTypes
  21 +open Xstd
  22 +
  23 +let simplify_position_verb mode l = function (* FIXME: dodać czyszczenie E Pro *)
  24 + NP(Case "dat") -> l
  25 + | NP(Case "inst") -> l
  26 + | NCP(Case "dat",_,_) -> l
  27 + | NCP(Case "inst",_,_) -> l
  28 + | CP _ -> l
  29 + | PrepNP _ -> l
  30 + | PrepAdjP _ -> l
  31 + | ComprepNP _ -> l
  32 + | ComparP _ -> l
  33 + | PrepNCP _ -> l
  34 + | AdvP _ -> l
  35 + | Or -> l
  36 + | SimpleLexArg("się",QUB) -> l
  37 + | E Or -> l
  38 + | E (CP(CompTypeUndef,CompUndef)) -> l
  39 + | E (PrepNP(prep,Case case)) -> l
  40 + | E (PrepNCP(prep,Case case,CompTypeUndef,CompUndef)) -> l
  41 + | NP(Case "gen") as t -> if mode = "temp" then l else t :: l
  42 + | NP(Case "acc") as t -> if mode = "dur" then l else t :: l
  43 + | t -> t :: l
  44 +
  45 +let simplify_position_noun mode l = function
  46 + NP(Case "gen") -> l
  47 + | NP(Case "nom") -> l
  48 + | NCP(Case "gen",_,_) -> l
  49 + | NP(CaseAgr) -> l
  50 + | AdjP AllAgr -> l
  51 + | PrepNP _ -> l
  52 + | ComprepNP _ -> l
  53 + | ComparP _ -> l
  54 + | PrepNCP _ -> l
  55 + | t -> t :: l
  56 +
  57 +let simplify_position_adj mode l = function
  58 + AdvP _ -> l
  59 + | ComparP _ -> l
  60 + | t -> t :: l
  61 +
  62 +let simplify_position_adv mode l = function
  63 + AdvP _ -> l (* FIXME: czy na pewno zostawić swobodę modyfikowania przysłówka? *)
  64 + | t -> t :: l
  65 +
  66 +(*
  67 +let simplify_position pos l s =
  68 + let morfs = match pos with
  69 + "verb" -> List.rev (Xlist.fold s.morfs [] simplify_position_verb)
  70 + | "noun" -> List.rev (Xlist.fold s.morfs [] simplify_position_noun)
  71 + | "adj" -> List.rev (Xlist.fold s.morfs [] simplify_position_adj)
  72 + | "adv" -> List.rev (Xlist.fold s.morfs [] simplify_position_adv)
  73 + | _ -> s.morfs in
  74 + match morfs with
  75 + [] -> l
  76 + | [Phrase Null] -> l
  77 + | _ -> {s with morfs=morfs} :: l
  78 +
  79 +let simplify_schemata pos schemata =
  80 + let schemata = Xlist.fold schemata StringMap.empty (fun schemata (schema,frame) ->
  81 + let schema = List.sort compare (Xlist.fold schema [] (fun l s ->
  82 + let s = {s with role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; morfs=List.sort compare s.morfs} in
  83 + if s.gf <> ARG && s.gf <> ADJUNCT then s :: l else
  84 + (* if s.cr <> [] || s.ce <> [] then s :: l else *)
  85 + simplify_position pos l s)) in
  86 + StringMap.add_inc schemata (ENIAMwalStringOf.schema schema) (schema,[frame]) (fun (_,frames) -> schema, frame :: frames)) in
  87 + StringMap.fold schemata [] (fun l _ s -> s :: l)
  88 +
  89 +let simplify_schemata2 pos schemata =
  90 + let simplify_position_fun = match pos with
  91 + "verb" -> simplify_position_verb2
  92 + | "noun" -> simplify_position_noun
  93 + | "adj" -> simplify_position_adj
  94 + | "adv" -> simplify_position_adv
  95 + | _ -> (fun l x -> x :: l) in
  96 + let morfs = Xlist.fold schemata [] (fun morfs schema ->
  97 + Xlist.fold schema morfs (fun morfs s ->
  98 + Xlist.fold s.morfs morfs simplify_position_fun)) in
  99 + let morfs = Xlist.fold morfs StringMap.empty (fun map s ->
  100 + StringMap.add map (ENIAMwalStringOf.morf s) s) in
  101 + let schema = StringMap.fold morfs [] (fun schema _ morf ->
  102 + {gf=ARG; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[];
  103 + dir=Both; morfs=[Phrase Null;morf]} :: schema) in
  104 + schema*)
  105 +
  106 +(*let rec classify_phrase = function
  107 + NP _ as phrase -> ENIAMwalStringOf.phrase phrase
  108 + | PrepNP _ as phrase -> ENIAMwalStringOf.phrase phrase
  109 + | AdjP _ as phrase -> ENIAMwalStringOf.phrase phrase
  110 + | PrepAdjP _ as phrase -> ENIAMwalStringOf.phrase phrase
  111 + | ComprepNP _ as phrase -> ENIAMwalStringOf.phrase phrase
  112 + | ComparP _ as phrase -> ENIAMwalStringOf.phrase phrase
  113 + | CP _ -> "cp"
  114 + | NCP(case,_,_) -> ENIAMwalStringOf.phrase (NP case)
  115 + | PrepNCP(prep,case,_,_) -> ENIAMwalStringOf.phrase (PrepNP(prep,case))
  116 + | InfP _ -> "infp"
  117 + | AdvP _ -> "advp"
  118 + | FixedP _ as phrase -> ENIAMwalStringOf.phrase phrase
  119 + | Or -> "or"
  120 + | E Or -> "or"
  121 + | E phrase -> classify_phrase phrase
  122 + | SimpleLexArg("się",QUB) -> "się"
  123 + (* | SimpleLexArg _ -> "lex" *)
  124 +(* | LexArg _ -> "lex" *)
  125 + | SimpleLexArg _ as phrase -> ENIAMwalStringOf.phrase phrase
  126 + | LexArg _ as phrase -> ENIAMwalStringOf.phrase phrase
  127 + | phrase -> print_endline ("classify_phrase: " ^ ENIAMwalStringOf.phrase phrase); "other"
  128 +
  129 +let classify_position pos p =
  130 + let l = (*StringSet.to_list*) (Xlist.fold p.morfs StringSet.empty (fun set morf ->
  131 + StringSet.add set ((*classify_phrase*)ENIAMwalStringOf.phrase morf))) in
  132 + (* match l with
  133 + [] -> "empty"
  134 + | [c] -> c
  135 + (* | ["np(gen)"; "np(acc)"] -> "np(str)"
  136 + | ["np(gen)"; "infp"] -> "np(gen)-infp"
  137 + | ["np(acc)"; "infp"] -> "np(acc)-infp" *)
  138 + | _ -> let c = String.concat " " l in if pos="adv" then print_endline c; c *)
  139 + l*)
  140 +
  141 +module OrderedPhrase = struct
  142 + type t = phrase
  143 + let compare = compare
  144 +end
  145 +
  146 +module PhraseSet = Xset.Make(OrderedPhrase)
  147 +
  148 +
  149 +let remove_adjuncts_schema pos lemma schema =
  150 + let simplify_position_fun = match pos with
  151 + "verb" -> simplify_position_verb
  152 + | "noun" -> simplify_position_noun
  153 + | "adj" -> simplify_position_adj
  154 + | "adv" -> simplify_position_adv
  155 + | _ -> (fun _ l x -> x :: l) in
  156 + List.flatten (Xlist.map schema (fun p ->
  157 + let morfs = Xlist.fold p.morfs [] (simplify_position_fun (String.concat " " p.mode)) in
  158 + if morfs = [] then [] else [PhraseSet.of_list morfs]))
  159 + (* let schema2 = List.flatten (Xlist.map schema1 (fun p ->
  160 + let p = {p with morfs = Xlist.fold p.morfs [] (simplify_position_fun (String.concat " " p.mode))} in
  161 + let c = classify_position pos p in
  162 + if StringSet.is_empty c (*"empty"*) then [] else [c,[p]])) in
  163 + (* let sum = Xlist.fold schema2 StringSet.empty (fun set (c,p) -> StringSet.union set c) in
  164 + let n = Xlist.fold schema2 0 (fun n (c,p) -> n + StringSet.size c) in
  165 + if StringSet.size sum <> n (*&& pos = "noun"*) then (*Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema1);*)
  166 + Printf.printf "%s %s %s\n" pos lemma (String.concat "+" (Xlist.map schema2 (fun (c,p) -> String.concat "," p.mode ^ "{" ^ String.concat ";" (StringSet.to_list c) ^ "}"))); *)
  167 +(* let set = Xlist.fold schema2 StringSet.empty (fun set (c,_) ->
  168 + StringSet.add set c) in
  169 + (* if StringSet.mem set "np(acc)" && StringSet.mem set "infp" then
  170 + Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema1); *)
  171 + let schema2 = if StringSet.mem set "np(gen)" && StringSet.mem set "np(acc)" then schema2 else
  172 + Xlist.map schema2 (function
  173 + "np(gen)",p -> "np(str)",p
  174 + | "np(acc)",p -> "np(str)",p
  175 + | c,p -> c,p) in*)
  176 + schema2 *)
  177 +
  178 +
  179 +let is_disjunctive schema =
  180 + let sum = Xlist.fold schema PhraseSet.empty (fun set morfs -> PhraseSet.union set morfs) in
  181 + let n = Xlist.fold schema 0 (fun n morfs -> n + PhraseSet.size morfs) in
  182 + PhraseSet.size sum = n
  183 +
  184 +let rec find_overlapping morfs rev = function
  185 + morfs2 :: schema ->
  186 + if PhraseSet.is_empty (PhraseSet.intersection morfs morfs2) then find_overlapping morfs (morfs2 :: rev) schema
  187 + else morfs2, List.rev rev @ schema
  188 + | [] -> raise Not_found
  189 +
  190 +let rec merge_schemata_rec cont = function
  191 + [] -> cont
  192 + | [schema] -> schema :: cont
  193 + | schema1 :: schema2 :: schemata ->
  194 + let sum_schema,diff_schema = Xlist.fold schema1 ([],schema2) (fun (sum_schema,diff_schema) morfs ->
  195 + try
  196 + let morfs2,diff_schema = find_overlapping morfs [] diff_schema in
  197 + (PhraseSet.union morfs morfs2) :: sum_schema,diff_schema
  198 + with Not_found -> morfs :: sum_schema,diff_schema) in
  199 + let schema = sum_schema @ diff_schema in
  200 + if is_disjunctive schema then ((*print_endline "A";*) merge_schemata_rec cont (schema :: schemata))
  201 + else ((*print_endline "B";*) merge_schemata_rec (schema :: cont) schemata)
  202 +
  203 +let rec merge_schemata schemata =
  204 + let cont,schemata = Xlist.fold schemata ([],[]) (fun (cont,schemata) schema ->
  205 + if is_disjunctive schema then cont, schema :: schemata else ((*print_endline "C";*) schema :: cont, schemata)) in
  206 + merge_schemata_rec cont schemata
  207 +
  208 +
  209 +
  210 +module OrderedSelector = struct
  211 + type t = (ENIAM_LCGlexiconTypes.selector *
  212 + ENIAM_LCGlexiconTypes.selector_relation * string list)
  213 + list
  214 + let compare = compare
  215 +end
  216 +
  217 +module SelectorMap = Xmap.Make(OrderedSelector)
  218 +
  219 +let latex_of_selectors selectors =
  220 + String.concat ", " (Xlist.map selectors (fun (cat,rel,l) ->
  221 + let rel = if rel = ENIAM_LCGlexiconTypes.Eq then "=" else "!=" in
  222 + ENIAMcategoriesPL.string_of_selector cat ^ rel ^ (String.concat "|" l)))
  223 +
  224 +let simplify_valence pos pos2 lemma schemata =
  225 + (* Xlist.iter schemata (fun (selectors,schema) -> if pos2="verb" then Printf.printf "A %s %s [%s] %s\n" pos lemma (latex_of_selectors selectors) (ENIAMwalStringOf.schema schema)); *)
  226 + let map = Xlist.fold schemata SelectorMap.empty (fun map (selectors,schema) ->
  227 + SelectorMap.add_inc map selectors [schema] (fun l -> schema :: l)) in
  228 + let schemata = SelectorMap.fold map [] (fun new_schemata selectors schemata ->
  229 + (selectors,Xlist.map schemata (remove_adjuncts_schema pos2 lemma)) :: new_schemata) in
  230 + (* Xlist.iter schemata (fun (_,schemata) ->
  231 + Xlist.iter schemata (fun schema -> if pos2="verb" then Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema))); *)
  232 + (* Xlist.iter schemata (fun (selectors,schemata) -> Xlist.iter schemata (fun schema ->
  233 + if pos2="verb" then Printf.printf "B %s %s [%s] %s\n" pos lemma (latex_of_selectors selectors) (String.concat "+" (Xlist.map schema (fun morfs ->
  234 + "{" ^ String.concat ";" (PhraseSet.fold morfs [] (fun l m -> ENIAMwalStringOf.phrase m :: l)) ^ "}"))))); *)
  235 + let schemata = List.flatten (Xlist.map schemata (fun (selectors,schemata) ->
  236 + Xlist.map (merge_schemata schemata) (fun schema -> selectors,Xlist.map schema PhraseSet.to_list))) in
  237 + (* Xlist.iter schemata (fun (selectors,schema) ->
  238 + if pos2="verb" then Printf.printf "C %s %s [%s] %s\n" pos lemma (latex_of_selectors selectors) (String.concat "+" (Xlist.map schema (fun morfs ->
  239 + "{" ^ String.concat ";" (PhraseSet.fold morfs [] (fun l m -> ENIAMwalStringOf.phrase m :: l)) ^ "}")))); *)
  240 + schemata
  241 +
  242 +let _ =
  243 + let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries in
  244 + let _ = Entries.map2 schemata (fun pos lemma schemata -> simplify_valence pos (ENIAMvalence.simplify_pos pos) lemma schemata) in
  245 + ()
  246 +
  247 +
  248 +(*
  249 +let default_frames = Xlist.fold [ (* FIXME: poprawić domyślne ramki po ustaleniu adjunctów *)
  250 + "verb",(ReflEmpty,Domyslny,NegationUndef,PredNA,AspectUndef,"subj{np(str)}+obj{np(str)}"); (* FIXME: dodać ramkę z refl *)
  251 + "noun",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{possp}+{adjp(agr)}");
  252 + "adj",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"");
  253 + "adv",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"");
  254 + "empty",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"");
  255 + "date",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',natr)}");
  256 + "date2",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',atr1({adjp(agr)}))}"); (* FIXME: wskazać możliwe podrzędniki *)
  257 + "day",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""
  258 + (*"{lex(np(gen),sg,XOR('styczeń','luty','marzec','kwiecień','maj','czerwiec','lipiec','sierpień','wrzesień','październik','litopad','grudzień'),atr1({np(gen)}))}"*)); (* FIXME: wskazać możliwe podrzędniki *)
  259 + "hour",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(advp(temp),pos,'rano',natr)}");
  260 + ] StringMap.empty (fun map (k,(refl,opinion,negation,pred,aspect,schema)) ->
  261 + StringMap.add map k (Frame(DefaultAtrs([],refl,opinion,negation,pred,aspect),prepare_schema expands subtypes equivs schema)))
  262 +
  263 +let adjunct_schema_field role dir morfs =
  264 + {gf=ADJUNCT; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs}
  265 +
  266 +let verb_prep_adjunct_schema_field lemma case =
  267 + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[
  268 + Phrase Null;
  269 + Phrase(PrepNP(Sem,lemma,Case case));
  270 + Phrase(PrepAdjP(Sem,lemma,Case case));
  271 + Phrase(PrepNumP(Sem,lemma,Case case))]}
  272 +
  273 +let verb_comprep_adjunct_schema_field lemma =
  274 + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[
  275 + Phrase Null;
  276 + Phrase(ComprepNP(Sem,lemma))]}
  277 +
  278 +let verb_compar_adjunct_schema_field lemma =
  279 + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[
  280 + Phrase Null;
  281 + Phrase(ComparPP(Sem,lemma))] @
  282 + Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case)))}
  283 +
  284 +let noun_prep_adjunct_schema_field preps compreps =
  285 + {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=
  286 + let l = Xlist.fold preps [Phrase Null] (fun l (lemma,case) ->
  287 + [Phrase(PrepNP(Sem,lemma,Case case));
  288 + Phrase(PrepAdjP(Sem,lemma,Case case));
  289 + Phrase(PrepNumP(Sem,lemma,Case case))] @ l) in
  290 + Xlist.fold compreps l (fun l lemma ->
  291 + Phrase(ComprepNP(Sem,lemma)) :: l)}
  292 +
  293 +let noun_compar_adjunct_schema_field compars =
  294 + {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=
  295 + Xlist.fold compars [Phrase Null] (fun l lemma ->
  296 + [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)}
  297 +
  298 +let adj_compar_adjunct_schema_field compars =
  299 + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=
  300 + Xlist.fold compars [Phrase Null] (fun l lemma ->
  301 + [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)}
  302 +
  303 +(*let nogf_schema_field dir morfs =
  304 + {gf=NOGF; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=dir; morfs=morfs} *)
  305 +
  306 +let schema_field gf role dir morfs =
  307 + {gf=gf; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs}
  308 +
  309 +(*let verb_adjuncts = [
  310 + adjunct_schema_field "R" "" Both [Phrase AdvP];
  311 + adjunct_schema_field "R" "" Both [Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *)
  312 + ]
  313 +
  314 + let noun_adjuncts = [
  315 + adjunct_schema_field "C" "poss" Both [Phrase(NP(Case "gen"))];
  316 + adjunct_schema_field "C" "=" Both [Phrase(NP(Case "nom"))];
  317 + adjunct_schema_field "C" "=" Both [Phrase(NP(CaseAgr))];
  318 + adjunct_schema_field "R" "" Backward [Multi[AdjP AllAgr]];
  319 + adjunct_schema_field "R" "" Forward [Multi[AdjP AllAgr]];
  320 + adjunct_schema_field "R" "" Both [Phrase PrepP];
  321 + ]
  322 +
  323 + let adj_adjuncts = [
  324 + adjunct_schema_field "R" "" Both [Phrase PrepP];
  325 + ]*)
  326 +
  327 +let verb_adjuncts = [
  328 + (* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP];
  329 + adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *)
  330 + adjunct_schema_field "Topic" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *)*)
  331 +]
  332 +
  333 +(* FIXME: pozycje dublują się z domyślną ramką "noun" *)
  334 +let noun_adjuncts = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *)
  335 + (* adjunct_schema_field "poss" Both [Phrase Null;Phrase(NP(Case "gen"))];
  336 + adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(Case "nom"))];
  337 + adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(CaseAgr))];
  338 + adjunct_schema_field "" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *)
  339 + adjunct_schema_field "" Forward [Phrase Null;Phrase(AdjP AllAgr)];
  340 + adjunct_schema_field "" Both [Phrase Null;Phrase PrepP];*)
  341 +]
  342 +
  343 +let adj_adjuncts = [
  344 + (* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP]; *)
  345 +]
  346 +
  347 +
  348 +let verb_adjuncts_simp = [
  349 + adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP];
  350 + adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))];
  351 + adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))];
  352 + adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")];
  353 + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *)
  354 + adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *)
  355 + adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or];
  356 +]
  357 +
  358 +let verb_adjuncts_simp2 = [
  359 + adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP];
  360 + adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))];
  361 + adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))];
  362 + adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")];
  363 + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *)
  364 + adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *)
  365 + adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or];
  366 + adjunct_schema_field "Theme" Both [Phrase Null;Phrase(Lex "się")];
  367 +]
  368 +
  369 +let noun_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *)
  370 + adjunct_schema_field "Possesive" Both [Phrase Null;Phrase(NP(Case "gen"));Phrase(NumP(Case "gen"))];
  371 + adjunct_schema_field "Aposition" Forward [Phrase Null;Phrase(NP(Case "nom"));Phrase(NumP(Case "nom"));Phrase Null;Phrase(NP(CaseAgr));Phrase(NumP(CaseAgr))];
  372 + adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *)
  373 + adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)];
  374 + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *)
  375 +]
  376 +
  377 +let noun_measure_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *)
  378 + adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *)
  379 + adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)];
  380 + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *)
  381 +]
  382 +*)
  383 +(* let noun_adjuncts = [NP(Case "gen");NP(Case "nom");Multi[AdjP AllAgr]
  384 +
  385 +]
  386 +
  387 +let verb_adjuncts = [
  388 +
  389 +]
  390 +
  391 +let adj_adjuncts = [AdvP "misc"]
  392 +
  393 +let adv_adjuncts = [AdvP "misc"] *)
... ...
lexSemantics/ENIAMlexSemantics.ml
... ... @@ -22,7 +22,7 @@ open ENIAMsubsyntaxTypes
22 22 open ENIAMlexSemanticsTypes
23 23 open ENIAMwalTypes
24 24 open Xstd
25   -
  25 +(*
26 26 let string_of_lex_sems tokens lex_sems =
27 27 String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size lex_sems - 1) [] (fun l id ->
28 28 let t = ExtArray.get lex_sems id in
... ... @@ -43,7 +43,7 @@ let find_senses t = (* FIXME: sensy zawierające &#39;się&#39; *)
43 43 Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses lemma pos
44 44 | Proper(_,_,_,senses) -> ENIAMplWordnet.find_proper_senses senses
45 45 | _ -> []
46   -
  46 +*)
47 47 let rec find a l i =
48 48 if a.(i) = max_int then (
49 49 a.(i) <- i;
... ... @@ -74,14 +74,14 @@ let rec split_tokens_into_groups_sentence a = function
74 74 union a m (find a [] id)))
75 75 | QuotedSentences sentences ->
76 76 Xlist.iter sentences (fun p ->
77   - split_tokens_into_groups_sentence a p.psentence)
  77 + split_tokens_into_groups_sentence a p.sentence)
78 78 | AltSentence l -> Xlist.iter l (fun (mode,sentence) ->
79 79 split_tokens_into_groups_sentence a sentence)
80 80  
81 81 let rec split_tokens_into_groups_paragraph a = function
82 82 RawParagraph s -> ()
83 83 | StructParagraph sentences ->
84   - Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.psentence)
  84 + Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.sentence)
85 85 | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) ->
86 86 split_tokens_into_groups_paragraph a paragraph)
87 87  
... ... @@ -103,6 +103,59 @@ let split_tokens_into_groups size text =
103 103 IntMap.fold map [] (fun l _ v -> v :: l)
104 104  
105 105 let assign_valence tokens lex_sems group =
  106 + let lexemes = Xlist.fold group StringSet.empty (fun lexemes id ->
  107 + let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in
  108 + StringSet.add lexemes lemma) in
  109 + let entries,schemata,connected = ENIAMwalReduce.select_entries lexemes in
  110 + Xlist.iter group (fun id ->
  111 + let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in
  112 + let pos = ENIAMtokens.get_pos (ExtArray.get tokens id).token in
  113 + let pos2 = ENIAMvalence.simplify_pos pos in
  114 + let schemata = try Entries.find schemata pos2 lemma with Not_found -> ENIAMvalence.get_default_valence pos2 in
  115 + let entries = try Entries.find entries pos lemma with Not_found -> [] in
  116 + let schemata = List.flatten (Xlist.map schemata (fun (opinion,neg,pred,aspect,schema) ->
  117 + ENIAMvalence.transform_entry pos lemma neg pred aspect schema)) in (* FIXME: gubię opinię *)
  118 + let schemata = ENIAMadjuncts.simplify_valence pos pos2 lemma schemata in
  119 + let schemata = Xlist.map schemata (fun (selectors,schema) ->
  120 + selectors,ENIAMvalence2.render_simple_schema schema) in
  121 + let entries = List.flatten (Xlist.map entries (ENIAMvalence.transform_lex_entry pos lemma)) in
  122 + let entries = Xlist.map entries (fun (selectors,entry) ->
  123 + selectors,ENIAMvalence2.render_lex_entry entry) in
  124 + ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with
  125 + schemata = schemata; lex_entries=entries})
  126 +
  127 +(* TODO:
  128 + slashe
  129 + test
  130 + zgranie z LCGlexicon
  131 +*)
  132 +
  133 + (* let lexemes = Xlist.fold group Entries.empty (fun lexemes id ->
  134 + let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in
  135 +
  136 + match (ExtArray.get tokens id).token with
  137 + Lemma(lemma,pos,_) ->
  138 + Entries.add_inc lexemes (ENIAMvalence.simplify_pos pos) lemma pos (*(StringSet.singleton pos) (fun set -> StringSet.add set pos)*)
  139 + | Proper(lemma,pos,_,_) ->
  140 + (* let pos = match pos with
  141 + "subst" -> "psubst"
  142 + | "depr" -> "pdepr"
  143 + | _ -> pos (*failwith ("assign_valence: Proper " ^ pos ^ " " ^ lemma)*) in *)
  144 + Entries.add_inc lexemes (ENIAMvalence.simplify_pos pos) lemma pos (*StringSet.singleton pos) (fun set -> StringSet.add set pos*)
  145 + (* StringMap.add_inc lexemes lemma (StringSet.singleton pos) (fun set -> StringSet.add set pos) (* nazwy własne mają przypisywaną domyślną walencję rzeczowników *) *)
  146 + | _ -> lexemes) in
  147 + let lexemes = Entries.map2 (fun l -> StringSet.to_list (StringSet.of_list l)) in
  148 + let lexemes_set = Entries.fold lexemes StringSet.empty (fun lexemes_set _ lemma _ -> StringSet.add lexemes_set lemma) in
  149 + let schemata,entries = ENIAMvalence.prepare_selected_valence schemata entries lexemes in
  150 + let schemata = ENIAMadjuncts.simplify_valence schemata in
  151 + let schemata = Entries.map schemata (fun pos lemma (selectors,schema) ->
  152 + selectors,ENIAMvalence2.render_schema schema) in
  153 + let entries = Entries.map entries (fun pos lemma (selectors,entry) ->
  154 + selectors,ENIAMvalence2.render_lex_entry entry) in *)
  155 +
  156 +
  157 +(*
  158 +let assign_valence tokens lex_sems group =
106 159 let lexemes = Xlist.fold group StringMap.empty (fun lexemes id ->
107 160 match (ExtArray.get tokens id).token with
108 161 Lemma(lemma,pos,_) ->
... ... @@ -159,129 +212,7 @@ let disambiguate_senses lex_sems group =
159 212 senses = Xlist.map t.senses (fun (s,l,w) ->
160 213 s, List.rev (Xlist.fold l [] (fun l s -> if StringSet.mem senses s then s :: l else l)),w)})
161 214  
162   -let simplify_position_verb l = function (* FIXME: dodać czyszczenie E Pro *)
163   - Phrase(NP(Case "dat")) -> l
164   - | Phrase(NP(Case "inst")) -> l
165   - | Phrase(PrepNP _) -> l
166   - | Phrase(PrepAdjP _) -> l
167   - | Phrase(NumP (Case "dat")) -> l
168   - | Phrase(NumP (Case "inst")) -> l
169   - | Phrase(PrepNumP _) -> l
170   - | Phrase(ComprepNP _) -> l
171   - | Phrase(ComparNP _) -> l
172   - | Phrase(ComparPP _) -> l
173   - | Phrase(IP) -> l
174   - | Phrase(CP _) -> l
175   - | Phrase(NCP(Case "dat",_,_)) -> l
176   - | Phrase(NCP(Case "inst",_,_)) -> l
177   - | Phrase(PrepNCP _) -> l
178   -(* | Phrase(PadvP) -> l *)
179   - | Phrase(AdvP) -> l
180   - | Phrase(PrepP) -> l
181   - | Phrase(Or) -> l
182   - | Phrase(Qub) -> l
183   - | Phrase(Adja) -> l
184   - | Phrase(Inclusion) -> l
185   - | Phrase Pro -> Phrase Null :: l
186   - | t -> t :: l
187   -
188   -let simplify_position_verb2 l = function (* FIXME: dodać czyszczenie E Pro *)
189   - Phrase(NP(Case "dat")) -> l
190   - | Phrase(NP(Case "inst")) -> l
191   - | Phrase(PrepNP _) -> l
192   - | Phrase(PrepAdjP _) -> l
193   - | Phrase(NumP (Case "dat")) -> l
194   - | Phrase(NumP (Case "inst")) -> l
195   - | Phrase(PrepNumP _) -> l
196   - | Phrase(ComprepNP _) -> l
197   - | Phrase(ComparNP _) -> l
198   - | Phrase(ComparPP _) -> l
199   - | Phrase(IP) -> l
200   - | Phrase(CP _) -> l
201   - | Phrase(NCP(Case "dat",_,_)) -> l
202   - | Phrase(NCP(Case "inst",_,_)) -> l
203   - | Phrase(PrepNCP _) -> l
204   - (* | Phrase(PadvP) -> l *)
205   - | Phrase(AdvP) -> l
206   - | Phrase(PrepP) -> l
207   - | Phrase(Or) -> l
208   - | Phrase(Qub) -> l
209   - | Phrase(Adja) -> l
210   - | Phrase(Inclusion) -> l
211   - | Phrase(Lex "się") -> l
212   - | Phrase Pro -> Phrase Null :: l
213   - | t -> t :: l
214   -
215   -let simplify_position_noun l = function
216   - Phrase(NP(Case "gen")) -> l
217   - | Phrase(NP(Case "nom")) -> l
218   - | Phrase(NP(CaseAgr)) -> l
219   - | Phrase(PrepNP _) -> l
220   - | Phrase(AdjP AllAgr) -> l
221   - | Phrase(NumP (Case "gen")) -> l
222   - | Phrase(NumP (Case "nom")) -> l
223   - | Phrase(NumP (CaseAgr)) -> l
224   - | Phrase(PrepNumP _) -> l
225   - | Phrase(ComprepNP _) -> l
226   - | Phrase(ComparNP _) -> l
227   - | Phrase(ComparPP _) -> l
228   - | Phrase(IP) -> l
229   - | Phrase(NCP(Case "gen",_,_)) -> l
230   - | Phrase(PrepNCP _) -> l
231   - | Phrase(PrepP) -> l
232   - | Phrase(Qub) -> l
233   - | Phrase(Adja) -> l
234   - | Phrase(Inclusion) -> l
235   - | Phrase Pro -> Phrase Null :: l
236   - | t -> t :: l
237   -
238   -let simplify_position_adj l = function
239   - Phrase(AdvP) -> l
240   - | t -> t :: l
241   -
242   -let simplify_position_adv l = function
243   - Phrase(AdvP) -> l
244   - | t -> t :: l
245   -
246   -
247   -let simplify_position pos l s =
248   - let morfs = match pos with
249   - "verb" -> List.rev (Xlist.fold s.morfs [] simplify_position_verb)
250   - | "noun" -> List.rev (Xlist.fold s.morfs [] simplify_position_noun)
251   - | "adj" -> List.rev (Xlist.fold s.morfs [] simplify_position_adj)
252   - | "adv" -> List.rev (Xlist.fold s.morfs [] simplify_position_adv)
253   - | _ -> s.morfs in
254   - match morfs with
255   - [] -> l
256   - | [Phrase Null] -> l
257   - | _ -> {s with morfs=morfs} :: l
258   -
259   -let simplify_schemata pos schemata =
260   - let schemata = Xlist.fold schemata StringMap.empty (fun schemata (schema,frame) ->
261   - let schema = List.sort compare (Xlist.fold schema [] (fun l s ->
262   - let s = {s with role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; morfs=List.sort compare s.morfs} in
263   - if s.gf <> ARG && s.gf <> ADJUNCT then s :: l else
264   - (* if s.cr <> [] || s.ce <> [] then s :: l else *)
265   - simplify_position pos l s)) in
266   - StringMap.add_inc schemata (ENIAMwalStringOf.schema schema) (schema,[frame]) (fun (_,frames) -> schema, frame :: frames)) in
267   - StringMap.fold schemata [] (fun l _ s -> s :: l)
268   -
269   -let simplify_schemata2 pos schemata =
270   - let simplify_position_fun = match pos with
271   - "verb" -> simplify_position_verb2
272   - | "noun" -> simplify_position_noun
273   - | "adj" -> simplify_position_adj
274   - | "adv" -> simplify_position_adv
275   - | _ -> (fun l x -> x :: l) in
276   - let morfs = Xlist.fold schemata [] (fun morfs schema ->
277   - Xlist.fold schema morfs (fun morfs s ->
278   - Xlist.fold s.morfs morfs simplify_position_fun)) in
279   - let morfs = Xlist.fold morfs StringMap.empty (fun map s ->
280   - StringMap.add map (ENIAMwalStringOf.morf s) s) in
281   - let schema = StringMap.fold morfs [] (fun schema _ morf ->
282   - {gf=ARG; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[];
283   - dir=Both; morfs=[Phrase Null;morf]} :: schema) in
284   - schema
  215 +
285 216  
286 217 (* FIXME: problem ComprepNP i PrepNCP *)
287 218 (* FIXME: problem gdy ten sam token występuje w kilku ścieżkach *)
... ... @@ -471,24 +402,24 @@ let assign_very_simplified_valence tokens lex_sems group =
471 402 (selectors,schema) :: frames) in
472 403 (* Printf.printf "|frames|=%d\n" (Xlist.size frames); *)
473 404 ExtArray.set lex_sems id {t with very_simple_valence=frames})
474   -
  405 + *)
475 406  
476 407 let assign tokens text =
477 408 let lex_sems = ExtArray.make (ExtArray.size tokens) empty_lex_sem in
478 409 let _ = ExtArray.add lex_sems empty_lex_sem in
479 410 Int.iter 1 (ExtArray.size tokens - 1) (fun i ->
480   - let token = ExtArray.get tokens i in
481   - ExtArray.set tokens i token;
482   - let senses = find_senses token in
483   - let cats = ENIAMcategories.assign token in
484   - let lex_sem = {empty_lex_sem with senses=senses; cats=cats} in
  411 + (* let token = ExtArray.get tokens i in
  412 + (* ExtArray.set tokens i token; *)
  413 + let senses = find_senses token in *)
  414 + let lex_sem = {empty_lex_sem with senses=[](*senses*)} in
485 415 let _ = ExtArray.add lex_sems lex_sem in
486 416 ());
487 417 let groups = split_tokens_into_groups (ExtArray.size tokens) text in
488 418 (* Xlist.iter groups (fun group -> print_endline (String.concat " " (Xlist.map group string_of_int))); *)
489 419 Xlist.iter groups (fun group -> assign_valence tokens lex_sems group);
  420 + (* Xlist.iter groups (fun group -> assign_valence tokens lex_sems group);
490 421 Xlist.iter groups (fun group -> disambiguate_senses lex_sems group);
491 422 Xlist.iter groups (fun group -> assign_simplified_valence tokens lex_sems group);
492 423 Xlist.iter groups (fun group -> assign_very_simplified_valence tokens lex_sems group);
493   - Xlist.iter groups (fun group -> ENIAMlexSemanticsData.assign_semantics tokens lex_sems group);
  424 + Xlist.iter groups (fun group -> ENIAMlexSemanticsData.assign_semantics tokens lex_sems group); *)
494 425 lex_sems
... ...
lexSemantics/ENIAMlexSemanticsTypes.ml
... ... @@ -36,24 +36,18 @@ type semantics =
36 36 | SpecialMod of string * (type_arg list * type_term)*)
37 37 | PrepSemantics of (string * string * string * StringSet.t * string list) list (* case,role,role_attr,hipero,sel_prefs *)
38 38  
39   -(* FIXME: usunąć *)
40   -type categories = {lemma: string; pos: string; pos2: string;
41   - numbers: string list; cases: string list; genders: string list; persons: string list;
42   - grads: string list; praeps: string list; acms: string list;
43   - aspects: string list; negations: string list; moods: string list; tenses: string list;
44   - nsyn: string list; nsem: string list;
45   - }
46   -
47   -
48 39 type lex_sem = {
  40 + schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list *
  41 + (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list;
  42 + lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list *
  43 + ENIAM_LCGtypes.grammar_symbol list) list;
49 44 e: labels;
50   - valence: (int * ENIAMwalTypes.frame) list;
  45 + (* valence: (int * ENIAMwalTypes.frame) list;
51 46 simple_valence: (int * ENIAMwalTypes.frame) list;
52   - very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list;
  47 + very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; *)
53 48 senses: (string * string list * float) list;
54 49 lroles: string * string;
55 50 semantics: semantics;
56   - cats: categories list;
57 51 }
58 52  
59 53 let empty_labels = {
... ... @@ -65,8 +59,9 @@ let empty_labels = {
65 59 }
66 60  
67 61 let empty_lex_sem = {
68   - e=empty_labels; valence=[]; simple_valence=[]; very_simple_valence=[]; senses=[];
69   - lroles="",""; semantics=Normal; cats=[];}
  62 + schemata=[]; lex_entries=[];
  63 + e=empty_labels; (*valence=[]; simple_valence=[]; very_simple_valence=[];*) senses=[];
  64 + lroles="",""; semantics=Normal}
70 65  
71 66 (* FIXME: poprawić katalog *)
72 67 (*let subst_uncountable_lexemes_filename = resource_path ^ "/lexSemantics/subst_uncountable.dat"
... ... @@ -74,8 +69,8 @@ let subst_uncountable_lexemes_filename2 = resource_path ^ &quot;/lexSemantics/subst_u
74 69 let subst_container_lexemes_filename = resource_path ^ "/lexSemantics/subst_container.dat"
75 70 let subst_numeral_lexemes_filename = resource_path ^ "/lexSemantics/subst_numeral.dat"
76 71 let subst_time_lexemes_filename = resource_path ^ "/lexSemantics/subst_time.dat"*)
77   -let subst_uncountable_lexemes_filename = resource_path ^ "/Walenty/subst_uncountable.dat"
  72 +(* let subst_uncountable_lexemes_filename = resource_path ^ "/Walenty/subst_uncountable.dat"
78 73 let subst_uncountable_lexemes_filename2 = resource_path ^ "/Walenty/subst_uncountable_stare.dat"
79 74 let subst_container_lexemes_filename = resource_path ^ "/Walenty/subst_container.dat"
80 75 let subst_numeral_lexemes_filename = resource_path ^ "/Walenty/subst_numeral.dat"
81   -let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat"
  76 +let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat" *)
... ...
lexSemantics/ENIAMvalence.ml
... ... @@ -23,8 +23,8 @@ open Xstd
23 23 let simplify_pos = function
24 24 "subst" -> "noun"
25 25 | "depr" -> "noun"
26   - | "psubst" -> "noun"
27   - | "pdepr" -> "noun"
  26 + (* | "psubst" -> "noun"
  27 + | "pdepr" -> "noun" *)
28 28 | "adj" -> "adj"
29 29 | "adjc" -> "adj"
30 30 | "adjp" -> "adj"
... ... @@ -629,3 +629,17 @@ let prepare_all_valence phrases schemata entries =
629 629 (Entries.flatten_map entries transform_lex_entry)
630 630  
631 631 (* let _ = prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries *)
  632 +
  633 +(* let prepare_selected_valence schemata entries lexemes =
  634 + Entries.fold schemata Entries.empty (fun schemata pos2 lemma (opinion,neg,pred,aspect,schema) ->
  635 + Xlist.fold (Entries.find pos2 lemma) schemata (function pos ->
  636 + Entries.add_inc_list schemata pos lemma (transform_entry pos lemma neg pred aspect schema))),
  637 + Entries.flatten_map entries transform_lex_entry *)
  638 +
  639 +let get_default_valence = function
  640 + "verb" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[{empty_position with gf=SUBJ; morfs=[NP(Str);NCP(Str,CompTypeUndef,CompUndef)]};
  641 + {empty_position with gf=OBJ; morfs=[NP(Str);NCP(Str,CompTypeUndef,CompUndef)]}]]
  642 + | "noun" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]]
  643 + | "adj" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]]
  644 + | "adv" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]]
  645 + | _ -> []
... ...
lexSemantics/ENIAMvalence2.ml
... ... @@ -142,7 +142,8 @@ let render_phrase = function
142 142 | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect]
143 143 | InfP AspectUndef -> Tensor[Atom "infp"; Top]
144 144 (* | PadvP -> Tensor[Atom "padvp"] *)
145   - | AdvP -> Tensor[Atom "advp"]
  145 + | AdvP "misc" -> Tensor[Atom "advp"; Top] (* FIXME: a może Atom "mod" zamiast Top *)
  146 + | AdvP mode -> Tensor[Atom "advp"; Atom mode]
146 147 | FixedP lex -> Tensor[Atom "fixed"; Atom lex]
147 148 (* | PrepP -> Tensor[Atom "prepp";Top]
148 149 | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"]
... ... @@ -181,6 +182,10 @@ let render_schema schema =
181 182 Xlist.map schema (fun p ->
182 183 Both,Plus(Xlist.map p.morfs render_morf))
183 184  
  185 +let render_simple_schema schema =
  186 + Xlist.map schema (fun morfs ->
  187 + Both,Plus(Xlist.map morfs render_morf))
  188 +
184 189 (* FIXME: tu trzeba by dodać zwykłe reguły dla czasowników dotyczące ich negacji, aglutynatu itp. *)
185 190 let render_lex_entry = function
186 191 SimpleLexEntry(lemma,pos) -> [Tensor([Atom "lex";Atom lemma] @ render_pos_entry pos)]
... ... @@ -195,12 +200,12 @@ let render_lex_entry = function
195 200 [ImpSet(Tensor([Atom "lex";Atom (string_of_int id);Atom lemma] @ render_pos_entry pos),[Both,Tensor[AVar "schema"]])]
196 201 | entry -> print_endline ("render_entry:" ^ ENIAMwalStringOf.lex_entry entry); [(*[],entry*)]
197 202  
198   -let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries
  203 +(* let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries *)
199 204  
200   -let _ =
  205 +(* let _ =
201 206 (* Entries.map schemata (fun pos lemma (selectors,schema) ->
202 207 (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *)
203 208 render_schema schema) *)
204 209 Entries.map entries (fun pos lemma (selectors,entry) ->
205 210 (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *)
206   - selectors,render_lex_entry entry)
  211 + selectors,render_lex_entry entry) *)
... ...
lexSemantics/ENIAMwalFrames.ml
... ... @@ -287,147 +287,6 @@ let prepare_schema expands subtypes equivs schema =
287 287 let prepare_schema_sem expands subtypes equivs schema =
288 288 prepare_schema_comprep expands subtypes equivs schema
289 289  
290   -let default_frames = Xlist.fold [ (* FIXME: poprawić domyślne ramki po ustaleniu adjunctów *)
291   - "verb",(ReflEmpty,Domyslny,NegationUndef,PredNA,AspectUndef,"subj{np(str)}+obj{np(str)}"); (* FIXME: dodać ramkę z refl *)
292   - "noun",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{possp}+{adjp(agr)}");
293   - "adj",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"");
294   - "adv",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"");
295   - "empty",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"");
296   - "date",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',natr)}");
297   - "date2",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',atr1({adjp(agr)}))}"); (* FIXME: wskazać możliwe podrzędniki *)
298   - "day",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""
299   - (*"{lex(np(gen),sg,XOR('styczeń','luty','marzec','kwiecień','maj','czerwiec','lipiec','sierpień','wrzesień','październik','litopad','grudzień'),atr1({np(gen)}))}"*)); (* FIXME: wskazać możliwe podrzędniki *)
300   - "hour",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(advp(temp),pos,'rano',natr)}");
301   - ] StringMap.empty (fun map (k,(refl,opinion,negation,pred,aspect,schema)) ->
302   - StringMap.add map k (Frame(DefaultAtrs([],refl,opinion,negation,pred,aspect),prepare_schema expands subtypes equivs schema)))
303   -
304   -let adjunct_schema_field role dir morfs =
305   - {gf=ADJUNCT; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs}
306   -
307   -let verb_prep_adjunct_schema_field lemma case =
308   - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[
309   - Phrase Null;
310   - Phrase(PrepNP(Sem,lemma,Case case));
311   - Phrase(PrepAdjP(Sem,lemma,Case case));
312   - Phrase(PrepNumP(Sem,lemma,Case case))]}
313   -
314   -let verb_comprep_adjunct_schema_field lemma =
315   - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[
316   - Phrase Null;
317   - Phrase(ComprepNP(Sem,lemma))]}
318   -
319   -let verb_compar_adjunct_schema_field lemma =
320   - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[
321   - Phrase Null;
322   - Phrase(ComparPP(Sem,lemma))] @
323   - Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case)))}
324   -
325   -let noun_prep_adjunct_schema_field preps compreps =
326   - {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=
327   - let l = Xlist.fold preps [Phrase Null] (fun l (lemma,case) ->
328   - [Phrase(PrepNP(Sem,lemma,Case case));
329   - Phrase(PrepAdjP(Sem,lemma,Case case));
330   - Phrase(PrepNumP(Sem,lemma,Case case))] @ l) in
331   - Xlist.fold compreps l (fun l lemma ->
332   - Phrase(ComprepNP(Sem,lemma)) :: l)}
333   -
334   -let noun_compar_adjunct_schema_field compars =
335   - {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=
336   - Xlist.fold compars [Phrase Null] (fun l lemma ->
337   - [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)}
338   -
339   -let adj_compar_adjunct_schema_field compars =
340   - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=
341   - Xlist.fold compars [Phrase Null] (fun l lemma ->
342   - [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)}
343   -
344   -(*let nogf_schema_field dir morfs =
345   - {gf=NOGF; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=dir; morfs=morfs} *)
346   -
347   -let schema_field gf role dir morfs =
348   - {gf=gf; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs}
349   -
350   -(*let verb_adjuncts = [
351   - adjunct_schema_field "R" "" Both [Phrase AdvP];
352   - adjunct_schema_field "R" "" Both [Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *)
353   - ]
354   -
355   -let noun_adjuncts = [
356   - adjunct_schema_field "C" "poss" Both [Phrase(NP(Case "gen"))];
357   - adjunct_schema_field "C" "=" Both [Phrase(NP(Case "nom"))];
358   - adjunct_schema_field "C" "=" Both [Phrase(NP(CaseAgr))];
359   - adjunct_schema_field "R" "" Backward [Multi[AdjP AllAgr]];
360   - adjunct_schema_field "R" "" Forward [Multi[AdjP AllAgr]];
361   - adjunct_schema_field "R" "" Both [Phrase PrepP];
362   - ]
363   -
364   -let adj_adjuncts = [
365   - adjunct_schema_field "R" "" Both [Phrase PrepP];
366   - ]*)
367   -
368   -let verb_adjuncts = [
369   -(* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP];
370   - adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *)
371   - adjunct_schema_field "Topic" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *)*)
372   - ]
373   -
374   -(* FIXME: pozycje dublują się z domyślną ramką "noun" *)
375   -let noun_adjuncts = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *)
376   -(* adjunct_schema_field "poss" Both [Phrase Null;Phrase(NP(Case "gen"))];
377   - adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(Case "nom"))];
378   - adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(CaseAgr))];
379   - adjunct_schema_field "" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *)
380   - adjunct_schema_field "" Forward [Phrase Null;Phrase(AdjP AllAgr)];
381   - adjunct_schema_field "" Both [Phrase Null;Phrase PrepP];*)
382   - ]
383   -
384   -let adj_adjuncts = [
385   -(* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP]; *)
386   - ]
387   -
388   -
389   -let verb_adjuncts_simp = [
390   - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP];
391   - adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))];
392   - adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))];
393   - adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")];
394   -(* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *)
395   - adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *)
396   - adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or];
397   - ]
398   -
399   -let verb_adjuncts_simp2 = [
400   - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP];
401   - adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))];
402   - adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))];
403   - adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")];
404   - (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *)
405   - adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *)
406   - adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or];
407   - adjunct_schema_field "Theme" Both [Phrase Null;Phrase(Lex "się")];
408   -]
409   -
410   -let noun_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *)
411   - adjunct_schema_field "Possesive" Both [Phrase Null;Phrase(NP(Case "gen"));Phrase(NumP(Case "gen"))];
412   - adjunct_schema_field "Aposition" Forward [Phrase Null;Phrase(NP(Case "nom"));Phrase(NumP(Case "nom"));Phrase Null;Phrase(NP(CaseAgr));Phrase(NumP(CaseAgr))];
413   - adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *)
414   - adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)];
415   -(* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *)
416   - ]
417   -
418   -let noun_measure_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *)
419   - adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *)
420   - adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)];
421   -(* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *)
422   - ]
423   -
424   -let adj_adjuncts_simp = [
425   - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP];
426   - ]
427   -
428   -let adv_adjuncts_simp = [
429   - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP];
430   - ]
431 290  
432 291 let convert_frame expands subtypes equivs lexemes valence lexeme pos (refl,opinion,negation,pred,aspect,schema) =
433 292 (* Printf.printf "convert_frame %s %s\n" lexeme pos; *)
... ...
lexSemantics/ENIAMwalReduce.ml
... ... @@ -172,7 +172,7 @@ let create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes =
172 172 StringSet.mem lexemes
173 173  
174 174  
175   -let select_entries phrases entries schemata connected meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes =
  175 +let select_entries_full phrases entries schemata connected meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes =
176 176 let tests = create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes in
177 177 let entries = reduce_entries lexemes entries in
178 178 let schemata = reduce_entries lexemes schemata in
... ... @@ -194,16 +194,18 @@ let select_all_entries phrases entries schemata connected meanings =
194 194 let connected = merge_entries_conn phrases meanings connected in
195 195 entries,schemata,connected
196 196  
197   -let entries,schemata,connected =
  197 +let select_entries lexemes =
  198 + select_entries_full ENIAMwalParser.phrases ENIAMwalParser.entries ENIAMwalParser.schemata
  199 + ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes
  200 +
  201 +(* let entries,schemata,connected =
198 202 (* let lexemes = StringSet.of_list ["Ala"; "ma"; "kot"] in *)
199 203 let lexemes = StringSet.of_list ["dorastać"; "dorobić"; "po"; "bok"; "na"] in
200 204 select_entries ENIAMwalParser.phrases ENIAMwalParser.entries ENIAMwalParser.schemata
201   - ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes
  205 + ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes *)
202 206  
203 207 (* TODO
204   - - usunięcie adjunctów
205   - - uwzględnienie cech morfoskładniowych
206   - - scalenie schematów
  208 + - uwzględnienie cech morfoskładniowych - np usunięcie schematów wymagających negacji, gdy nie ma "nie"
207 209 - dodanie adjunctów - pamiętać o padvp
208 210 *)
209 211 (* TODO
... ...
lexSemantics/makefile
... ... @@ -3,10 +3,11 @@ OCAMLOPT=ocamlopt
3 3 OCAMLDEP=ocamldep
4 4 INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam
5 5 OCAMLFLAGS=$(INCLUDES) -g
6   -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa
  6 +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa
7 7 INSTALLDIR=`ocamlc -where`/eniam
8 8  
9   -SOURCES= ENIAMlexSemanticsTypes.ml ENIAMcategories.ml ENIAMlexSemanticsData.ml ENIAMlexSemantics.ml
  9 +SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMadjuncts.ml ENIAMvalence2.ml \
  10 + ENIAMlexSemanticsTypes.ml ENIAMlexSemantics.ml #ENIAMlexSemanticsData.ml
10 11  
11 12 all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa
12 13  
... ... @@ -28,8 +29,8 @@ eniam-lexSemantics.cmxa: $(SOURCES)
28 29  
29 30 # test: test.ml
30 31 # $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml
31   -test: entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMvalence2.ml test.ml
32   - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMvalence2.ml test.ml
  32 +test: $(SOURCES) test.ml
  33 + $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) $^
33 34  
34 35  
35 36 .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx
... ...
subsyntax/interface.ml
... ... @@ -43,6 +43,10 @@ let spec_list = [
43 43 let usage_msg =
44 44 "Usage: subsyntax <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:"
45 45  
  46 +let message = "ENIAMsubsyntax: MWE, abbreviation and sentence detecion for Polish\n\
  47 +Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\
  48 +Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences"
  49 +
46 50 let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s))
47 51  
48 52 let input_text channel =
... ... @@ -80,6 +84,7 @@ let rec main_loop in_chan out_chan =
80 84 main_loop in_chan out_chan)
81 85  
82 86 let _ =
  87 + print_endline message;
83 88 Arg.parse spec_list anon_fun usage_msg;
84 89 Gc.compact ();
85 90 print_endline "Ready!";
... ...