diff --git a/lexSemantics/ENIAMadjuncts.ml b/lexSemantics/ENIAMadjuncts.ml new file mode 100644 index 0000000..c698b85 --- /dev/null +++ b/lexSemantics/ENIAMadjuncts.ml @@ -0,0 +1,393 @@ +(* + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences + * + * This library is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + *) + +open ENIAMwalTypes +open Xstd + +let simplify_position_verb mode l = function (* FIXME: dodać czyszczenie E Pro *) + NP(Case "dat") -> l + | NP(Case "inst") -> l + | NCP(Case "dat",_,_) -> l + | NCP(Case "inst",_,_) -> l + | CP _ -> l + | PrepNP _ -> l + | PrepAdjP _ -> l + | ComprepNP _ -> l + | ComparP _ -> l + | PrepNCP _ -> l + | AdvP _ -> l + | Or -> l + | SimpleLexArg("się",QUB) -> l + | E Or -> l + | E (CP(CompTypeUndef,CompUndef)) -> l + | E (PrepNP(prep,Case case)) -> l + | E (PrepNCP(prep,Case case,CompTypeUndef,CompUndef)) -> l + | NP(Case "gen") as t -> if mode = "temp" then l else t :: l + | NP(Case "acc") as t -> if mode = "dur" then l else t :: l + | t -> t :: l + +let simplify_position_noun mode l = function + NP(Case "gen") -> l + | NP(Case "nom") -> l + | NCP(Case "gen",_,_) -> l + | NP(CaseAgr) -> l + | AdjP AllAgr -> l + | PrepNP _ -> l + | ComprepNP _ -> l + | ComparP _ -> l + | PrepNCP _ -> l + | t -> t :: l + +let simplify_position_adj mode l = function + AdvP _ -> l + | ComparP _ -> l + | t -> t :: l + +let simplify_position_adv mode l = function + AdvP _ -> l (* FIXME: czy na pewno zostawić swobodę modyfikowania przysłówka? *) + | t -> t :: l + +(* +let simplify_position pos l s = + let morfs = match pos with + "verb" -> List.rev (Xlist.fold s.morfs [] simplify_position_verb) + | "noun" -> List.rev (Xlist.fold s.morfs [] simplify_position_noun) + | "adj" -> List.rev (Xlist.fold s.morfs [] simplify_position_adj) + | "adv" -> List.rev (Xlist.fold s.morfs [] simplify_position_adv) + | _ -> s.morfs in + match morfs with + [] -> l + | [Phrase Null] -> l + | _ -> {s with morfs=morfs} :: l + +let simplify_schemata pos schemata = + let schemata = Xlist.fold schemata StringMap.empty (fun schemata (schema,frame) -> + let schema = List.sort compare (Xlist.fold schema [] (fun l s -> + let s = {s with role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; morfs=List.sort compare s.morfs} in + if s.gf <> ARG && s.gf <> ADJUNCT then s :: l else + (* if s.cr <> [] || s.ce <> [] then s :: l else *) + simplify_position pos l s)) in + StringMap.add_inc schemata (ENIAMwalStringOf.schema schema) (schema,[frame]) (fun (_,frames) -> schema, frame :: frames)) in + StringMap.fold schemata [] (fun l _ s -> s :: l) + +let simplify_schemata2 pos schemata = + let simplify_position_fun = match pos with + "verb" -> simplify_position_verb2 + | "noun" -> simplify_position_noun + | "adj" -> simplify_position_adj + | "adv" -> simplify_position_adv + | _ -> (fun l x -> x :: l) in + let morfs = Xlist.fold schemata [] (fun morfs schema -> + Xlist.fold schema morfs (fun morfs s -> + Xlist.fold s.morfs morfs simplify_position_fun)) in + let morfs = Xlist.fold morfs StringMap.empty (fun map s -> + StringMap.add map (ENIAMwalStringOf.morf s) s) in + let schema = StringMap.fold morfs [] (fun schema _ morf -> + {gf=ARG; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; + dir=Both; morfs=[Phrase Null;morf]} :: schema) in + schema*) + +(*let rec classify_phrase = function + NP _ as phrase -> ENIAMwalStringOf.phrase phrase + | PrepNP _ as phrase -> ENIAMwalStringOf.phrase phrase + | AdjP _ as phrase -> ENIAMwalStringOf.phrase phrase + | PrepAdjP _ as phrase -> ENIAMwalStringOf.phrase phrase + | ComprepNP _ as phrase -> ENIAMwalStringOf.phrase phrase + | ComparP _ as phrase -> ENIAMwalStringOf.phrase phrase + | CP _ -> "cp" + | NCP(case,_,_) -> ENIAMwalStringOf.phrase (NP case) + | PrepNCP(prep,case,_,_) -> ENIAMwalStringOf.phrase (PrepNP(prep,case)) + | InfP _ -> "infp" + | AdvP _ -> "advp" + | FixedP _ as phrase -> ENIAMwalStringOf.phrase phrase + | Or -> "or" + | E Or -> "or" + | E phrase -> classify_phrase phrase + | SimpleLexArg("się",QUB) -> "się" + (* | SimpleLexArg _ -> "lex" *) +(* | LexArg _ -> "lex" *) + | SimpleLexArg _ as phrase -> ENIAMwalStringOf.phrase phrase + | LexArg _ as phrase -> ENIAMwalStringOf.phrase phrase + | phrase -> print_endline ("classify_phrase: " ^ ENIAMwalStringOf.phrase phrase); "other" + +let classify_position pos p = + let l = (*StringSet.to_list*) (Xlist.fold p.morfs StringSet.empty (fun set morf -> + StringSet.add set ((*classify_phrase*)ENIAMwalStringOf.phrase morf))) in + (* match l with + [] -> "empty" + | [c] -> c + (* | ["np(gen)"; "np(acc)"] -> "np(str)" + | ["np(gen)"; "infp"] -> "np(gen)-infp" + | ["np(acc)"; "infp"] -> "np(acc)-infp" *) + | _ -> let c = String.concat " " l in if pos="adv" then print_endline c; c *) + l*) + +module OrderedPhrase = struct + type t = phrase + let compare = compare +end + +module PhraseSet = Xset.Make(OrderedPhrase) + + +let remove_adjuncts_schema pos lemma schema = + let simplify_position_fun = match pos with + "verb" -> simplify_position_verb + | "noun" -> simplify_position_noun + | "adj" -> simplify_position_adj + | "adv" -> simplify_position_adv + | _ -> (fun _ l x -> x :: l) in + List.flatten (Xlist.map schema (fun p -> + let morfs = Xlist.fold p.morfs [] (simplify_position_fun (String.concat " " p.mode)) in + if morfs = [] then [] else [PhraseSet.of_list morfs])) + (* let schema2 = List.flatten (Xlist.map schema1 (fun p -> + let p = {p with morfs = Xlist.fold p.morfs [] (simplify_position_fun (String.concat " " p.mode))} in + let c = classify_position pos p in + if StringSet.is_empty c (*"empty"*) then [] else [c,[p]])) in + (* let sum = Xlist.fold schema2 StringSet.empty (fun set (c,p) -> StringSet.union set c) in + let n = Xlist.fold schema2 0 (fun n (c,p) -> n + StringSet.size c) in + if StringSet.size sum <> n (*&& pos = "noun"*) then (*Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema1);*) + Printf.printf "%s %s %s\n" pos lemma (String.concat "+" (Xlist.map schema2 (fun (c,p) -> String.concat "," p.mode ^ "{" ^ String.concat ";" (StringSet.to_list c) ^ "}"))); *) +(* let set = Xlist.fold schema2 StringSet.empty (fun set (c,_) -> + StringSet.add set c) in + (* if StringSet.mem set "np(acc)" && StringSet.mem set "infp" then + Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema1); *) + let schema2 = if StringSet.mem set "np(gen)" && StringSet.mem set "np(acc)" then schema2 else + Xlist.map schema2 (function + "np(gen)",p -> "np(str)",p + | "np(acc)",p -> "np(str)",p + | c,p -> c,p) in*) + schema2 *) + + +let is_disjunctive schema = + let sum = Xlist.fold schema PhraseSet.empty (fun set morfs -> PhraseSet.union set morfs) in + let n = Xlist.fold schema 0 (fun n morfs -> n + PhraseSet.size morfs) in + PhraseSet.size sum = n + +let rec find_overlapping morfs rev = function + morfs2 :: schema -> + if PhraseSet.is_empty (PhraseSet.intersection morfs morfs2) then find_overlapping morfs (morfs2 :: rev) schema + else morfs2, List.rev rev @ schema + | [] -> raise Not_found + +let rec merge_schemata_rec cont = function + [] -> cont + | [schema] -> schema :: cont + | schema1 :: schema2 :: schemata -> + let sum_schema,diff_schema = Xlist.fold schema1 ([],schema2) (fun (sum_schema,diff_schema) morfs -> + try + let morfs2,diff_schema = find_overlapping morfs [] diff_schema in + (PhraseSet.union morfs morfs2) :: sum_schema,diff_schema + with Not_found -> morfs :: sum_schema,diff_schema) in + let schema = sum_schema @ diff_schema in + if is_disjunctive schema then ((*print_endline "A";*) merge_schemata_rec cont (schema :: schemata)) + else ((*print_endline "B";*) merge_schemata_rec (schema :: cont) schemata) + +let rec merge_schemata schemata = + let cont,schemata = Xlist.fold schemata ([],[]) (fun (cont,schemata) schema -> + if is_disjunctive schema then cont, schema :: schemata else ((*print_endline "C";*) schema :: cont, schemata)) in + merge_schemata_rec cont schemata + + + +module OrderedSelector = struct + type t = (ENIAM_LCGlexiconTypes.selector * + ENIAM_LCGlexiconTypes.selector_relation * string list) + list + let compare = compare +end + +module SelectorMap = Xmap.Make(OrderedSelector) + +let latex_of_selectors selectors = + String.concat ", " (Xlist.map selectors (fun (cat,rel,l) -> + let rel = if rel = ENIAM_LCGlexiconTypes.Eq then "=" else "!=" in + ENIAMcategoriesPL.string_of_selector cat ^ rel ^ (String.concat "|" l))) + +let simplify_valence pos pos2 lemma schemata = + (* Xlist.iter schemata (fun (selectors,schema) -> if pos2="verb" then Printf.printf "A %s %s [%s] %s\n" pos lemma (latex_of_selectors selectors) (ENIAMwalStringOf.schema schema)); *) + let map = Xlist.fold schemata SelectorMap.empty (fun map (selectors,schema) -> + SelectorMap.add_inc map selectors [schema] (fun l -> schema :: l)) in + let schemata = SelectorMap.fold map [] (fun new_schemata selectors schemata -> + (selectors,Xlist.map schemata (remove_adjuncts_schema pos2 lemma)) :: new_schemata) in + (* Xlist.iter schemata (fun (_,schemata) -> + Xlist.iter schemata (fun schema -> if pos2="verb" then Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema))); *) + (* Xlist.iter schemata (fun (selectors,schemata) -> Xlist.iter schemata (fun schema -> + if pos2="verb" then Printf.printf "B %s %s [%s] %s\n" pos lemma (latex_of_selectors selectors) (String.concat "+" (Xlist.map schema (fun morfs -> + "{" ^ String.concat ";" (PhraseSet.fold morfs [] (fun l m -> ENIAMwalStringOf.phrase m :: l)) ^ "}"))))); *) + let schemata = List.flatten (Xlist.map schemata (fun (selectors,schemata) -> + Xlist.map (merge_schemata schemata) (fun schema -> selectors,Xlist.map schema PhraseSet.to_list))) in + (* Xlist.iter schemata (fun (selectors,schema) -> + if pos2="verb" then Printf.printf "C %s %s [%s] %s\n" pos lemma (latex_of_selectors selectors) (String.concat "+" (Xlist.map schema (fun morfs -> + "{" ^ String.concat ";" (PhraseSet.fold morfs [] (fun l m -> ENIAMwalStringOf.phrase m :: l)) ^ "}")))); *) + schemata + +let _ = + let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries in + let _ = Entries.map2 schemata (fun pos lemma schemata -> simplify_valence pos (ENIAMvalence.simplify_pos pos) lemma schemata) in + () + + +(* +let default_frames = Xlist.fold [ (* FIXME: poprawić domyślne ramki po ustaleniu adjunctów *) + "verb",(ReflEmpty,Domyslny,NegationUndef,PredNA,AspectUndef,"subj{np(str)}+obj{np(str)}"); (* FIXME: dodać ramkę z refl *) + "noun",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{possp}+{adjp(agr)}"); + "adj",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); + "adv",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); + "empty",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); + "date",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',natr)}"); + "date2",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',atr1({adjp(agr)}))}"); (* FIXME: wskazać możliwe podrzędniki *) + "day",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"" + (*"{lex(np(gen),sg,XOR('styczeń','luty','marzec','kwiecień','maj','czerwiec','lipiec','sierpień','wrzesień','październik','litopad','grudzień'),atr1({np(gen)}))}"*)); (* FIXME: wskazać możliwe podrzędniki *) + "hour",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(advp(temp),pos,'rano',natr)}"); + ] StringMap.empty (fun map (k,(refl,opinion,negation,pred,aspect,schema)) -> + StringMap.add map k (Frame(DefaultAtrs([],refl,opinion,negation,pred,aspect),prepare_schema expands subtypes equivs schema))) + +let adjunct_schema_field role dir morfs = + {gf=ADJUNCT; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs} + +let verb_prep_adjunct_schema_field lemma case = + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ + Phrase Null; + Phrase(PrepNP(Sem,lemma,Case case)); + Phrase(PrepAdjP(Sem,lemma,Case case)); + Phrase(PrepNumP(Sem,lemma,Case case))]} + +let verb_comprep_adjunct_schema_field lemma = + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ + Phrase Null; + Phrase(ComprepNP(Sem,lemma))]} + +let verb_compar_adjunct_schema_field lemma = + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ + Phrase Null; + Phrase(ComparPP(Sem,lemma))] @ + Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case)))} + +let noun_prep_adjunct_schema_field preps compreps = + {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= + let l = Xlist.fold preps [Phrase Null] (fun l (lemma,case) -> + [Phrase(PrepNP(Sem,lemma,Case case)); + Phrase(PrepAdjP(Sem,lemma,Case case)); + Phrase(PrepNumP(Sem,lemma,Case case))] @ l) in + Xlist.fold compreps l (fun l lemma -> + Phrase(ComprepNP(Sem,lemma)) :: l)} + +let noun_compar_adjunct_schema_field compars = + {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= + Xlist.fold compars [Phrase Null] (fun l lemma -> + [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)} + +let adj_compar_adjunct_schema_field compars = + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= + Xlist.fold compars [Phrase Null] (fun l lemma -> + [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)} + +(*let nogf_schema_field dir morfs = + {gf=NOGF; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=dir; morfs=morfs} *) + +let schema_field gf role dir morfs = + {gf=gf; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs} + +(*let verb_adjuncts = [ + adjunct_schema_field "R" "" Both [Phrase AdvP]; + adjunct_schema_field "R" "" Both [Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) + ] + + let noun_adjuncts = [ + adjunct_schema_field "C" "poss" Both [Phrase(NP(Case "gen"))]; + adjunct_schema_field "C" "=" Both [Phrase(NP(Case "nom"))]; + adjunct_schema_field "C" "=" Both [Phrase(NP(CaseAgr))]; + adjunct_schema_field "R" "" Backward [Multi[AdjP AllAgr]]; + adjunct_schema_field "R" "" Forward [Multi[AdjP AllAgr]]; + adjunct_schema_field "R" "" Both [Phrase PrepP]; + ] + + let adj_adjuncts = [ + adjunct_schema_field "R" "" Both [Phrase PrepP]; + ]*) + +let verb_adjuncts = [ + (* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP]; + adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) + adjunct_schema_field "Topic" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *)*) +] + +(* FIXME: pozycje dublują się z domyślną ramką "noun" *) +let noun_adjuncts = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) + (* adjunct_schema_field "poss" Both [Phrase Null;Phrase(NP(Case "gen"))]; + adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(Case "nom"))]; + adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(CaseAgr))]; + adjunct_schema_field "" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) + adjunct_schema_field "" Forward [Phrase Null;Phrase(AdjP AllAgr)]; + adjunct_schema_field "" Both [Phrase Null;Phrase PrepP];*) +] + +let adj_adjuncts = [ + (* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP]; *) +] + + +let verb_adjuncts_simp = [ + adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; + adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))]; + adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))]; + adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")]; + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *) + adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *) + adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or]; +] + +let verb_adjuncts_simp2 = [ + adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; + adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))]; + adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))]; + adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")]; + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *) + adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *) + adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or]; + adjunct_schema_field "Theme" Both [Phrase Null;Phrase(Lex "się")]; +] + +let noun_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) + adjunct_schema_field "Possesive" Both [Phrase Null;Phrase(NP(Case "gen"));Phrase(NumP(Case "gen"))]; + adjunct_schema_field "Aposition" Forward [Phrase Null;Phrase(NP(Case "nom"));Phrase(NumP(Case "nom"));Phrase Null;Phrase(NP(CaseAgr));Phrase(NumP(CaseAgr))]; + adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) + adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)]; + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *) +] + +let noun_measure_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) + adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) + adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)]; + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *) +] +*) +(* let noun_adjuncts = [NP(Case "gen");NP(Case "nom");Multi[AdjP AllAgr] + +] + +let verb_adjuncts = [ + +] + +let adj_adjuncts = [AdvP "misc"] + +let adv_adjuncts = [AdvP "misc"] *) diff --git a/lexSemantics/ENIAMlexSemantics.ml b/lexSemantics/ENIAMlexSemantics.ml index dfe38f3..1190de1 100644 --- a/lexSemantics/ENIAMlexSemantics.ml +++ b/lexSemantics/ENIAMlexSemantics.ml @@ -22,7 +22,7 @@ open ENIAMsubsyntaxTypes open ENIAMlexSemanticsTypes open ENIAMwalTypes open Xstd - +(* let string_of_lex_sems tokens lex_sems = String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size lex_sems - 1) [] (fun l id -> let t = ExtArray.get lex_sems id in @@ -43,7 +43,7 @@ let find_senses t = (* FIXME: sensy zawierające 'się' *) Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses lemma pos | Proper(_,_,_,senses) -> ENIAMplWordnet.find_proper_senses senses | _ -> [] - +*) let rec find a l i = if a.(i) = max_int then ( a.(i) <- i; @@ -74,14 +74,14 @@ let rec split_tokens_into_groups_sentence a = function union a m (find a [] id))) | QuotedSentences sentences -> Xlist.iter sentences (fun p -> - split_tokens_into_groups_sentence a p.psentence) + split_tokens_into_groups_sentence a p.sentence) | AltSentence l -> Xlist.iter l (fun (mode,sentence) -> split_tokens_into_groups_sentence a sentence) let rec split_tokens_into_groups_paragraph a = function RawParagraph s -> () | StructParagraph sentences -> - Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.psentence) + Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.sentence) | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> split_tokens_into_groups_paragraph a paragraph) @@ -103,6 +103,59 @@ let split_tokens_into_groups size text = IntMap.fold map [] (fun l _ v -> v :: l) let assign_valence tokens lex_sems group = + let lexemes = Xlist.fold group StringSet.empty (fun lexemes id -> + let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in + StringSet.add lexemes lemma) in + let entries,schemata,connected = ENIAMwalReduce.select_entries lexemes in + Xlist.iter group (fun id -> + let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in + let pos = ENIAMtokens.get_pos (ExtArray.get tokens id).token in + let pos2 = ENIAMvalence.simplify_pos pos in + let schemata = try Entries.find schemata pos2 lemma with Not_found -> ENIAMvalence.get_default_valence pos2 in + let entries = try Entries.find entries pos lemma with Not_found -> [] in + let schemata = List.flatten (Xlist.map schemata (fun (opinion,neg,pred,aspect,schema) -> + ENIAMvalence.transform_entry pos lemma neg pred aspect schema)) in (* FIXME: gubię opinię *) + let schemata = ENIAMadjuncts.simplify_valence pos pos2 lemma schemata in + let schemata = Xlist.map schemata (fun (selectors,schema) -> + selectors,ENIAMvalence2.render_simple_schema schema) in + let entries = List.flatten (Xlist.map entries (ENIAMvalence.transform_lex_entry pos lemma)) in + let entries = Xlist.map entries (fun (selectors,entry) -> + selectors,ENIAMvalence2.render_lex_entry entry) in + ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with + schemata = schemata; lex_entries=entries}) + +(* TODO: + slashe + test + zgranie z LCGlexicon +*) + + (* let lexemes = Xlist.fold group Entries.empty (fun lexemes id -> + let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in + + match (ExtArray.get tokens id).token with + Lemma(lemma,pos,_) -> + Entries.add_inc lexemes (ENIAMvalence.simplify_pos pos) lemma pos (*(StringSet.singleton pos) (fun set -> StringSet.add set pos)*) + | Proper(lemma,pos,_,_) -> + (* let pos = match pos with + "subst" -> "psubst" + | "depr" -> "pdepr" + | _ -> pos (*failwith ("assign_valence: Proper " ^ pos ^ " " ^ lemma)*) in *) + Entries.add_inc lexemes (ENIAMvalence.simplify_pos pos) lemma pos (*StringSet.singleton pos) (fun set -> StringSet.add set pos*) + (* StringMap.add_inc lexemes lemma (StringSet.singleton pos) (fun set -> StringSet.add set pos) (* nazwy własne mają przypisywaną domyślną walencję rzeczowników *) *) + | _ -> lexemes) in + let lexemes = Entries.map2 (fun l -> StringSet.to_list (StringSet.of_list l)) in + let lexemes_set = Entries.fold lexemes StringSet.empty (fun lexemes_set _ lemma _ -> StringSet.add lexemes_set lemma) in + let schemata,entries = ENIAMvalence.prepare_selected_valence schemata entries lexemes in + let schemata = ENIAMadjuncts.simplify_valence schemata in + let schemata = Entries.map schemata (fun pos lemma (selectors,schema) -> + selectors,ENIAMvalence2.render_schema schema) in + let entries = Entries.map entries (fun pos lemma (selectors,entry) -> + selectors,ENIAMvalence2.render_lex_entry entry) in *) + + +(* +let assign_valence tokens lex_sems group = let lexemes = Xlist.fold group StringMap.empty (fun lexemes id -> match (ExtArray.get tokens id).token with Lemma(lemma,pos,_) -> @@ -159,129 +212,7 @@ let disambiguate_senses lex_sems group = senses = Xlist.map t.senses (fun (s,l,w) -> s, List.rev (Xlist.fold l [] (fun l s -> if StringSet.mem senses s then s :: l else l)),w)}) -let simplify_position_verb l = function (* FIXME: dodać czyszczenie E Pro *) - Phrase(NP(Case "dat")) -> l - | Phrase(NP(Case "inst")) -> l - | Phrase(PrepNP _) -> l - | Phrase(PrepAdjP _) -> l - | Phrase(NumP (Case "dat")) -> l - | Phrase(NumP (Case "inst")) -> l - | Phrase(PrepNumP _) -> l - | Phrase(ComprepNP _) -> l - | Phrase(ComparNP _) -> l - | Phrase(ComparPP _) -> l - | Phrase(IP) -> l - | Phrase(CP _) -> l - | Phrase(NCP(Case "dat",_,_)) -> l - | Phrase(NCP(Case "inst",_,_)) -> l - | Phrase(PrepNCP _) -> l -(* | Phrase(PadvP) -> l *) - | Phrase(AdvP) -> l - | Phrase(PrepP) -> l - | Phrase(Or) -> l - | Phrase(Qub) -> l - | Phrase(Adja) -> l - | Phrase(Inclusion) -> l - | Phrase Pro -> Phrase Null :: l - | t -> t :: l - -let simplify_position_verb2 l = function (* FIXME: dodać czyszczenie E Pro *) - Phrase(NP(Case "dat")) -> l - | Phrase(NP(Case "inst")) -> l - | Phrase(PrepNP _) -> l - | Phrase(PrepAdjP _) -> l - | Phrase(NumP (Case "dat")) -> l - | Phrase(NumP (Case "inst")) -> l - | Phrase(PrepNumP _) -> l - | Phrase(ComprepNP _) -> l - | Phrase(ComparNP _) -> l - | Phrase(ComparPP _) -> l - | Phrase(IP) -> l - | Phrase(CP _) -> l - | Phrase(NCP(Case "dat",_,_)) -> l - | Phrase(NCP(Case "inst",_,_)) -> l - | Phrase(PrepNCP _) -> l - (* | Phrase(PadvP) -> l *) - | Phrase(AdvP) -> l - | Phrase(PrepP) -> l - | Phrase(Or) -> l - | Phrase(Qub) -> l - | Phrase(Adja) -> l - | Phrase(Inclusion) -> l - | Phrase(Lex "się") -> l - | Phrase Pro -> Phrase Null :: l - | t -> t :: l - -let simplify_position_noun l = function - Phrase(NP(Case "gen")) -> l - | Phrase(NP(Case "nom")) -> l - | Phrase(NP(CaseAgr)) -> l - | Phrase(PrepNP _) -> l - | Phrase(AdjP AllAgr) -> l - | Phrase(NumP (Case "gen")) -> l - | Phrase(NumP (Case "nom")) -> l - | Phrase(NumP (CaseAgr)) -> l - | Phrase(PrepNumP _) -> l - | Phrase(ComprepNP _) -> l - | Phrase(ComparNP _) -> l - | Phrase(ComparPP _) -> l - | Phrase(IP) -> l - | Phrase(NCP(Case "gen",_,_)) -> l - | Phrase(PrepNCP _) -> l - | Phrase(PrepP) -> l - | Phrase(Qub) -> l - | Phrase(Adja) -> l - | Phrase(Inclusion) -> l - | Phrase Pro -> Phrase Null :: l - | t -> t :: l - -let simplify_position_adj l = function - Phrase(AdvP) -> l - | t -> t :: l - -let simplify_position_adv l = function - Phrase(AdvP) -> l - | t -> t :: l - - -let simplify_position pos l s = - let morfs = match pos with - "verb" -> List.rev (Xlist.fold s.morfs [] simplify_position_verb) - | "noun" -> List.rev (Xlist.fold s.morfs [] simplify_position_noun) - | "adj" -> List.rev (Xlist.fold s.morfs [] simplify_position_adj) - | "adv" -> List.rev (Xlist.fold s.morfs [] simplify_position_adv) - | _ -> s.morfs in - match morfs with - [] -> l - | [Phrase Null] -> l - | _ -> {s with morfs=morfs} :: l - -let simplify_schemata pos schemata = - let schemata = Xlist.fold schemata StringMap.empty (fun schemata (schema,frame) -> - let schema = List.sort compare (Xlist.fold schema [] (fun l s -> - let s = {s with role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; morfs=List.sort compare s.morfs} in - if s.gf <> ARG && s.gf <> ADJUNCT then s :: l else - (* if s.cr <> [] || s.ce <> [] then s :: l else *) - simplify_position pos l s)) in - StringMap.add_inc schemata (ENIAMwalStringOf.schema schema) (schema,[frame]) (fun (_,frames) -> schema, frame :: frames)) in - StringMap.fold schemata [] (fun l _ s -> s :: l) - -let simplify_schemata2 pos schemata = - let simplify_position_fun = match pos with - "verb" -> simplify_position_verb2 - | "noun" -> simplify_position_noun - | "adj" -> simplify_position_adj - | "adv" -> simplify_position_adv - | _ -> (fun l x -> x :: l) in - let morfs = Xlist.fold schemata [] (fun morfs schema -> - Xlist.fold schema morfs (fun morfs s -> - Xlist.fold s.morfs morfs simplify_position_fun)) in - let morfs = Xlist.fold morfs StringMap.empty (fun map s -> - StringMap.add map (ENIAMwalStringOf.morf s) s) in - let schema = StringMap.fold morfs [] (fun schema _ morf -> - {gf=ARG; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; - dir=Both; morfs=[Phrase Null;morf]} :: schema) in - schema + (* FIXME: problem ComprepNP i PrepNCP *) (* FIXME: problem gdy ten sam token występuje w kilku ścieżkach *) @@ -471,24 +402,24 @@ let assign_very_simplified_valence tokens lex_sems group = (selectors,schema) :: frames) in (* Printf.printf "|frames|=%d\n" (Xlist.size frames); *) ExtArray.set lex_sems id {t with very_simple_valence=frames}) - + *) let assign tokens text = let lex_sems = ExtArray.make (ExtArray.size tokens) empty_lex_sem in let _ = ExtArray.add lex_sems empty_lex_sem in Int.iter 1 (ExtArray.size tokens - 1) (fun i -> - let token = ExtArray.get tokens i in - ExtArray.set tokens i token; - let senses = find_senses token in - let cats = ENIAMcategories.assign token in - let lex_sem = {empty_lex_sem with senses=senses; cats=cats} in + (* let token = ExtArray.get tokens i in + (* ExtArray.set tokens i token; *) + let senses = find_senses token in *) + let lex_sem = {empty_lex_sem with senses=[](*senses*)} in let _ = ExtArray.add lex_sems lex_sem in ()); let groups = split_tokens_into_groups (ExtArray.size tokens) text in (* Xlist.iter groups (fun group -> print_endline (String.concat " " (Xlist.map group string_of_int))); *) Xlist.iter groups (fun group -> assign_valence tokens lex_sems group); + (* Xlist.iter groups (fun group -> assign_valence tokens lex_sems group); Xlist.iter groups (fun group -> disambiguate_senses lex_sems group); Xlist.iter groups (fun group -> assign_simplified_valence tokens lex_sems group); Xlist.iter groups (fun group -> assign_very_simplified_valence tokens lex_sems group); - Xlist.iter groups (fun group -> ENIAMlexSemanticsData.assign_semantics tokens lex_sems group); + Xlist.iter groups (fun group -> ENIAMlexSemanticsData.assign_semantics tokens lex_sems group); *) lex_sems diff --git a/lexSemantics/ENIAMlexSemanticsTypes.ml b/lexSemantics/ENIAMlexSemanticsTypes.ml index 4f93e4e..6892e9f 100644 --- a/lexSemantics/ENIAMlexSemanticsTypes.ml +++ b/lexSemantics/ENIAMlexSemanticsTypes.ml @@ -36,24 +36,18 @@ type semantics = | SpecialMod of string * (type_arg list * type_term)*) | PrepSemantics of (string * string * string * StringSet.t * string list) list (* case,role,role_attr,hipero,sel_prefs *) -(* FIXME: usunąć *) -type categories = {lemma: string; pos: string; pos2: string; - numbers: string list; cases: string list; genders: string list; persons: string list; - grads: string list; praeps: string list; acms: string list; - aspects: string list; negations: string list; moods: string list; tenses: string list; - nsyn: string list; nsem: string list; - } - - type lex_sem = { + schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * + (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list; + lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * + ENIAM_LCGtypes.grammar_symbol list) list; e: labels; - valence: (int * ENIAMwalTypes.frame) list; + (* valence: (int * ENIAMwalTypes.frame) list; simple_valence: (int * ENIAMwalTypes.frame) list; - very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; + very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; *) senses: (string * string list * float) list; lroles: string * string; semantics: semantics; - cats: categories list; } let empty_labels = { @@ -65,8 +59,9 @@ let empty_labels = { } let empty_lex_sem = { - e=empty_labels; valence=[]; simple_valence=[]; very_simple_valence=[]; senses=[]; - lroles="",""; semantics=Normal; cats=[];} + schemata=[]; lex_entries=[]; + e=empty_labels; (*valence=[]; simple_valence=[]; very_simple_valence=[];*) senses=[]; + lroles="",""; semantics=Normal} (* FIXME: poprawić katalog *) (*let subst_uncountable_lexemes_filename = resource_path ^ "/lexSemantics/subst_uncountable.dat" @@ -74,8 +69,8 @@ let subst_uncountable_lexemes_filename2 = resource_path ^ "/lexSemantics/subst_u let subst_container_lexemes_filename = resource_path ^ "/lexSemantics/subst_container.dat" let subst_numeral_lexemes_filename = resource_path ^ "/lexSemantics/subst_numeral.dat" let subst_time_lexemes_filename = resource_path ^ "/lexSemantics/subst_time.dat"*) -let subst_uncountable_lexemes_filename = resource_path ^ "/Walenty/subst_uncountable.dat" +(* let subst_uncountable_lexemes_filename = resource_path ^ "/Walenty/subst_uncountable.dat" let subst_uncountable_lexemes_filename2 = resource_path ^ "/Walenty/subst_uncountable_stare.dat" let subst_container_lexemes_filename = resource_path ^ "/Walenty/subst_container.dat" let subst_numeral_lexemes_filename = resource_path ^ "/Walenty/subst_numeral.dat" -let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat" +let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat" *) diff --git a/lexSemantics/ENIAMvalence.ml b/lexSemantics/ENIAMvalence.ml index 4c7b598..00ca135 100644 --- a/lexSemantics/ENIAMvalence.ml +++ b/lexSemantics/ENIAMvalence.ml @@ -23,8 +23,8 @@ open Xstd let simplify_pos = function "subst" -> "noun" | "depr" -> "noun" - | "psubst" -> "noun" - | "pdepr" -> "noun" + (* | "psubst" -> "noun" + | "pdepr" -> "noun" *) | "adj" -> "adj" | "adjc" -> "adj" | "adjp" -> "adj" @@ -629,3 +629,17 @@ let prepare_all_valence phrases schemata entries = (Entries.flatten_map entries transform_lex_entry) (* let _ = prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries *) + +(* let prepare_selected_valence schemata entries lexemes = + Entries.fold schemata Entries.empty (fun schemata pos2 lemma (opinion,neg,pred,aspect,schema) -> + Xlist.fold (Entries.find pos2 lemma) schemata (function pos -> + Entries.add_inc_list schemata pos lemma (transform_entry pos lemma neg pred aspect schema))), + Entries.flatten_map entries transform_lex_entry *) + +let get_default_valence = function + "verb" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[{empty_position with gf=SUBJ; morfs=[NP(Str);NCP(Str,CompTypeUndef,CompUndef)]}; + {empty_position with gf=OBJ; morfs=[NP(Str);NCP(Str,CompTypeUndef,CompUndef)]}]] + | "noun" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]] + | "adj" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]] + | "adv" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]] + | _ -> [] diff --git a/lexSemantics/ENIAMvalence2.ml b/lexSemantics/ENIAMvalence2.ml index eb3d731..f2e1829 100644 --- a/lexSemantics/ENIAMvalence2.ml +++ b/lexSemantics/ENIAMvalence2.ml @@ -142,7 +142,8 @@ let render_phrase = function | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect] | InfP AspectUndef -> Tensor[Atom "infp"; Top] (* | PadvP -> Tensor[Atom "padvp"] *) - | AdvP -> Tensor[Atom "advp"] + | AdvP "misc" -> Tensor[Atom "advp"; Top] (* FIXME: a może Atom "mod" zamiast Top *) + | AdvP mode -> Tensor[Atom "advp"; Atom mode] | FixedP lex -> Tensor[Atom "fixed"; Atom lex] (* | PrepP -> Tensor[Atom "prepp";Top] | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"] @@ -181,6 +182,10 @@ let render_schema schema = Xlist.map schema (fun p -> Both,Plus(Xlist.map p.morfs render_morf)) +let render_simple_schema schema = + Xlist.map schema (fun morfs -> + Both,Plus(Xlist.map morfs render_morf)) + (* FIXME: tu trzeba by dodać zwykłe reguły dla czasowników dotyczące ich negacji, aglutynatu itp. *) let render_lex_entry = function SimpleLexEntry(lemma,pos) -> [Tensor([Atom "lex";Atom lemma] @ render_pos_entry pos)] @@ -195,12 +200,12 @@ let render_lex_entry = function [ImpSet(Tensor([Atom "lex";Atom (string_of_int id);Atom lemma] @ render_pos_entry pos),[Both,Tensor[AVar "schema"]])] | entry -> print_endline ("render_entry:" ^ ENIAMwalStringOf.lex_entry entry); [(*[],entry*)] -let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries +(* let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries *) -let _ = +(* let _ = (* Entries.map schemata (fun pos lemma (selectors,schema) -> (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *) render_schema schema) *) Entries.map entries (fun pos lemma (selectors,entry) -> (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *) - selectors,render_lex_entry entry) + selectors,render_lex_entry entry) *) diff --git a/lexSemantics/ENIAMwalFrames.ml b/lexSemantics/ENIAMwalFrames.ml index c6da5fa..b548c1d 100644 --- a/lexSemantics/ENIAMwalFrames.ml +++ b/lexSemantics/ENIAMwalFrames.ml @@ -287,147 +287,6 @@ let prepare_schema expands subtypes equivs schema = let prepare_schema_sem expands subtypes equivs schema = prepare_schema_comprep expands subtypes equivs schema -let default_frames = Xlist.fold [ (* FIXME: poprawić domyślne ramki po ustaleniu adjunctów *) - "verb",(ReflEmpty,Domyslny,NegationUndef,PredNA,AspectUndef,"subj{np(str)}+obj{np(str)}"); (* FIXME: dodać ramkę z refl *) - "noun",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{possp}+{adjp(agr)}"); - "adj",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); - "adv",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); - "empty",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); - "date",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',natr)}"); - "date2",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',atr1({adjp(agr)}))}"); (* FIXME: wskazać możliwe podrzędniki *) - "day",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"" - (*"{lex(np(gen),sg,XOR('styczeń','luty','marzec','kwiecień','maj','czerwiec','lipiec','sierpień','wrzesień','październik','litopad','grudzień'),atr1({np(gen)}))}"*)); (* FIXME: wskazać możliwe podrzędniki *) - "hour",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(advp(temp),pos,'rano',natr)}"); - ] StringMap.empty (fun map (k,(refl,opinion,negation,pred,aspect,schema)) -> - StringMap.add map k (Frame(DefaultAtrs([],refl,opinion,negation,pred,aspect),prepare_schema expands subtypes equivs schema))) - -let adjunct_schema_field role dir morfs = - {gf=ADJUNCT; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs} - -let verb_prep_adjunct_schema_field lemma case = - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ - Phrase Null; - Phrase(PrepNP(Sem,lemma,Case case)); - Phrase(PrepAdjP(Sem,lemma,Case case)); - Phrase(PrepNumP(Sem,lemma,Case case))]} - -let verb_comprep_adjunct_schema_field lemma = - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ - Phrase Null; - Phrase(ComprepNP(Sem,lemma))]} - -let verb_compar_adjunct_schema_field lemma = - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ - Phrase Null; - Phrase(ComparPP(Sem,lemma))] @ - Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case)))} - -let noun_prep_adjunct_schema_field preps compreps = - {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= - let l = Xlist.fold preps [Phrase Null] (fun l (lemma,case) -> - [Phrase(PrepNP(Sem,lemma,Case case)); - Phrase(PrepAdjP(Sem,lemma,Case case)); - Phrase(PrepNumP(Sem,lemma,Case case))] @ l) in - Xlist.fold compreps l (fun l lemma -> - Phrase(ComprepNP(Sem,lemma)) :: l)} - -let noun_compar_adjunct_schema_field compars = - {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= - Xlist.fold compars [Phrase Null] (fun l lemma -> - [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)} - -let adj_compar_adjunct_schema_field compars = - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= - Xlist.fold compars [Phrase Null] (fun l lemma -> - [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)} - -(*let nogf_schema_field dir morfs = - {gf=NOGF; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=dir; morfs=morfs} *) - -let schema_field gf role dir morfs = - {gf=gf; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs} - -(*let verb_adjuncts = [ - adjunct_schema_field "R" "" Both [Phrase AdvP]; - adjunct_schema_field "R" "" Both [Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) - ] - -let noun_adjuncts = [ - adjunct_schema_field "C" "poss" Both [Phrase(NP(Case "gen"))]; - adjunct_schema_field "C" "=" Both [Phrase(NP(Case "nom"))]; - adjunct_schema_field "C" "=" Both [Phrase(NP(CaseAgr))]; - adjunct_schema_field "R" "" Backward [Multi[AdjP AllAgr]]; - adjunct_schema_field "R" "" Forward [Multi[AdjP AllAgr]]; - adjunct_schema_field "R" "" Both [Phrase PrepP]; - ] - -let adj_adjuncts = [ - adjunct_schema_field "R" "" Both [Phrase PrepP]; - ]*) - -let verb_adjuncts = [ -(* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP]; - adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) - adjunct_schema_field "Topic" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *)*) - ] - -(* FIXME: pozycje dublują się z domyślną ramką "noun" *) -let noun_adjuncts = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) -(* adjunct_schema_field "poss" Both [Phrase Null;Phrase(NP(Case "gen"))]; - adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(Case "nom"))]; - adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(CaseAgr))]; - adjunct_schema_field "" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) - adjunct_schema_field "" Forward [Phrase Null;Phrase(AdjP AllAgr)]; - adjunct_schema_field "" Both [Phrase Null;Phrase PrepP];*) - ] - -let adj_adjuncts = [ -(* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP]; *) - ] - - -let verb_adjuncts_simp = [ - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; - adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))]; - adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))]; - adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")]; -(* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *) - adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *) - adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or]; - ] - -let verb_adjuncts_simp2 = [ - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; - adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))]; - adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))]; - adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")]; - (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *) - adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *) - adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or]; - adjunct_schema_field "Theme" Both [Phrase Null;Phrase(Lex "się")]; -] - -let noun_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) - adjunct_schema_field "Possesive" Both [Phrase Null;Phrase(NP(Case "gen"));Phrase(NumP(Case "gen"))]; - adjunct_schema_field "Aposition" Forward [Phrase Null;Phrase(NP(Case "nom"));Phrase(NumP(Case "nom"));Phrase Null;Phrase(NP(CaseAgr));Phrase(NumP(CaseAgr))]; - adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) - adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)]; -(* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *) - ] - -let noun_measure_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) - adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) - adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)]; -(* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *) - ] - -let adj_adjuncts_simp = [ - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; - ] - -let adv_adjuncts_simp = [ - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; - ] let convert_frame expands subtypes equivs lexemes valence lexeme pos (refl,opinion,negation,pred,aspect,schema) = (* Printf.printf "convert_frame %s %s\n" lexeme pos; *) diff --git a/lexSemantics/ENIAMwalReduce.ml b/lexSemantics/ENIAMwalReduce.ml index 73b593a..377210a 100644 --- a/lexSemantics/ENIAMwalReduce.ml +++ b/lexSemantics/ENIAMwalReduce.ml @@ -172,7 +172,7 @@ let create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes = StringSet.mem lexemes -let select_entries phrases entries schemata connected meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes = +let select_entries_full phrases entries schemata connected meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes = let tests = create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes in let entries = reduce_entries lexemes entries in let schemata = reduce_entries lexemes schemata in @@ -194,16 +194,18 @@ let select_all_entries phrases entries schemata connected meanings = let connected = merge_entries_conn phrases meanings connected in entries,schemata,connected -let entries,schemata,connected = +let select_entries lexemes = + select_entries_full ENIAMwalParser.phrases ENIAMwalParser.entries ENIAMwalParser.schemata + ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes + +(* let entries,schemata,connected = (* let lexemes = StringSet.of_list ["Ala"; "ma"; "kot"] in *) let lexemes = StringSet.of_list ["dorastać"; "dorobić"; "po"; "bok"; "na"] in select_entries ENIAMwalParser.phrases ENIAMwalParser.entries ENIAMwalParser.schemata - ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes + ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes *) (* TODO - - usunięcie adjunctów - - uwzględnienie cech morfoskładniowych - - scalenie schematów + - uwzględnienie cech morfoskładniowych - np usunięcie schematów wymagających negacji, gdy nie ma "nie" - dodanie adjunctów - pamiętać o padvp *) (* TODO diff --git a/lexSemantics/makefile b/lexSemantics/makefile index b880bb1..9daa74f 100644 --- a/lexSemantics/makefile +++ b/lexSemantics/makefile @@ -3,10 +3,11 @@ OCAMLOPT=ocamlopt OCAMLDEP=ocamldep INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam OCAMLFLAGS=$(INCLUDES) -g -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa INSTALLDIR=`ocamlc -where`/eniam -SOURCES= ENIAMlexSemanticsTypes.ml ENIAMcategories.ml ENIAMlexSemanticsData.ml ENIAMlexSemantics.ml +SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMadjuncts.ml ENIAMvalence2.ml \ + ENIAMlexSemanticsTypes.ml ENIAMlexSemantics.ml #ENIAMlexSemanticsData.ml all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa @@ -28,8 +29,8 @@ eniam-lexSemantics.cmxa: $(SOURCES) # test: test.ml # $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml -test: entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMvalence2.ml test.ml - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMvalence2.ml test.ml +test: $(SOURCES) test.ml + $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) $^ .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx diff --git a/subsyntax/interface.ml b/subsyntax/interface.ml index dc097bf..cf3f7d1 100644 --- a/subsyntax/interface.ml +++ b/subsyntax/interface.ml @@ -43,6 +43,10 @@ let spec_list = [ let usage_msg = "Usage: subsyntax <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:" +let message = "ENIAMsubsyntax: MWE, abbreviation and sentence detecion for Polish\n\ +Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\ +Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences" + let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s)) let input_text channel = @@ -80,6 +84,7 @@ let rec main_loop in_chan out_chan = main_loop in_chan out_chan) let _ = + print_endline message; Arg.parse spec_list anon_fun usage_msg; Gc.compact (); print_endline "Ready!";