diff --git a/integration/TODO b/integration/TODO new file mode 100644 index 0000000..8c6c3ed --- /dev/null +++ b/integration/TODO @@ -0,0 +1 @@ +- uporządkować położenie info_sentences i podzielić na część dotyczącą formatu i część dotyczącą korpusu diff --git a/lexSemantics/TODO b/lexSemantics/TODO new file mode 100644 index 0000000..675bdb8 --- /dev/null +++ b/lexSemantics/TODO @@ -0,0 +1,5 @@ +- sprawdzić czy disambiguate_senses nie wycina argumentów, ktore mogą być realizowane przez przyimki +- dodać do walencji preferencje selekcyjne nadrzędników symboli: dzień, godzina, rysunek itp. +- sprawdzić czy walencja nazw własnych jest dobrze zrobiona. +- trzeba zrobić słownik nazw własnych +- trzeba poprawić selekcję preferencji selecyjnych: jeśli podrzędnikiem jest zaimek nie muszą jawnie występować wśród sensów. diff --git a/parser/LCGchart.ml b/parser/LCGchart.ml index 1e828ce..f036197 100644 --- a/parser/LCGchart.ml +++ b/parser/LCGchart.ml @@ -20,6 +20,8 @@ open Xstd open LCGtypes open Printf +open ENIAMtokenizerTypes +open ENIAMlexSemanticsTypes let make size = Array.make_matrix (size+1) (size+1) ([],0) @@ -194,29 +196,31 @@ let is_dep_parsed = function | [LCGtypes.Bracket(false,false,LCGtypes.Tensor[LCGtypes.Atom "<conll_root>"]),_] -> true | _ -> failwith "is_dep_parsed" -let get_parsed_term tokens chart = +let get_parsed_term tokens lex_sems chart = let n = last_node chart in let l = Xlist.fold (find chart 0 n) [] (fun l -> function LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<root>"]), sem -> (LCGtypes.Cut(LCGtypes.Tuple[sem])) :: l (* | LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<ors-sentence>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l *) | _ -> l) in - let id = ExtArray.add tokens {PreTypes.empty_token with PreTypes.token=PreTypes.Lemma("<root>","interp",[])} in + let id = ExtArray.add tokens {empty_token with token=Lemma("<root>","interp",[])} in + let _ = ExtArray.add lex_sems empty_lex_sem in 
LCGtypes.Node{LCGrenderer.empty_node with LCGtypes.pred="<root>"; LCGtypes.cat="interp"; LCGtypes.id=id; - LCGtypes.agf=WalTypes.NOSEM; + LCGtypes.agf=ENIAMwalTypes.NOSEM; LCGtypes.args=LCGrules.make_variant l} -let get_dep_parsed_term tokens = function +let get_dep_parsed_term tokens lex_sems = function [LCGtypes.Bracket(false,false,LCGtypes.Tensor[LCGtypes.Atom "<conll_root>"]),sem] -> - let id = ExtArray.add tokens {PreTypes.empty_token with PreTypes.token=PreTypes.Lemma("<root>","interp",[])} in + let id = ExtArray.add tokens {empty_token with token=Lemma("<root>","interp",[])} in + let _ = ExtArray.add lex_sems empty_lex_sem in let l = [LCGtypes.Cut (LCGtypes.Tuple[sem])] in LCGtypes.Node{LCGrenderer.empty_node with LCGtypes.pred="<root>"; LCGtypes.cat="interp"; LCGtypes.id=id; - LCGtypes.agf=WalTypes.NOSEM; + LCGtypes.agf=ENIAMwalTypes.NOSEM; LCGtypes.args=LCGrules.make_variant l} | _ -> failwith "get_dep_parsed_term" diff --git a/parser/LCGlatexOf.ml b/parser/LCGlatexOf.ml index b339cab..1793aff 100644 --- a/parser/LCGlatexOf.ml +++ b/parser/LCGlatexOf.ml @@ -73,8 +73,8 @@ let rec linear_term c = function "{\\left[\\begin{array}{ll}" ^ (String.concat "\\\\ " (Xlist.map (["PRED",Val t.pred;"CAT",Val t.cat;"ID",Val (string_of_int t.id);"WEIGHT",Val (string_of_float t.weight);"GS",t.gs;"ARGS",t.args] @ t.attrs) (fun (e,t) -> "\\text{" ^ (LatexMain.escape_string e) ^ "} & " ^ (linear_term 0 t)))) ^ "\\end{array}\\right]}" - | Morf m -> "\\text{" ^ LatexMain.escape_string (WalStringOf.morf m) ^ "}" - | Gf s -> "\\text{" ^ LatexMain.escape_string (WalStringOf.gf s) ^ "}" + | Morf m -> "\\text{" ^ LatexMain.escape_string (ENIAMwalStringOf.morf m) ^ "}" + | Gf s -> "\\text{" ^ LatexMain.escape_string (ENIAMwalStringOf.gf s) ^ "}" | Ref i -> "{\\bf ref}\\; " ^ string_of_int i | Cut t -> "{\\bf cut}(" ^ linear_term 0 t ^ ")" | Choice choices -> "{\\bf choice}(" ^ String.concat ";" (StringMap.fold choices [] (fun l ei t -> (sprintf "%s: %s" ei (linear_term 0 t)) :: l)) 
^ ")" @@ -127,8 +127,8 @@ let rec linear_term_simple c = function | Apply t -> "{\\bf apply}(" ^ linear_term_simple 0 t ^ ")" | Insert(s,t) -> "{\\bf insert}(" ^ linear_term_simple 0 s ^ "," ^ linear_term_simple 0 t ^ ")" | Node _ -> "node" - | Morf m -> "\\text{" ^ LatexMain.escape_string (WalStringOf.morf m) ^ "}" - | Gf s -> "\\text{" ^ LatexMain.escape_string (WalStringOf.gf s) ^ "}" + | Morf m -> "\\text{" ^ LatexMain.escape_string (ENIAMwalStringOf.morf m) ^ "}" + | Gf s -> "\\text{" ^ LatexMain.escape_string (ENIAMwalStringOf.gf s) ^ "}" | Ref i -> "{\\bf ref}\\; " ^ string_of_int i | Cut t -> "{\\bf cut}(" ^ linear_term_simple 0 t ^ ")" | Choice choices -> "{\\bf choice}(" ^ String.concat ";" (StringMap.fold choices [] (fun l ei t -> (sprintf "%s: %s" ei (linear_term_simple 0 t)) :: l)) ^ ")" diff --git a/parser/LCGlexicon.ml b/parser/LCGlexicon.ml index 815638b..6155d19 100644 --- a/parser/LCGlexicon.ml +++ b/parser/LCGlexicon.ml @@ -17,8 +17,9 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*) -open PreTypes -open WalTypes +open ENIAMtokenizerTypes +open ENIAMwalTypes +open ENIAMlexSemanticsTypes open LCGtypes open Xstd @@ -56,7 +57,7 @@ let check_frame_case cases = function | CaseUndef -> cases | Case case -> if not (Xlist.mem cases case) then raise Not_found else [case] | Str -> cases - | case -> failwith ("check_frame_case: " ^ WalStringOf.case case) + | case -> failwith ("check_frame_case: " ^ ENIAMwalStringOf.case case) let check_frame_number numbers = function Number num -> if not (Xlist.mem numbers num) then raise Not_found else [num] @@ -161,7 +162,7 @@ let objids = StringSet.of_list ["rysunek"] let int_arg = [arg_schema_field Forward [Phrase(Null);Phrase(Lex "int")]] -let create_entries tokens id (d:PreTypes.token_record) x_flag = +let create_entries tokens lex_sems id (c:ENIAMtokenizerTypes.token_record) (d:ENIAMlexSemanticsTypes.lex_sem) x_flag = let make_node lemma cat weight fnum l = let attrs,args = Xlist.fold l ([],[]) (fun (attrs,args) -> function @@ -199,9 +200,11 @@ let make_node lemma cat weight fnum l = | "pos" -> ("GRAD", Val "pos") :: attrs, args | "com" -> ("GRAD", Val "com") :: attrs, let id = ExtArray.add tokens {empty_token with token=Lemma("bardziej","adv",[])} in + let _ = ExtArray.add lex_sems empty_lex_sem in (Cut(Node{LCGrenderer.empty_node with pred="bardziej"; id=id; cat="adv"; agf=ADJUNCT; arole="Manner"; attrs=[(*"MEANING", Val "bardziej";*)"GRAD", Val "com"(*;"GF",Val "adjunct"*)]})) :: args (* FIXME: MEANING powinno być dodawane później *) | "sup" -> ("GRAD", Val "sup") :: attrs, let id = ExtArray.add tokens {empty_token with token=Lemma("najbardziej","adv",[])} in + let _ = ExtArray.add lex_sems empty_lex_sem in (Cut(Node{LCGrenderer.empty_node with pred="najbardziej"; id=id; cat="adv"; agf=ADJUNCT; arole="Manner"; attrs=[(*"MEANING", Val "najbardziej";*)"GRAD", Val "sup"(*;"GF",Val "adjunct"*)]})) :: args (* FIXME: MEANING powinno być dodawane później *) | "aff" -> attrs, args | "negation" -> ("NEG",Val "+") :: 
attrs, args @@ -215,7 +218,7 @@ let make_node lemma cat weight fnum l = args=if args = [] then Dot else Tuple(List.rev args)} in (* FIXME: "Można było" - brakuje uzgodnienia rodzaju przymiotnika w przypadku predykatywnym, i ogólnie kontroli składniowej *) -let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja siebie i się *) +let make_np numbers cases genders persons (c:ENIAMtokenizerTypes.token_record) d lemma cat = (* FIXME: koreferencja siebie i się *) if d.simple_valence = [] then print_endline "empty simple_valence"; let numbers = expand_numbers numbers in let cases = expand_cases cases in @@ -225,9 +228,9 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s "sg" -> let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "obj-id")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | "pl" -> l | _ -> failwith "make_np") else []) @ @@ -236,15 +239,15 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s "sg" -> let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. 
c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Aposition" Both [Phrase(Lex "year")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | "pl" -> let quant = ["number",d.e.number,["pl"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "year-interval")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | _ -> failwith "make_np") else []) @ (if lemma = "wiek" then (* FIXME: "Aranżuje w XIX w." się nie parsuje, niewłaściwa reprezentacja sem dla XIX *) @@ -252,15 +255,15 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s "sg" -> let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. 
c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Aposition" Both [Phrase(Lex "roman")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | "pl" -> let quant = ["number",d.e.number,["pl"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "roman-interval")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | _ -> failwith "make_np") else []) @ (if StringSet.mem months lemma then @@ -270,15 +273,15 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s "gen" -> let quant = ["number",d.e.number,["sg"];"case",d.e.case,["gen"];"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["month-lex"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. 
c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Possesive" Forward [Phrase Null; Phrase(Lex "year"); Phrase(NP(Case "gen"))]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | _ -> l) in let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Possesive" Forward [Phrase(Lex "year")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | "pl" -> l | _ -> failwith "make_np") else []) @ @@ -289,29 +292,29 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s "gen" -> let quant = ["number",d.e.number,["sg"];"case",d.e.case,["gen"];"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["day-lex"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. 
c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "date");Phrase(Lex "day");Phrase(Lex "day-month")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | _ -> l) in let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "date");Phrase(Lex "day");Phrase(Lex "day-month")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | "pl" -> (* let l = Xlist.fold cases l (fun l -> function "gen" -> let quant = ["number",d.e.number,["sg"];"case",d.e.case,["gen"];"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["day-lex"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. 
c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "date-interval");Phrase(Lex "day-interval");Phrase(Lex "day-month-interval")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | _ -> l) in*) let quant = ["number",d.e.number,["pl"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "date-interval");Phrase(Lex "day-interval");Phrase(Lex "day-month-interval")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | _ -> failwith "make_np") else []) @ (if lemma = "godzina" then @@ -319,15 +322,15 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s "sg" -> let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. 
c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "hour");Phrase(Lex "hour-minute")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | "pl" -> let quant = ["number",d.e.number,["pl"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "hour-interval");Phrase(Lex "hour-minute-interval")]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | _ -> failwith "make_np") else []) @ Xlist.fold d.simple_valence [] (fun l -> function @@ -341,12 +344,12 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s let quant3 = ["number",d.e.number,numbers;"case",d.e.case,all_cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["measure"; "number"; "case"; "gender"; "person"] in let t3 = ["measure"; "sg"; "case"; "n2"; "person"] in (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *) - let batrs = make_node lemma cat (d.weight +. measure_weight) fnum (nsyn :: (WalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in - let batrs3 = make_node lemma cat (d.weight +. measure_weight) fnum (nsyn :: (WalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in + let batrs = make_node lemma cat (c.weight +. 
measure_weight) fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in + let batrs3 = make_node lemma cat (c.weight +. measure_weight) fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in let schema_list = [qub_inclusion;schema;num_congr] in let schema_list3 = [qub_inclusion;schema;num_rec] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant3 schema_list3 t3 d batrs3] else []) @ l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant3 schema_list3 t3 d batrs3] else []) @ l else let persons = if cases = ["voc"] then ["sec"] else persons in let quant = ["lemma",ge (),[];"number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in @@ -358,22 +361,22 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s let t3 = ["np"; "sg"; "case"; "n2"; "person"] in (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *) let t5 = ["np"; "unumber"; "ucase"; "ugender"; "uperson"] in let t6 = ["prepnp"; "lemma"; "ucase"] in - let batrs = make_node lemma cat d.weight fnum (nsyn :: (WalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in - let batrs2 = make_node lemma cat d.weight fnum ("nosem" :: nsyn :: (WalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in - let batrs3 = make_node lemma cat d.weight fnum (nsyn :: (WalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in - let batrs4 = make_node lemma cat d.weight fnum ("nosem" :: nsyn :: (WalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in + let batrs = make_node lemma cat c.weight fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in + let batrs2 = make_node lemma cat 
c.weight fnum ("nosem" :: nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in + let batrs3 = make_node lemma cat c.weight fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in + let batrs4 = make_node lemma cat c.weight fnum ("nosem" :: nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in let schema_list = [qub_inclusion;schema;num_congr] in let schema_list2 = [qub_inclusion;schema;num_congr;nosem_prep] in let schema_list3 = [qub_inclusion;schema;num_rec] in let schema_list4 = [qub_inclusion;schema;num_rec;nosem_prep] in let schema_list5 = [qub_inclusion;schema;noun_measure] in let schema_list6 = [qub_inclusion;schema;noun_measure;nosem_uprep] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: - (LCGrenderer.make_frame x_flag tokens quant schema_list2 t2 d batrs2) :: - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant3 schema_list3 t3 d batrs3] else []) @ - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant3 schema_list4 t2 d batrs4] else []) @ - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant5 schema_list5 t5 d batrs3] else []) @ - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant5 schema_list6 t6 d batrs4] else []) @ l) + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list2 t2 d batrs2) :: + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant3 schema_list3 t3 d batrs3] else []) @ + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant3 schema_list4 t2 d batrs4] else []) @ + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant5 schema_list5 t5 d batrs3] else []) @ + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant5 schema_list6 t6 
d batrs4] else []) @ l) with Not_found -> l) | fnum,Frame(AdjAtrs(_,case,_),schema) -> (try @@ -383,18 +386,18 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s let t = ["np"; "number"; "case"; "gender"; "person"] in let t2 = ["prepnp"; "lemma"; "case"] in let t3 = ["np"; "sg"; "case"; "n2"; "person"] in (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *) - let batrs = make_node lemma cat d.weight fnum ["number"; "case"; "gender"; "person"] in - let batrs2 = make_node lemma cat d.weight fnum ["nosem"; "number"; "case"; "gender"; "person"] in - let batrs3 = make_node lemma cat d.weight fnum ["number"; "gen"; "gender"; "person"] in - let batrs4 = make_node lemma cat d.weight fnum ["nosem"; "number"; "gen"; "gender"; "person"] in + let batrs = make_node lemma cat c.weight fnum ["number"; "case"; "gender"; "person"] in + let batrs2 = make_node lemma cat c.weight fnum ["nosem"; "number"; "case"; "gender"; "person"] in + let batrs3 = make_node lemma cat c.weight fnum ["number"; "gen"; "gender"; "person"] in + let batrs4 = make_node lemma cat c.weight fnum ["nosem"; "number"; "gen"; "gender"; "person"] in let schema_list = [qub_inclusion;schema;num_congr] in let schema_list2 = [qub_inclusion;schema;num_congr;nosem_prep] in let schema_list3 = [qub_inclusion;schema;num_rec] in let schema_list4 = [qub_inclusion;schema;num_rec;nosem_prep] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: - (LCGrenderer.make_frame x_flag tokens quant schema_list2 t2 d batrs2) :: - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant3 schema_list3 t3 d batrs3] else []) @ - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant3 schema_list4 t2 d batrs4] else []) @ l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list2 t2 d batrs2) :: + (if Xlist.mem cases "gen" then 
[LCGrenderer.make_frame x_flag tokens lex_sems quant3 schema_list3 t3 d batrs3] else []) @ + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant3 schema_list4 t2 d batrs4] else []) @ l with Not_found -> l) | fnum,LexFrame(lid,SUBST(number,case),NoRestr,schema) -> (try @@ -402,32 +405,32 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s let numbers = check_frame_number numbers number in let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in let t = ["lex";lid;lemma;"subst"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum ["lex";"number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum ["lex";"number"; "case"; "gender"; "person"] in let schema_list = [[inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) | fnum,LexFrame(_,ADJ _,_,_) -> l - | fnum,frame -> failwith ("make_np: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + | fnum,frame -> failwith ("make_np: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in -let make_np_symbol d lemma cat = +let make_np_symbol (c:ENIAMtokenizerTypes.token_record) d lemma cat = Xlist.fold d.simple_valence [] (fun l -> function fnum,Frame(NounAtrs(_,nsyn,nsem),schema) -> let t = [cat] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) fnum (nsyn :: (WalStringOf.nsem nsem) :: []) in + let batrs = make_node lemma cat (symbol_weight +. 
c.weight) fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: []) in let schema_list = [schema] in - (LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs) :: l - | fnum,frame -> failwith ("make_np_symbol: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + (LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs) :: l + | fnum,frame -> failwith ("make_np_symbol: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in -let make_day d lemma cat = +let make_day (c:ENIAMtokenizerTypes.token_record) d lemma cat = Xlist.fold d.simple_valence [] (fun l -> function fnum,Frame(NounAtrs(_,nsyn,nsem),[]) -> let t = [cat] in - let batrs = make_node lemma cat (symbol_weight +. d.weight) fnum (nsyn :: (WalStringOf.nsem nsem) :: []) in + let batrs = make_node lemma cat (symbol_weight +. c.weight) fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: []) in let schema_list = [[schema_field CORE "Possesive" Forward [Phrase(Lex "month-lex")]]] in - (LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs) :: l - | fnum,frame -> failwith ("make_day: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + (LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs) :: l + | fnum,frame -> failwith ("make_day: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in -let make_nump numbers cases genders persons acm d lemma cat = (* FIXME: liczba po rzeczowniku *) (* FIXME: zbadać jak liczebniki współdziałąją z jako COMPAR *) +let make_nump numbers cases genders persons acm (c:ENIAMtokenizerTypes.token_record) d lemma cat = (* FIXME: liczba po rzeczowniku *) (* FIXME: zbadać jak liczebniki współdziałąją z jako COMPAR *) let numbers = expand_numbers numbers in let cases = expand_cases cases in let genders = expand_genders genders in @@ -435,14 +438,14 @@ let make_nump numbers cases genders persons acm d lemma cat = (* FIXME: liczba p Xlist.map acm (function "rec" -> let t = ["num"; "number"; "case"; "gender"; "person"; "rec"] in - let batrs = make_node 
lemma cat d.weight 0 ["rec"; "number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat c.weight 0 ["rec"; "number"; "case"; "gender"; "person"] in let schema_list = [qub_inclusion] in (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs | "congr" -> let t = ["num"; "number"; "case"; "gender"; "person"; "congr"] in - let batrs = make_node lemma cat d.weight 0 ["congr"; "number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat c.weight 0 ["congr"; "number"; "case"; "gender"; "person"] in let schema_list = [qub_inclusion] in (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs | _ -> failwith "make_nump: num acm") @ Xlist.fold d.simple_valence [] (fun l -> function fnum,LexFrame(lid,NUM(case,gender,acm2),NoRestr,schema) -> @@ -454,78 +457,78 @@ let make_nump numbers cases genders persons acm d lemma cat = (* FIXME: liczba p match acm with "rec" -> let t = ["lex";lid;lemma;"num"; "number"; "case"; "gender"; "person"] in (* UWAGA: Number "sg" i Gender "n2", żeby uzgadniać z podmiotem czasownika *) - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum ["rec";"lex"; "number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum ["rec";"lex"; "number"; "case"; "gender"; "person"] in let schema_list = [[inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | "congr" -> let t = ["lex";lid;lemma;"num"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (lex_weight +. 
d.weight) fnum ["congr";"lex"; "number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum ["congr";"lex"; "number"; "case"; "gender"; "person"] in let schema_list = [[inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | _ -> failwith "make_nump: num acm" with Not_found -> l) - | fnum,frame -> failwith ("make_num: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + | fnum,frame -> failwith ("make_num: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in -let make_compar d lemma = +let make_compar (c:ENIAMtokenizerTypes.token_record) d lemma = let quant = ["case",d.e.case,["nom";"gen";"dat";"acc";"inst"]] in let t = ["comparnp"; lemma; "case"] in let t2 = ["comparpp"; lemma] in - let batrs = make_node lemma "prep" d.weight 0 ["case"] in - let batrs2 = make_node lemma "prep" d.weight 0 [] in + let batrs = make_node lemma "prep" c.weight 0 ["case"] in + let batrs2 = make_node lemma "prep" c.weight 0 [] in let schema_list = [qub_inclusion;[adjunct_schema_field "Manner" Backward [Phrase Null;Phrase AdvP];prep_arg_schema_field [Phrase (NP(CaseAgr))]]] in let schema_list2 = [qub_inclusion;[adjunct_schema_field "Manner" Backward [Phrase Null;Phrase AdvP];prep_arg_schema_field [Phrase (PrepNP(Sem,"",CaseUndef));Phrase (PrepNumP(Sem,"",CaseUndef));Phrase (PrepAdjP(Sem,"",CaseUndef));Phrase (ComprepNP(Sem,""))]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs; - LCGrenderer.make_frame x_flag tokens [] schema_list2 t2 d batrs2] in + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs; + LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list2 t2 d batrs2] in -let make_arg_prepp cases d lemma pcat phrase = +let make_arg_prepp cases (c:ENIAMtokenizerTypes.token_record) d lemma pcat phrase = let quant = ["case",d.e.case,expand_cases cases] in let t 
= [pcat; lemma; "case"] in - let batrs = make_node lemma "prep" d.weight 0 ["case"] in + let batrs = make_node lemma "prep" c.weight 0 ["case"] in let schema_list = [qub_inclusion;[adjunct_schema_field "Manner" Backward [Phrase Null;Phrase AdvP];prep_arg_schema_field [Phrase phrase]]] in - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs in + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs in -let make_prepnp cases d lemma = make_arg_prepp cases d lemma "prepnp" (NP(CaseAgr)) in -let make_prepnump cases d lemma = make_arg_prepp cases d lemma "prepnp" (NumP(CaseAgr)) in -let make_prepadjp cases d lemma = +let make_prepnp cases (c:ENIAMtokenizerTypes.token_record) d lemma = make_arg_prepp cases c d lemma "prepnp" (NP(CaseAgr)) in +let make_prepnump cases (c:ENIAMtokenizerTypes.token_record) d lemma = make_arg_prepp cases c d lemma "prepnp" (NumP(CaseAgr)) in +let make_prepadjp cases (c:ENIAMtokenizerTypes.token_record) d lemma = let cases = if lemma = "po" then "postp" :: cases else cases in - make_arg_prepp cases d lemma "prepadjp" (AdjP(CaseAgr)) in + make_arg_prepp cases c d lemma "prepadjp" (AdjP(CaseAgr)) in -(*let make_prepp cases d lemma = +(*let make_prepp cases c d lemma = let quant = ["case",d.e.case,expand_cases cases] in let t = ["prepp"; "case"] in - let batrs = make_node lemma "prep" d.weight 0 ["case"] in + let batrs = make_node lemma "prep" c.weight 0 ["case"] in let schema_list = [qub_inclusion;[adjunct_schema_field "Manner" Backward [Phrase Null;Phrase AdvP];prep_arg_schema_field2 [Phrase (NP(CaseAgr));Phrase (NumP(CaseAgr));Phrase (AdjP(CaseAgr))]]] in - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs in*) + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs in*) -let make_prep cases d lemma = +let make_prep cases (c:ENIAMtokenizerTypes.token_record) d lemma = let quant = ["case",d.e.case,expand_cases cases] in let t = ["prep"; lemma; "case"] in - let batrs = 
make_node lemma "prep" d.weight 0 ["case"] in + let batrs = make_node lemma "prep" c.weight 0 ["case"] in let schema_list = [qub_inclusion] in - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs in + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs in -let make_time_prep2 case d lemma l = +let make_time_prep2 case (c:ENIAMtokenizerTypes.token_record) d lemma l = let quant = ["case",d.e.case,[case]] in let t = ["prepnp"; lemma; "case"] in - let batrs = make_node lemma "prep" d.weight 0 ["case"] in + let batrs = make_node lemma "prep" c.weight 0 ["case"] in let schema_list = [qub_inclusion;[prep_arg_schema_field (Xlist.map l (fun s -> Phrase(Lex s)))]] in - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs in + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs in -let make_time_prep cases d lemma = +let make_time_prep cases (c:ENIAMtokenizerTypes.token_record) d lemma = Xlist.fold cases [] (fun l case -> match lemma,case with - "z","gen" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l - | "do","gen" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l - | "na","acc" -> [make_time_prep2 case d lemma ["day-month";"day";"hour";"hour-minute"]] @ l - | "o","loc" -> [make_time_prep2 case d lemma ["hour";"hour-minute"]] @ l - | "od","gen" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l - | "około","gen" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"hour";"hour-minute"]] @ l - | "po","loc" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l - | "przed","inst" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l - | "w","loc" -> [make_time_prep2 case d lemma ["year"]] @ l + "z","gen" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l 
+ | "do","gen" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l + | "na","acc" -> [make_time_prep2 case c d lemma ["day-month";"day";"hour";"hour-minute"]] @ l + | "o","loc" -> [make_time_prep2 case c d lemma ["hour";"hour-minute"]] @ l + | "od","gen" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l + | "około","gen" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"hour";"hour-minute"]] @ l + | "po","loc" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l + | "przed","inst" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l + | "w","loc" -> [make_time_prep2 case c d lemma ["year"]] @ l | _ -> l) in -let make_lex_prep cases d lemma = +let make_lex_prep cases (c:ENIAMtokenizerTypes.token_record) d lemma = let cases = expand_cases cases in Xlist.fold d.simple_valence [] (fun l -> function | fnum,LexFrame(lid,PREP case,NoRestr,schema) -> @@ -533,26 +536,26 @@ let make_lex_prep cases d lemma = let cases = check_frame_case cases case in let quant = ["case",d.e.case,cases] in let t = ["lex";lid;lemma;"prep"; "case"] in - let batrs = make_node lemma "prep" (lex_weight +. d.weight) fnum ["lex";"case"] in + let batrs = make_node lemma "prep" (lex_weight +. c.weight) fnum ["lex";"case"] in let schema_list = [[inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) | fnum,ComprepFrame(new_lemma,PREP case,NoRestr,schema) -> (try let cases = check_frame_case cases case in let quant = ["case",d.e.case,cases] in let t = ["comprepnp"; new_lemma] in - let batrs = make_node new_lemma "prep" (lex_weight +. d.weight) fnum [] in + let batrs = make_node new_lemma "prep" (lex_weight +. 
c.weight) fnum [] in let schema_list = [[inclusion];schema] in - let l = (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l in + let l = (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l in let t = ["prepp"; "case"] in - let batrs = make_node new_lemma "prep" (lex_weight +. d.weight) fnum ["case"] in + let batrs = make_node new_lemma "prep" (lex_weight +. c.weight) fnum ["case"] in let schema_list = [[inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) - | fnum,frame -> failwith ("make_lex_prep: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + | fnum,frame -> failwith ("make_lex_prep: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in -let make_adjp numbers cases genders grads d lemma cat = (* FIXME: usunąć niektóre opcje dla roman i ordnum *) +let make_adjp numbers cases genders grads (c:ENIAMtokenizerTypes.token_record) d lemma cat = (* FIXME: usunąć niektóre opcje dla roman i ordnum *) let numbers = expand_numbers numbers in let cases = expand_cases cases in let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in @@ -565,21 +568,21 @@ let make_adjp numbers cases genders grads d lemma cat = (* FIXME: usunąć niekt let quant = ["lemma",ge (),[];"number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders] in let t = ["adjp"; "number"; "case"; "gender"] in let t2 = ["prepadjp"; "lemma"; "case"] in - let batrs = make_node lemma cat d.weight fnum (adjsyn :: grad :: ["number"; "case"; "gender"]) in - let batrs2 = make_node lemma cat d.weight fnum ("nosem" :: adjsyn :: grad :: ["number"; "case"; "gender"]) in + let batrs = make_node lemma cat c.weight fnum (adjsyn :: grad :: ["number"; "case"; "gender"]) in + let batrs2 = make_node lemma cat c.weight fnum ("nosem" :: adjsyn :: grad :: ["number"; "case"; "gender"]) in let 
schema_list = [if adjsyn = "pronoun" then [] else [adjunct_schema_field "Aposition" Backward [Phrase Null;Phrase Adja]];qub_inclusion;schema] in let schema_list2 = [if adjsyn = "pronoun" then [] else [adjunct_schema_field "Aposition" Backward [Phrase Null;Phrase Adja]];qub_inclusion;schema;nosem_prep] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: - (LCGrenderer.make_frame x_flag tokens quant schema_list2 t2 d batrs2) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list2 t2 d batrs2) :: l with Not_found -> l) | fnum,Frame(NounAtrs(_,nsyn,nsem),schema) -> (try let grad = match grads with [grad] -> grad | _ -> failwith "make_adjp: grad" in let quant = ["lemma",ge (),[];"number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders] in let t = ["adjp"; "number"; "case"; "gender"] in - let batrs = make_node lemma cat d.weight fnum (nsyn :: (WalStringOf.nsem nsem) :: grad :: ["number"; "case"; "gender"]) in + let batrs = make_node lemma cat c.weight fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: grad :: ["number"; "case"; "gender"]) in let schema_list = [if nsyn = "pronoun" then [] else [adjunct_schema_field "Aposition" Backward [Phrase Null;Phrase Adja]];qub_inclusion;schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) | fnum,LexFrame(lid,ADJ(number,case,gender,grad),NoRestr,schema) -> (try @@ -590,11 +593,11 @@ let make_adjp numbers cases genders grads d lemma cat = (* FIXME: usunąć niekt let grad = match grads with [grad] -> grad | _ -> failwith "make_adjp: grad" in let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders] in let t = ["lex";lid;lemma;"adj"; "number"; "case"; "gender"] in - let batrs = make_node lemma cat (lex_weight +. 
d.weight) fnum [grad;"lex"; "number"; "case"; "gender"] in + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum [grad;"lex"; "number"; "case"; "gender"] in let schema_list = [[inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) - | fnum,frame -> failwith ("make_adjp: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + | fnum,frame -> failwith ("make_adjp: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in let adv_relators = Xlist.fold [ "jak","Attr",[Int;Rel]; @@ -609,18 +612,18 @@ let adv_relators = Xlist.fold [ "gdy","con",[Sub]; ] StringMap.empty (fun map (k,v,l) -> StringMap.add map k (v,l)) in -let make_advp grads (d:PreTypes.token_record) lemma = +let make_advp grads (c:ENIAMtokenizerTypes.token_record) d lemma = (if StringMap.mem adv_relators lemma then let role,ctypes = StringMap.find adv_relators lemma in List.flatten (Xlist.map ctypes (fun ctype -> - let ctype = WalStringOf.comp_type ctype in + let ctype = ENIAMwalStringOf.comp_type ctype in let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"ctype",ge (),[ctype]] in let t = ["cp"; "ctype"; lemma] in let sem_mods = ["CTYPE",SubstVar "ctype"] in - let batrs = make_node lemma "adv" d.weight 0 [ctype] in + let batrs = make_node lemma "adv" c.weight 0 [ctype] in let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["advp"])] in let schema_list = [if lemma = "jak" then [num_arg_schema_field [Phrase Null;(*Phrase ProNG;*) Phrase AdvP]] else [];[schema_field RAISED "" Forward raised_arg1]] in (* FIXME: dwa znaczenia jak: pytanie o cechę lub spójnik *) - let frame_advp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_advp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in [frame_advp])) else []) @ Xlist.fold d.simple_valence [] 
(fun l -> function (* FIXME: sprawdzic czy adv_relators maja leksykalizacje i schematy *) fnum,Frame(EmptyAtrs _,schema) -> @@ -628,9 +631,9 @@ let make_advp grads (d:PreTypes.token_record) lemma = let grad = match grads with [grad] -> grad | _ -> failwith "make_advp: grad" in let quant = [] in let t = ["advp"] in - let batrs = make_node lemma "adv" d.weight fnum [grad] in + let batrs = make_node lemma "adv" c.weight fnum [grad] in let schema_list = [[adjunct_schema_field "Aposition" Backward [Phrase Null;Phrase Adja]];qub_inclusion;schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) | fnum,LexFrame(lid,ADV grad,NoRestr,schema) -> (try @@ -638,26 +641,26 @@ let make_advp grads (d:PreTypes.token_record) lemma = let grad = match grads with [grad] -> grad | _ -> failwith "make_adjp: grad" in let quant = [] in let t = ["lex";lid;lemma;"adv"] in - let batrs = make_node lemma "adv" (lex_weight +. d.weight) fnum [grad;"lex"] in + let batrs = make_node lemma "adv" (lex_weight +. c.weight) fnum [grad;"lex"] in let schema_list = [[inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) | fnum,ComprepFrame(new_lemma,ADV grad,NoRestr,schema) -> (try let _ = check_frame_grad grads grad in let quant = [] in let t = ["comprepnp"; new_lemma] in - let batrs = make_node new_lemma "adv" (lex_weight +. d.weight) fnum [] in + let batrs = make_node new_lemma "adv" (lex_weight +. 
c.weight) fnum [] in let schema_list = [[inclusion];schema] in - let l = (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l in (* FIXME: nieprzetestowane *) + let l = (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l in (* FIXME: nieprzetestowane *) let t = ["prepp"; "gen"] in (* FIXME: przypadek nie jest znany *) - let batrs = make_node new_lemma "adv" (lex_weight +. d.weight) fnum [] in + let batrs = make_node new_lemma "adv" (lex_weight +. c.weight) fnum [] in let schema_list = [[inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) - | fnum,frame -> failwith ("make_advp: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + | fnum,frame -> failwith ("make_advp: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in -let make_ger numbers cases genders persons aspects negations d lemma cat = +let make_ger numbers cases genders persons aspects negations (c:ENIAMtokenizerTypes.token_record) d lemma cat = let numbers = expand_numbers numbers in let cases = expand_cases cases in let genders = expand_genders genders in @@ -668,9 +671,9 @@ let make_ger numbers cases genders persons aspects negations d lemma cat = let negation = check_frame_negation negations negation in let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons; "aspect",d.e.aspect,aspects] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node new_lemma cat d.weight fnum [negation;"aspect";"number"; "case"; "gender"; "person"] in + let batrs = make_node new_lemma cat c.weight fnum [negation;"aspect";"number"; "case"; "gender"; "person"] in let schema_list = [qub_inclusion;schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant 
schema_list t d batrs) :: l with Not_found -> l) | fnum,LexFrame(lid,GER(number,case,gender,aspect,negation,ReflEmpty),NoRestr,schema) -> (try @@ -681,13 +684,13 @@ let make_ger numbers cases genders persons aspects negations d lemma cat = let negation = check_frame_negation negations negation in let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons; "aspect",d.e.aspect,aspects] in let t = ["lex";lid;lemma;"ger"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum [negation;"aspect";"lex"; "number"; "case"; "gender"; "person"] in + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum [negation;"aspect";"lex"; "number"; "case"; "gender"; "person"] in let schema_list = [[inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) - | fnum,frame -> failwith ("make_ger: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + | fnum,frame -> failwith ("make_ger: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in -let make_padj numbers cases genders aspects negations d lemma cat = +let make_padj numbers cases genders aspects negations (c:ENIAMtokenizerTypes.token_record) d lemma cat = let numbers = expand_numbers numbers in let cases = expand_cases cases in let cases = if Xlist.mem cases "nom" || cat = "ppas" then "pred" :: cases else cases in @@ -699,9 +702,9 @@ let make_padj numbers cases genders aspects negations d lemma cat = let negation = check_frame_negation negations negation in let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "aspect",d.e.aspect,aspects] in let t = ["adjp"; "number"; "case"; "gender"] in - let batrs = make_node new_lemma cat d.weight fnum [negation;"aspect";"number"; "case"; "gender"] in + let batrs = make_node new_lemma cat 
c.weight fnum [negation;"aspect";"number"; "case"; "gender"] in let schema_list = [qub_inclusion;schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) | fnum,LexFrame(lid,PACT(number,case,gender,aspect,negation,ReflEmpty),NoRestr,schema) -> (try @@ -712,9 +715,9 @@ let make_padj numbers cases genders aspects negations d lemma cat = let genders = check_frame_gender genders gender in let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "aspect",d.e.aspect,aspects] in let t = ["lex";lid;lemma;"pact"; "number"; "case"; "gender"] in - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum [negation;"lex";"aspect"; "number"; "case"; "gender"] in + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum [negation;"lex";"aspect"; "number"; "case"; "gender"] in let schema_list = [[inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) | fnum,LexFrame(lid,PPAS(number,case,gender,aspect,negation),NoRestr,schema) -> (try @@ -725,13 +728,13 @@ let make_padj numbers cases genders aspects negations d lemma cat = let genders = check_frame_gender genders gender in let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "aspect",d.e.aspect,aspects] in let t = ["lex";lid;lemma;"ppas"; "number"; "case"; "gender"] in - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum [negation;"lex";"aspect"; "number"; "case"; "gender"] in + let batrs = make_node lemma cat (lex_weight +. 
c.weight) fnum [negation;"lex";"aspect"; "number"; "case"; "gender"] in let schema_list = [[inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) - | fnum,frame -> failwith ("make_padj: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + | fnum,frame -> failwith ("make_padj: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in -let make_ip numbers genders persons aspects aglt aux2 d lemma cat = +let make_ip numbers genders persons aspects aglt aux2 (c:ENIAMtokenizerTypes.token_record) d lemma cat = let numbers = expand_numbers numbers in let genders = expand_genders genders in Xlist.fold d.simple_valence [] (fun l -> function @@ -742,29 +745,29 @@ let make_ip numbers genders persons aspects aglt aux2 d lemma cat = let aspects = check_frame_aspect aspects aspect in let quant = ["number",d.e.number,numbers;"gender",d.e.gender,genders; "person", d.e.person,persons; "aspect",d.e.aspect,aspects] in let t = ["ip"; "number"; "gender"; "person"] in - let batrs = make_node new_lemma cat d.weight fnum ([mood;tense;"aspect"; "number"; "gender"; "person"] @ if negation = Aff then [] else ["negation"]) in + let batrs = make_node new_lemma cat c.weight fnum ([mood;tense;"aspect"; "number"; "gender"; "person"] @ if negation = Aff then [] else ["negation"]) in let cond_arg = match mood with "conditional" -> [nosem_schema_field Both [Phrase(Lex "by")]] | "" -> failwith "make_ip" | _ -> [] in let aglt_arg = if aglt then [nosem_schema_field Both [Phrase Aglt]] else [] in let aux_arg = match aux with PastAux -> [nosem_schema_field Both [Phrase AuxPast]] | FutAux -> [nosem_schema_field Both [Phrase AuxFut]] | ImpAux -> [nosem_schema_field Both [Phrase AuxImp]] | NoAux -> [] in let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; qub_inclusion; aglt_arg @ aux_arg @ cond_arg @ schema @ 
int_arg] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) - | fnum,frame -> failwith ("make_ip 1: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + | fnum,frame -> failwith ("make_ip 1: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in -let make_infp aspects d lemma = +let make_infp aspects (c:ENIAMtokenizerTypes.token_record) d lemma = Xlist.fold d.simple_valence [] (fun l -> function fnum,Frame(NonPersAtrs(_,new_lemma,role,role_attr,negation,aspect),schema) -> (try let aspects = check_frame_aspect aspects aspect in let quant = ["aspect",d.e.aspect,aspects] in let t = ["infp"; "aspect"] in - let batrs = make_node new_lemma "inf" d.weight fnum (["aspect"] @ if negation = Aff then [] else ["negation"]) in + let batrs = make_node new_lemma "inf" c.weight fnum (["aspect"] @ if negation = Aff then [] else ["negation"]) in let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; qub_inclusion;schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) | fnum,LexFrame(lid,INF(aspect,negation,refl),NoRestr,schema) -> (try @@ -772,69 +775,69 @@ let make_infp aspects d lemma = let quant = ["aspect",d.e.aspect,aspects] in let t = ["lex";lid;lemma;"inf"; "aspect"] in let new_lemma,schema = if refl = ReflEmpty then lemma, schema else lemma ^ " się", nosem_refl_schema_field :: schema in - let batrs = make_node new_lemma "inf" (lex_weight +. d.weight) fnum (["lex";"aspect"] @ if negation = Aff then [] else ["negation"]) in + let batrs = make_node new_lemma "inf" (lex_weight +. 
c.weight) fnum (["lex";"aspect"] @ if negation = Aff then [] else ["negation"]) in let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; [inclusion];schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) - | fnum,frame -> failwith ("make_infp: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + | fnum,frame -> failwith ("make_infp: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in -let make_padvp aspects d lemma cat = +let make_padvp aspects (c:ENIAMtokenizerTypes.token_record) d lemma cat = Xlist.fold d.simple_valence [] (fun l -> function fnum,Frame(NonPersAtrs(_,new_lemma,role,role_attr,negation,aspect),schema) -> (try let aspects = check_frame_aspect aspects aspect in let quant = ["aspect",d.e.aspect,aspects] in let t = ["padvp"] in - let batrs = make_node new_lemma cat d.weight fnum (["aspect"] @ if negation = Aff then [] else ["negation"]) in + let batrs = make_node new_lemma cat c.weight fnum (["aspect"] @ if negation = Aff then [] else ["negation"]) in let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; qub_inclusion;schema] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l with Not_found -> l) - | fnum,frame -> failwith ("make_padvp: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in + | fnum,frame -> failwith ("make_padvp: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in -let make_conjunct (d:PreTypes.token_record) lemma cat = (* FIXME: poprawić semantykę *) +let make_conjunct (c:ENIAMtokenizerTypes.token_record) d lemma cat = (* FIXME: poprawić semantykę *) let ctype = if cat = "comp" then "sub" else if cat = "conj" then "coord" else failwith "make_conjunct" in let quant = [] in let t = ["cp"; 
ctype; lemma] in - let batrs = make_node lemma cat d.weight 0 [ctype] in + let batrs = make_node lemma cat c.weight 0 [ctype] in let schema_list = [[comp_arg_schema_field [Phrase IP]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] in + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] in (* FIXME: uzgadniania HIPERO i SELPREFS *) -let make_conj f d lemma = +let make_conj f (c:ENIAMtokenizerTypes.token_record) d lemma = (if f then [LCGrenderer.make_conj_frame ["number",d.e.number,all_numbers;"gender",d.e.gender,all_genders;"person",d.e.person,all_persons] (Tensor[Atom "ip"; Top; Top; Top]) (Tensor[Atom "ip"; Top; Top; Top]) ["ip";"number";"gender";"person"] d - (make_node lemma "conj" d.weight 0 ["number";"gender";"person"])] else []) @ + (make_node lemma "conj" c.weight 0 ["number";"gender";"person"])] else []) @ [LCGrenderer.make_conj_frame [] (Tensor[Atom "prepnp"; Top; Top]) (Tensor[Atom "prepnp"; Top; Top]) ["advp"] d - (make_node lemma "conj" d.weight 0 []); + (make_node lemma "conj" c.weight 0 []); LCGrenderer.make_conj_frame [] (Tensor[Atom "advp"]) (Tensor[Atom "prepnp"; Top; Top]) ["advp"] d - (make_node lemma "conj" d.weight 0 []); + (make_node lemma "conj" c.weight 0 []); LCGrenderer.make_conj_frame [] (Tensor[Atom "prepnp"; Top; Top]) (Tensor[Atom "advp"]) ["advp"] d - (make_node lemma "conj" d.weight 0 []); + (make_node lemma "conj" c.weight 0 []); LCGrenderer.make_conj_frame [] (Tensor[Atom "advp"]) (Tensor[Atom "advp"]) ["advp"] d - (make_node lemma "conj" d.weight 0 []); + (make_node lemma "conj" c.weight 0 []); LCGrenderer.make_conj_frame ["lemma",ge (),[];"case",d.e.case,all_cases] (Tensor[Atom "prepnp";AVar "lemma"; AVar "case"]) (Tensor[Atom "prepnp"; AVar "lemma"; AVar "case"]) ["prepnp";"lemma";"case"] d - (make_node lemma "conj" d.weight 0 ["case"]); + (make_node lemma "conj" c.weight 0 ["case"]); LCGrenderer.make_conj_frame 
["number",d.e.number,all_numbers;"case",d.e.case,all_cases;"gender",d.e.gender,all_genders;"person",d.e.person,all_persons] (Tensor[Atom "np"; Top; AVar "case"; Top; Top]) (Tensor[Atom "np"; Top; AVar "case"; Top; Top]) ["np"; "number"; "case"; "gender"; "person"] d - (make_node lemma "conj" d.weight 0 ["number";"case";"gender";"person"]); + (make_node lemma "conj" c.weight 0 ["number";"case";"gender";"person"]); LCGrenderer.make_conj_frame ["number",d.e.number,all_numbers;"case",d.e.case,all_cases;"gender",d.e.gender,all_genders] (Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]) (Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]) ["adjp";"number";"case";"gender"] d - (make_node lemma "conj" d.weight 0 ["number";"case";"gender"]); + (make_node lemma "conj" c.weight 0 ["number";"case";"gender"]); ] in (* FIXME: aktualnie NP nie obejmują przymiotników, trzeba albo dodać podrzędniki przymiotnikowe, albo kategorię np dla przymiotników *) @@ -844,77 +847,77 @@ let make_conj f d lemma = (* FIXME: sprawdzić czy są ramy z NegationUndef i NegationNA *) (* FIXME: obniżyć wagi przyimków i kublików pisanych z wielkiej litery podobnie przy skrótach *) -let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawanie lematów nie działa, gdy mają wielką literę *) +let rec process_interp (c:ENIAMtokenizerTypes.token_record) (d:ENIAMlexSemanticsTypes.lex_sem) = function (* FIXME: rozpoznawanie lematów nie działa, gdy mają wielką literę *) lemma,"subst",[numbers;cases;genders] -> (if lemma = "co" || lemma = "kto" then (* FIXME: dodać podrzędniki np. 
co nowego *) List.flatten (Xlist.map ["int";"rel"] (fun ctype -> let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,["ter"]] in let t = ["cp"; "ctype"; lemma] in let sem_mods = ["CTYPE",SubstVar "ctype"] in (* atrybuty ip *) - let batrs = make_node lemma "subst" d.weight 0 [ctype;"case"] in (* atrybuty liścia *) + let batrs = make_node lemma "subst" c.weight 0 [ctype;"case"] in (* atrybuty liścia *) let raised_arg = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["np";"number";"case";"gender";"person"])] in let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in let raised_arg2 = [Raised(["prepnp";"plemma";"case"],Forward,["np";"number";"case";"gender";"person"])] in let raised_arg3 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in let raised_arg4 = [Raised(["comprepnp";"plemma"],Forward,["np";"number";"case";"gender";"person"])] in let schema_list = [[schema_field RAISED "" Forward raised_arg]] in - let frame_np = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_np = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in let schema_list = [[schema_field RAISED "" Backward raised_arg2];[schema_field RAISED "" Forward raised_arg1]] in - let frame_prepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in let schema_list = [[schema_field RAISED "" Backward raised_arg4];[schema_field RAISED "" Forward raised_arg3]] in - let frame_comprepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems 
quant schema_list t d batrs sem_mods in [frame_np;frame_prepnp;frame_comprepnp])) else []) @ (if lemma = "to" then (* FIXME: przetestować *) let quant = ["ctype",ge (),[];"lemma",ge (),[];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,["ter"]] in let t = ["ncp"; "number"; "case"; "gender"; "person"; "ctype"; "lemma"] in - let batrs = make_node "to" "subst" d.weight 0 ["coreferential"; "number"; "case"; "gender"; "person"; "ctype"] in + let batrs = make_node "to" "subst" c.weight 0 ["coreferential"; "number"; "case"; "gender"; "person"; "ctype"] in let schema_list = [qub_inclusion;[prep_arg_schema_field [Phrase(CP(CompTypeAgr,Comp "lemma"))]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] else []) @ - make_np numbers cases genders ["ter"] d lemma "subst" + make_np numbers cases genders ["ter"] c d lemma "subst" | lemma,"depr",[numbers;cases;genders] -> (* FIXME: sprawdzić uzgodnienie rodzaju dla depr w podmiocie *) - make_np numbers cases genders ["ter"] d lemma "depr" + make_np numbers cases genders ["ter"] c d lemma "depr" | lemma,"ppron12",[numbers;cases;genders;persons] -> - make_np numbers cases genders persons d lemma "ppron12" + make_np numbers cases genders persons c d lemma "ppron12" | lemma,"ppron12",[numbers;cases;genders;persons;akcs] -> - make_np numbers cases genders persons d lemma "ppron12" + make_np numbers cases genders persons c d lemma "ppron12" | lemma,"ppron3",[numbers;cases;genders;persons] -> - make_np numbers cases genders persons d lemma "ppron3" + make_np numbers cases genders persons c d lemma "ppron3" | lemma,"ppron3",[numbers;cases;genders;persons;akcs] -> - make_np numbers cases genders persons d lemma "ppron3" + make_np numbers cases genders persons c d lemma "ppron3" | lemma,"ppron3",[numbers;cases;genders;persons;akcs;praep] -> 
List.flatten (Xlist.map praep (function - "npraep" -> make_np numbers cases genders persons d lemma "ppron3" - | "_" -> make_np numbers cases genders persons d lemma "ppron3" + "npraep" -> make_np numbers cases genders persons c d lemma "ppron3" + | "_" -> make_np numbers cases genders persons c d lemma "ppron3" | "praep" -> let quant = ["lemma",ge (),[]; "number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,persons] in let t = ["prepnp"; "lemma"; "case"] in Xlist.fold d.simple_valence [] (fun l -> function fnum,Frame(NounAtrs(_,nsyn,nsem),schema) -> - let batrs = make_node lemma "ppron3" d.weight fnum (nsyn ::(WalStringOf.nsem nsem) :: ["number";"case";"gender";"person"]) in + let batrs = make_node lemma "ppron3" c.weight fnum (nsyn ::(ENIAMwalStringOf.nsem nsem) :: ["number";"case";"gender";"person"]) in let raised_arg = [Raised(["prepnp";"lemma";"case"],Forward,["np";"number";"case";"gender";"person"])] in let schema_list = [[schema_field RAISED "" Backward raised_arg];[inclusion]] in - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | _ -> failwith "process_interp: ppron3 praep") | _ -> failwith "process_interp: ppron3 praep")) | lemma,"siebie",[cases] -> (* FIXME: rozwiązać koreferencję *) - make_np ["_"] cases ["_"] ["ter"] d lemma "siebie" + make_np ["_"] cases ["_"] ["ter"] c d lemma "siebie" | lemma,"prep",[cases;woks] -> (* FIXME: pomijam niesemantyczny compar *) - if lemma = "jak" || lemma = "jako" || lemma = "niż" || lemma = "niczym" || lemma = "niby" || lemma = "co" then [make_prep all_cases d lemma] @ make_compar d lemma else - [make_prepnp cases d lemma; - make_prepnump cases d lemma; - make_prepadjp cases d lemma; - make_prep cases d lemma] @ - make_lex_prep cases d lemma @ - make_time_prep cases d lemma + if lemma = "jak" || lemma = "jako" || lemma = "niż" 
|| lemma = "niczym" || lemma = "niby" || lemma = "co" then [make_prep all_cases c d lemma] @ make_compar c d lemma else + [make_prepnp cases c d lemma; + make_prepnump cases c d lemma; + make_prepadjp cases c d lemma; + make_prep cases c d lemma] @ + make_lex_prep cases c d lemma @ + make_time_prep cases c d lemma | lemma,"prep",[cases] -> - if lemma = "jak" || lemma = "jako" || lemma = "niż" || lemma = "niczym" || lemma = "niby" || lemma = "co" then [make_prep all_cases d lemma] @ make_compar d lemma else - [make_prepnp cases d lemma; - make_prepnump cases d lemma; - make_prepadjp cases d lemma; - make_prep cases d lemma] @ - make_lex_prep cases d lemma @ - make_time_prep cases d lemma + if lemma = "jak" || lemma = "jako" || lemma = "niż" || lemma = "niczym" || lemma = "niby" || lemma = "co" then [make_prep all_cases c d lemma] @ make_compar c d lemma else + [make_prepnp cases c d lemma; + make_prepnump cases c d lemma; + make_prepadjp cases c d lemma; + make_prep cases c d lemma] @ + make_lex_prep cases c d lemma @ + make_time_prep cases c d lemma (* | lemma,"NUM",[["comp"]] -> failwith "num:comp"*) | lemma,"num",[numbers;cases;genders;acm] -> (* FIXME: liczebniki złożone *) (if lemma = "ile" then (* FIXME: walencja ile *) @@ -924,71 +927,71 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders;"person",d.e.person,["ter"]] in let t = ["cp"; "ctype"; lemma] in let sem_mods = ["CTYPE",SubstVar "ctype"] in - let batrs = make_node lemma "num" d.weight 0 [ctype;acm;"number";"case";"gender";"person"] in + let batrs = make_node lemma "num" c.weight 0 [ctype;acm;"number";"case";"gender";"person"] in let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["nump";num;"case";gend;"person"])] in let 
raised_arg2a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in let raised_arg2b = [Raised(["prepnp";"plemma";"case"],Forward,["nump";num;"case";gend;"person"])] in let raised_arg3a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in let raised_arg3b = [Raised(["comprepnp";"plemma"],Forward,["nump";num;"case";gend;"person"])] in let schema_list = [[num_arg_schema_field [Phrase ProNG; Phrase phrase]];[schema_field RAISED "" Forward raised_arg1]] in - let frame_nump = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_nump = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in let schema_list = [[num_arg_schema_field [Phrase ProNG; Phrase phrase]];[schema_field RAISED "" Backward raised_arg2b];[schema_field RAISED "" Forward raised_arg2a]] in - let frame_prepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in let schema_list = [[num_arg_schema_field [Phrase ProNG; Phrase phrase]];[schema_field RAISED "" Backward raised_arg3b];[schema_field RAISED "" Forward raised_arg3a]] in - let frame_comprepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in [frame_nump;frame_prepnp;frame_comprepnp])))) else []) @ - make_nump numbers cases genders ["ter"] acm d lemma "num" + make_nump numbers cases genders ["ter"] acm c d lemma "num" | _,"numc",[] -> [] | lemma,"intnum",[] -> - let batrs = make_node lemma "intnum" d.weight 0 [] in + let batrs = make_node lemma "intnum" c.weight 0 [] in let numbers,acms = if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else let s = String.get lemma (String.length lemma - 1) in ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] 
else ["rec"] in - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ - make_nump numbers ["_"] ["_"] ["ter"] acms d lemma "intnum" (* FIXME: specjalne traktowanie 1 i poza tym liczba mnoga *) + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ + make_nump numbers ["_"] ["_"] ["ter"] acms c d lemma "intnum" (* FIXME: specjalne traktowanie 1 i poza tym liczba mnoga *) | lemma,"realnum",[] -> - let batrs = make_node lemma "realnum" d.weight 0 [] in - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ - make_nump ["sg"] ["_"] ["_"] ["ter"] ["rec"] d lemma "realnum" + let batrs = make_node lemma "realnum" c.weight 0 [] in + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ + make_nump ["sg"] ["_"] ["_"] ["ter"] ["rec"] c d lemma "realnum" | lemma,"intnum-interval",[] -> - let batrs = make_node lemma "intnum-interval" d.weight 0 [] in - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ - make_nump ["pl"] ["_"] ["_"] ["ter"] ["rec";"congr"] d lemma "intnum-interval" + let batrs = make_node lemma "intnum-interval" c.weight 0 [] in + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ + make_nump ["pl"] ["_"] ["_"] ["ter"] ["rec";"congr"] c d lemma "intnum-interval" | lemma,"realnum-interval",[] -> - let batrs = make_node lemma "realnum-interval" d.weight 0 [] in - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ - make_nump ["sg"] ["_"] ["_"] ["ter"] ["rec"] d lemma "realnum-interval" + let batrs = make_node lemma "realnum-interval" c.weight 0 [] in + [LCGrenderer.make_frame x_flag 
tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ + make_nump ["sg"] ["_"] ["_"] ["ter"] ["rec"] c d lemma "realnum-interval" | lemma,"symbol",[] -> - make_np ["sg"] ["_"] ["_"] ["ter"] d lemma "symbol" + make_np ["sg"] ["_"] ["_"] ["ter"] c d lemma "symbol" | lemma,"ordnum",[] -> - make_adjp ["_"] ["_"] ["_"] ["pos"] d lemma "ordnum" - | lemma,"date",[] -> make_np_symbol d lemma "date" - | lemma,"date-interval",[] -> make_np_symbol d lemma "date-interval" - | lemma,"hour-minute",[] -> make_np_symbol d lemma "hour-minute" - | lemma,"hour",[] -> make_np_symbol d lemma "hour" - | lemma,"hour-minute-interval",[] -> make_np_symbol d lemma "hour-minute-interval" - | lemma,"hour-interval",[] -> make_np_symbol d lemma "hour-interval" - | lemma,"year",[] -> make_np_symbol d lemma "year" - | lemma,"year-interval",[] -> make_np_symbol d lemma "year-interval" - | lemma,"day",[] -> make_day d lemma "day" - | lemma,"day-interval",[] -> make_day d lemma "day-interval" - | lemma,"day-month",[] -> make_np_symbol d lemma "day-month" - | lemma,"day-month-interval",[] -> make_np_symbol d lemma "day-month-interval" - | lemma,"month-interval",[] -> make_np_symbol d lemma "month-interval" + make_adjp ["_"] ["_"] ["_"] ["pos"] c d lemma "ordnum" + | lemma,"date",[] -> make_np_symbol c d lemma "date" + | lemma,"date-interval",[] -> make_np_symbol c d lemma "date-interval" + | lemma,"hour-minute",[] -> make_np_symbol c d lemma "hour-minute" + | lemma,"hour",[] -> make_np_symbol c d lemma "hour" + | lemma,"hour-minute-interval",[] -> make_np_symbol c d lemma "hour-minute-interval" + | lemma,"hour-interval",[] -> make_np_symbol c d lemma "hour-interval" + | lemma,"year",[] -> make_np_symbol c d lemma "year" + | lemma,"year-interval",[] -> make_np_symbol c d lemma "year-interval" + | lemma,"day",[] -> make_day c d lemma "day" + | lemma,"day-interval",[] -> make_day c d lemma "day-interval" + | lemma,"day-month",[] -> make_np_symbol c d lemma 
"day-month" + | lemma,"day-month-interval",[] -> make_np_symbol c d lemma "day-month-interval" + | lemma,"month-interval",[] -> make_np_symbol c d lemma "month-interval" | lemma,"roman",[] -> (* "Aranżuje XIX struś." *) - let batrs = make_node lemma "roman" d.weight 0 [] in - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ - (make_np_symbol d lemma "roman") @ - (make_adjp ["_"] ["_"] ["_"] ["pos"] d lemma "roman-adj") + let batrs = make_node lemma "roman" c.weight 0 [] in + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ + (make_np_symbol c d lemma "roman") @ + (make_adjp ["_"] ["_"] ["_"] ["pos"] c d lemma "roman-adj") | lemma,"roman-interval",[] -> - let batrs = make_node lemma "roman-interval" d.weight 0 [] in - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ - (make_np_symbol d lemma "roman-interval") - | lemma,"match-result",[] -> make_np_symbol d lemma "match-result" - | lemma,"url",[] -> make_np_symbol d lemma "url" - | lemma,"email",[] -> make_np_symbol d lemma "email" - | lemma,"obj-id",[] -> make_np_symbol d lemma "obj-id" + let batrs = make_node lemma "roman-interval" c.weight 0 [] in + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ + (make_np_symbol c d lemma "roman-interval") + | lemma,"match-result",[] -> make_np_symbol c d lemma "match-result" + | lemma,"url",[] -> make_np_symbol c d lemma "url" + | lemma,"email",[] -> make_np_symbol c d lemma "email" + | lemma,"obj-id",[] -> make_np_symbol c d lemma "obj-id" | lemma,"adj",[numbers;cases;genders;grads] -> (if lemma = "czyj" || lemma = "jaki" || lemma = "który" then List.flatten (Xlist.map ["int"] (fun ctype -> @@ -996,7 +999,7 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: 
rozpoznawa let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"nperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders] in let t = ["cp"; "ctype"; lemma] in let sem_mods = ["CTYPE",SubstVar "ctype"] in - let batrs = make_node lemma "adj" d.weight 0 [ctype;"number";"case";"gender"] in + let batrs = make_node lemma "adj" c.weight 0 [ctype;"number";"case";"gender"] in let raised_arg0 = [Raised(["np";"number";"case";"gender";"nperson"],Backward,["adjp";"number";"case";"gender"])] in let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["np";"number";"case";"gender";"nperson"])] in let raised_arg2a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in @@ -1004,11 +1007,11 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa let raised_arg3a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in let raised_arg3b = [Raised(["comprepnp";"plemma"],Forward,["np";"number";"case";"gender";"nperson"])] in let schema_list = [[schema_field RAISED "" Forward raised_arg0];[schema_field RAISED "" Forward raised_arg1]] in - let frame_np = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_np = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in let schema_list = [[schema_field RAISED "" Forward raised_arg0];[schema_field RAISED "" Backward raised_arg2b];[schema_field RAISED "" Forward raised_arg2a]] in - let frame_prepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in let schema_list = [[schema_field RAISED "" Forward raised_arg0];[schema_field RAISED "" Backward raised_arg3b];[schema_field RAISED "" Forward raised_arg3a]] in - 
let frame_comprepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in [frame_np;frame_prepnp;frame_comprepnp])) else []) @ (if lemma = "jaki" || lemma = "który" then List.flatten (Xlist.map ["rel"] (fun ctype -> @@ -1016,117 +1019,117 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,["ter"]] in let t = ["cp"; "ctype"; lemma] in let sem_mods = ["CTYPE",SubstVar "ctype"] in - let batrs = make_node lemma "adj" d.weight 0 [ctype;"number";"case";"gender";"person"] in + let batrs = make_node lemma "adj" c.weight 0 [ctype;"number";"case";"gender";"person"] in let raised_arg = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["np";"number";"case";"gender";"person"])] in let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in let raised_arg2 = [Raised(["prepnp";"plemma";"case"],Forward,["np";"number";"case";"gender";"person"])] in let raised_arg3 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in let raised_arg4 = [Raised(["comprepnp";"plemma"],Forward,["np";"number";"case";"gender";"person"])] in let schema_list = [[schema_field RAISED "" Forward raised_arg]] in - let frame_np = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_np = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in let schema_list = [[schema_field RAISED "" Backward raised_arg2];[schema_field RAISED "" Forward raised_arg1]] in - let frame_prepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let 
frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in let schema_list = [[schema_field RAISED "" Backward raised_arg4];[schema_field RAISED "" Forward raised_arg3]] in - let frame_comprepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in + let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in [frame_np;frame_prepnp;frame_comprepnp])) else []) @ if lemma = "czyj" || lemma = "jaki" || lemma = "który" then [] else (if lemma = "jakiś" || lemma = "ten" || lemma = "taki" then - make_np numbers cases genders ["ter"] d lemma "adj" else []) @ - make_adjp numbers cases genders grads d lemma "adj" - | lemma,"adjc",[] -> make_adjp ["sg"] ["pred"] ["m1";"m2";"m3"] ["pos"] d lemma "adjc" (* np: gotów *) (* FIXME: czy to na pewno ma zwykłą walencję przymiotnika? *) - | lemma,"adjp",[] -> make_adjp all_numbers ["postp"] all_genders ["pos"] d lemma "adjp" + make_np numbers cases genders ["ter"] c d lemma "adj" else []) @ + make_adjp numbers cases genders grads c d lemma "adj" + | lemma,"adjc",[] -> make_adjp ["sg"] ["pred"] ["m1";"m2";"m3"] ["pos"] c d lemma "adjc" (* np: gotów *) (* FIXME: czy to na pewno ma zwykłą walencję przymiotnika? 
*) + | lemma,"adjp",[] -> make_adjp all_numbers ["postp"] all_genders ["pos"] c d lemma "adjp" | lemma,"adja",[] -> - let batrs = make_node lemma "adja" d.weight 0 [] in - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] (["adja"]) d batrs] + let batrs = make_node lemma "adja" c.weight 0 [] in + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] (["adja"]) d batrs] | lemma,"adv",[grads] -> - make_advp grads d lemma + make_advp grads c d lemma | lemma,"adv",[] -> - make_advp ["pos"] d lemma + make_advp ["pos"] c d lemma | lemma,"ger",[numbers;cases;genders;aspects;negations] -> - make_ger numbers cases genders ["ter"] aspects negations d lemma "ger" + make_ger numbers cases genders ["ter"] aspects negations c d lemma "ger" | lemma,"pact",[numbers;cases;genders;aspects;negations] -> - make_padj numbers cases genders aspects negations d lemma "pact" + make_padj numbers cases genders aspects negations c d lemma "pact" | lemma,"ppas",[numbers;cases;genders;aspects;negations] -> - make_padj numbers cases genders aspects negations d lemma "ppas" + make_padj numbers cases genders aspects negations c d lemma "ppas" | lemma,"fin",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *) let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in - (make_ip numbers ["_"] persons aspects false false d lemma "fin") @ + (make_ip numbers ["_"] persons aspects false false c d lemma "fin") @ (if persons2 = [] then [] else - make_ip numbers ["_"] persons2 aspects false true d lemma "fin") + make_ip numbers ["_"] persons2 aspects false true c d lemma "fin") | lemma,"bedzie",[numbers;persons;aspects] -> (if lemma = "być" then let quant = ["number",d.e.number,expand_numbers numbers;"gender",d.e.gender,all_genders; "person", d.e.person,persons] in let t = ["aux-fut"; "number"; "gender"; "person"] in - [LCGrenderer.make_frame_simple quant t d ( 
(make_node "być" "bedzie" d.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ - (make_ip numbers ["_"] persons aspects false false d lemma "bedzie") + [LCGrenderer.make_frame_simple quant t c ( (make_node "być" "bedzie" c.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ + (make_ip numbers ["_"] persons aspects false false c d lemma "bedzie") | lemma,"praet",[numbers;genders;aspects;nagl] -> (if lemma = "być" then let quant = ["number",d.e.number,expand_numbers numbers;"gender",d.e.gender,expand_genders genders; "person",d.e.person, all_persons] in let t = ["aux-past"; "number"; "gender"; "person"] in - [LCGrenderer.make_frame_simple quant t d ( (make_node "być" "praet" d.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ - (make_ip numbers genders ["ter"] aspects false false d lemma "praet") @ - (make_ip numbers genders ["pri";"sec"] aspects true false d lemma "praet") @ - (make_ip numbers genders ["pri";"sec";"ter"] aspects false true d lemma "praet") + [LCGrenderer.make_frame_simple quant t c ( (make_node "być" "praet" c.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ + (make_ip numbers genders ["ter"] aspects false false c d lemma "praet") @ + (make_ip numbers genders ["pri";"sec"] aspects true false c d lemma "praet") @ + (make_ip numbers genders ["pri";"sec";"ter"] aspects false true c d lemma "praet") | lemma,"praet",[numbers;genders;aspects] -> (if lemma = "być" then let quant = ["number",d.e.number,expand_numbers numbers;"gender",d.e.gender,expand_genders genders; "person",d.e.person, all_persons] in let t = ["aux-past"; "number"; "gender"; "person"] in - [LCGrenderer.make_frame_simple quant t d ( (make_node "być" "praet" d.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ - (make_ip numbers genders ["ter"] aspects false false d lemma "praet") @ - (make_ip numbers genders ["pri";"sec"] aspects true false d lemma "praet") @ - (make_ip numbers genders ["pri";"sec";"ter"] aspects false true d lemma "praet") + [LCGrenderer.make_frame_simple quant t c ( (make_node "być" 
"praet" c.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ + (make_ip numbers genders ["ter"] aspects false false c d lemma "praet") @ + (make_ip numbers genders ["pri";"sec"] aspects true false c d lemma "praet") @ + (make_ip numbers genders ["pri";"sec";"ter"] aspects false true c d lemma "praet") | lemma,"winien",[numbers;genders;aspects] -> - (make_ip numbers genders ["ter"] aspects false false d lemma "winien") @ - (make_ip numbers genders ["ter"] aspects false true d lemma "winien") @ - (make_ip numbers genders ["pri";"sec"] aspects true false d lemma "winien") @ - (make_ip numbers genders ["pri";"sec"] aspects true true d lemma "winien") + (make_ip numbers genders ["ter"] aspects false false c d lemma "winien") @ + (make_ip numbers genders ["ter"] aspects false true c d lemma "winien") @ + (make_ip numbers genders ["pri";"sec"] aspects true false c d lemma "winien") @ + (make_ip numbers genders ["pri";"sec"] aspects true true c d lemma "winien") | lemma,"impt",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *) - make_ip numbers ["_"] persons aspects false false d lemma "impt" + make_ip numbers ["_"] persons aspects false false c d lemma "impt" | lemma,"imps",[aspects] -> - make_ip ["_"] ["_"] all_persons aspects false false d lemma "imps" + make_ip ["_"] ["_"] all_persons aspects false false c d lemma "imps" | lemma,"pred",[] -> (* FIXME: czy predykatyw zawsze jest niedokonany? 
*) - (make_ip ["sg"] ["n2"] ["ter"] ["imperf"] false false d lemma "pred") @ - (make_ip ["sg"] ["n2"] ["ter"] ["imperf"] false true d lemma "pred") + (make_ip ["sg"] ["n2"] ["ter"] ["imperf"] false false c d lemma "pred") @ + (make_ip ["sg"] ["n2"] ["ter"] ["imperf"] false true c d lemma "pred") | "być","aglt",[numbers;persons;aspects;wok] -> let numbers = expand_numbers numbers in let quant = ["number",d.e.number,numbers; "person", d.e.person,persons] in let t = ["aglt"; "number"; "person"] in - [LCGrenderer.make_frame_simple quant t d ( (make_node "być" "aglt" d.weight 0 [])(*[Dot;Dot;Dot]*))] + [LCGrenderer.make_frame_simple quant t c ( (make_node "być" "aglt" c.weight 0 [])(*[Dot;Dot;Dot]*))] | lemma,"inf",[aspects] -> (* FIXME: wielopoziomowe InfP *) - make_infp aspects d lemma + make_infp aspects c d lemma | lemma,"pcon",[aspects] -> - make_padvp aspects d lemma "pcon" + make_padvp aspects c d lemma "pcon" | lemma,"pant",[aspects] -> - make_padvp aspects d lemma "pant" - | "się","qub",[] -> [LCGrenderer.make_frame_simple [] ["się"] {d with orth=""} ( (make_node "się" "qub" d.weight 0 [])) (* FIXME: dodać make_np *)] - | "nie","qub",[] -> [LCGrenderer.make_frame_simple [] ["nie"] {d with orth=""} (make_node "nie" "qub" d.weight 0 [])] - | "by","qub",[] -> [LCGrenderer.make_frame_simple [] ["by"] {d with orth=""} (make_node "by" "qub" d.weight 0 [])] - | "niech","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] d (make_node "niech" "qub" d.weight 0 [])] - | "niechaj","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] d (make_node "niechaj" "qub" d.weight 0 [])] - | "niechże","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] d (make_node "niechże" "qub" d.weight 0 [])] - | "niechajże","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] d (make_node "niechajże" "qub" d.weight 0 [])] + make_padvp aspects c d lemma "pant" + | "się","qub",[] -> [LCGrenderer.make_frame_simple [] ["się"] {c with orth=""} ( (make_node "się" "qub" c.weight 0 
[])) (* FIXME: dodać make_np *)] + | "nie","qub",[] -> [LCGrenderer.make_frame_simple [] ["nie"] {c with orth=""} (make_node "nie" "qub" c.weight 0 [])] + | "by","qub",[] -> [LCGrenderer.make_frame_simple [] ["by"] {c with orth=""} (make_node "by" "qub" c.weight 0 [])] + | "niech","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niech" "qub" c.weight 0 [])] + | "niechaj","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niechaj" "qub" c.weight 0 [])] + | "niechże","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niechże" "qub" c.weight 0 [])] + | "niechajże","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niechajże" "qub" c.weight 0 [])] | "czy","qub",[] -> (* FIXME: poprawić semantykę *) let quant = [] in let t = ["cp"; "int"; "czy"] in - let batrs = make_node "czy" "qub" d.weight 0 ["int"] in + let batrs = make_node "czy" "qub" c.weight 0 ["int"] in let schema_list = [[comp_arg_schema_field [Phrase IP]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] | "gdyby","qub",[] -> (* FIXME: poprawić semantykę *) (* FIXME: poprawić tryb przypuszczający *) (* FIXME: problem z interpretacją jako 'gdy' *) let quant = [] in let t = ["cp"; "rel"; "gdyby"] in - let batrs = make_node "gdyby" "qub" d.weight 0 ["rel"] in + let batrs = make_node "gdyby" "qub" c.weight 0 ["rel"] in let schema_list = [[comp_arg_schema_field [Phrase IP]]] in - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] - | lemma,"qub",[] -> [LCGrenderer.make_frame_simple [] ["qub"] d ( (make_node lemma "qub" d.weight 0 []))] (* FIXME: semantyka i rodzaje kublików *) - | lemma,"comp",[] -> make_conjunct d lemma "comp" - | "i","conj",[] -> make_conj true d "i" @ (make_conjunct d "i" "conj") - | "lub","conj",[] -> make_conj true d "lub" @ (make_conjunct d "lub" "conj") - | "czy","conj",[] -> make_conj true 
d "czy" @ (make_conjunct d "czy" "conj") - | "bądź","conj",[] -> make_conj true d "bądź" @ (make_conjunct d "bądź" "conj") - | lemma,"conj",[] -> make_conjunct d lemma "conj" + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] + | lemma,"qub",[] -> [LCGrenderer.make_frame_simple [] ["qub"] c ( (make_node lemma "qub" c.weight 0 []))] (* FIXME: semantyka i rodzaje kublików *) + | lemma,"comp",[] -> make_conjunct c d lemma "comp" + | "i","conj",[] -> make_conj true c d "i" @ (make_conjunct c d "i" "conj") + | "lub","conj",[] -> make_conj true c d "lub" @ (make_conjunct c d "lub" "conj") + | "czy","conj",[] -> make_conj true c d "czy" @ (make_conjunct c d "czy" "conj") + | "bądź","conj",[] -> make_conj true c d "bądź" @ (make_conjunct c d "bądź" "conj") + | lemma,"conj",[] -> make_conjunct c d lemma "conj" (* | "interp",[] -> [] | "brev",[pun] -> []*) - | lemma,"interj",[] -> [LCGrenderer.make_frame_simple [] ["interj"] d (make_node lemma "interj" d.weight 0 [])] + | lemma,"interj",[] -> [LCGrenderer.make_frame_simple [] ["interj"] c (make_node lemma "interj" c.weight 0 [])] | lemma,"burk",[] -> [] (* FIXME *) (* | "dig",[] -> [] | "romandig",[] -> [] @@ -1134,103 +1137,103 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa | "xxx",[] -> [] (* to występuje w słowniku skrótów *)*) (* | ".","interp",[] -> [] | "%","interp",[] -> []*) - | "-","interp",[] -> [LCGrenderer.make_frame_simple [] ["hyphen"] d (make_node "-" "interp" d.weight 0 [])] + | "-","interp",[] -> [LCGrenderer.make_frame_simple [] ["hyphen"] c (make_node "-" "interp" c.weight 0 [])] (* | ":","interp",[] -> [LCGrenderer.make_frame_simple [] ["colon"] ":" beg len [Dot] [Dot]]*) - | "?","interp",[] -> [LCGrenderer.make_frame_simple [] ["int"] d (make_node "?" 
"interp" d.weight 0 [])] (*FIXME: zdanie nadrzędne powinno mieć atrybut pytajności(Attr("INT",Val "+"))] *) - | ",","interp",[] -> make_conj false d "," (*@ [LCGrenderer.make_frame_simple [] ["comma"] "," beg len [Dot] [Dot]]*) + | "?","interp",[] -> [LCGrenderer.make_frame_simple [] ["int"] c (make_node "?" "interp" c.weight 0 [])] (*FIXME: zdanie nadrzędne powinno mieć atrybut pytajności(Attr("INT",Val "+"))] *) + | ",","interp",[] -> make_conj false c d "," (*@ [LCGrenderer.make_frame_simple [] ["comma"] "," beg len [Dot] [Dot]]*) | ";","interp",[] -> [](*[LCGrenderer.make_frame_simple [] ["comma"] ";" beg len [Dot] [Dot]]*) | "„","interp",[] -> [(* FIXME: zaznaczyć niesemantyczność quotów *) LCGrenderer.make_quot_frame ["number",d.e.number,[];"case",d.e.case,[];"gender",d.e.gender,[];"person",d.e.person,[]] (Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]) (Tensor[Atom "rquot"]) ["np";"number";"case";"gender";"person"] d - (make_node "„" "interp" d.weight 0 [])] - | "”","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot"] d (make_node "”" "interp" d.weight 0 [])] - | "«","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot3"] d (make_node "«" "interp" d.weight 0 []); + (make_node "„" "interp" c.weight 0 [])] + | "”","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot"] c (make_node "”" "interp" c.weight 0 [])] + | "«","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot3"] c (make_node "«" "interp" c.weight 0 []); LCGrenderer.make_quot_frame ["number",d.e.number,[];"case",d.e.case,[];"gender",d.e.gender,[];"person",d.e.person,[]] (Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]) (Tensor[Atom "rquot2"]) ["np";"number";"case";"gender";"person"] d - (make_node "«" "interp" d.weight 0 [])] - | "»","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot2"] d (make_node "»" "interp" d.weight 0 []); + (make_node "«" "interp" c.weight 0 [])] + | "»","interp",[] -> [LCGrenderer.make_frame_simple 
[] ["rquot2"] c (make_node "»" "interp" c.weight 0 []); LCGrenderer.make_quot_frame ["number",d.e.number,[];"case",d.e.case,[];"gender",d.e.gender,[];"person",d.e.person,[]] (Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]) (Tensor[Atom "rquot3"]) ["np";"number";"case";"gender";"person"] d - (make_node "»" "interp" d.weight 0 [])] - | "(","interp",[] -> [LCGrenderer.make_inclusion_frame (Tensor[Atom "rparen"]) d (make_node "(" "interp" d.weight 0 [])] - | ")","interp",[] -> [LCGrenderer.make_frame_simple [] ["rparen"] d (make_node ")" "interp" d.weight 0 [])] - | "[","interp",[] -> [LCGrenderer.make_inclusion_frame (Tensor[Atom "rparen"]) d (make_node "[" "interp" d.weight 0 [])] - | "]","interp",[] -> [LCGrenderer.make_frame_simple [] ["rparen"] d (make_node "]" "interp" d.weight 0 [])] + (make_node "»" "interp" c.weight 0 [])] + | "(","interp",[] -> [LCGrenderer.make_inclusion_frame (Tensor[Atom "rparen"]) d (make_node "(" "interp" c.weight 0 [])] + | ")","interp",[] -> [LCGrenderer.make_frame_simple [] ["rparen"] c (make_node ")" "interp" c.weight 0 [])] + | "[","interp",[] -> [LCGrenderer.make_inclusion_frame (Tensor[Atom "rparen"]) d (make_node "[" "interp" c.weight 0 [])] + | "]","interp",[] -> [LCGrenderer.make_frame_simple [] ["rparen"] c (make_node "]" "interp" c.weight 0 [])] | lemma,"unk",[] -> let quant = ["number",d.e.number,all_numbers;"case",d.e.case,all_cases; "gender",d.e.gender,all_genders; "person",d.e.person, ["ter"]] in let t = ["np"; "number"; "case"; "gender"; "person"] in - let batrs = make_node lemma "unk" d.weight 0 ["number"; "case"; "gender"; "person"] in - [LCGrenderer.make_frame_simple quant t d ( batrs)] + let batrs = make_node lemma "unk" c.weight 0 ["number"; "case"; "gender"; "person"] in + [LCGrenderer.make_frame_simple quant t c ( batrs)] | _,"xxx",[] -> [] (* FIXME *) - | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] d (make_node "." 
"interp" d.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *) + | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] c (make_node "." "interp" c.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *) | "<conll_root>","interp",[] -> - let batrs = (make_node "<conll_root>" "interp" d.weight 0 []) in + let batrs = (make_node "<conll_root>" "interp" c.weight 0 []) in let schema_list = [[schema_field CLAUSE "Clause" Forward [Phrase IP;Phrase (CP(Int,CompUndef));Phrase (NP(Case "voc"));Phrase (Lex "interj")]]] in - [LCGrenderer.make_frame false tokens [] schema_list ["<conll_root>"] d batrs] + [LCGrenderer.make_frame false tokens lex_sems [] schema_list ["<conll_root>"] d batrs] | lemma,c,l -> failwith ("process_interp: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) in -let process_bracket_lemma (d:PreTypes.token_record) = function +let process_bracket_lemma (c:ENIAMtokenizerTypes.token_record) (d:ENIAMlexSemanticsTypes.lex_sem) = function (* "<query>" -> - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase Null;Phrase (Lex "<dummy>")]];[arg_schema_field Forward [Phrase (Lex "</query>")]]] (["<query>"]) {d with orth=""} (make_node "<query1>" "interp" d.weight 0 []); - LCGrenderer.make_frame x_flag tokens [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query2>" "interp" d.weight 0 []); - LCGrenderer.make_frame x_flag tokens [] [[(*nosem*)arg_schema_field Forward [Phrase (Lex "<speaker>")]];[nosem_schema_field Forward [Phrase (Lex "<colon>")]];[(*nosem*)arg_schema_field Forward [Phrase (Lex "<ors>")]];[(*nosem*)arg_schema_field Forward [Phrase (Lex "</query>")]]] (["<query>"]) {d with orth=""} (make_node "<query3>" "interp" d.weight 0 []); - LCGrenderer.make_frame x_flag tokens [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<colon>")]]] (["<query>"]) {d with orth=""} (make_node "<query4>" "interp" d.weight 0 []); - 
LCGrenderer.make_frame x_flag tokens [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<colon>")]];[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query5>" "interp" d.weight 0 []); (* FIXME: zdania w odwróconej kolejności *) - LCGrenderer.make_frame x_flag tokens [] [[schema_field SENTENCE "Sentence" Forward [Phrase (Lex "<sentence>")]];[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query6>" "interp" d.weight 0 [])] (* FIXME: zdania w odwróconej kolejności *) + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase Null;Phrase (Lex "<dummy>")]];[arg_schema_field Forward [Phrase (Lex "</query>")]]] (["<query>"]) {d with orth=""} (make_node "<query1>" "interp" c.weight 0 []); + LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query2>" "interp" c.weight 0 []); + LCGrenderer.make_frame x_flag tokens lex_sems [] [[(*nosem*)arg_schema_field Forward [Phrase (Lex "<speaker>")]];[nosem_schema_field Forward [Phrase (Lex "<colon>")]];[(*nosem*)arg_schema_field Forward [Phrase (Lex "<ors>")]];[(*nosem*)arg_schema_field Forward [Phrase (Lex "</query>")]]] (["<query>"]) {d with orth=""} (make_node "<query3>" "interp" c.weight 0 []); + LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<colon>")]]] (["<query>"]) {d with orth=""} (make_node "<query4>" "interp" c.weight 0 []); + LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<colon>")]];[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query5>" "interp" c.weight 0 []); (* FIXME: zdania w odwróconej kolejności *) + LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "Sentence" Forward [Phrase (Lex 
"<sentence>")]];[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query6>" "interp" c.weight 0 [])] (* FIXME: zdania w odwróconej kolejności *) | "</query>" -> let t = (["</query>"]) in - let batrs = (make_node "</query>" "interp" d.weight 0 []) in + let batrs = (make_node "</query>" "interp" c.weight 0 []) in let schema_list = [[schema_field NOSEM "" Backward [Phrase Null;Phrase (Lex "<dummy>")]];[schema_field SENTENCE "Sentence" Backward [Multi[Lex "<sentence>"](*Phrase(Lex "s")*)]]] in - [LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs]*) + [LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs]*) | "„s" -> [] - (*let batrs = make_node "pro-komunikować" "pro" d.weight 0 [] in - [LCGrenderer.make_frame x_flag tokens [] [[schema_field OBJ "Theme" Forward [Phrase (Lex "</or1>")]]] (["<sentence>"(*"or"*)]) {d with orth=""} batrs; - LCGrenderer.make_frame_simple [] ["<dummy>"] d ( (make_node "„s" "interp" d.weight 0 []))]*) + (*let batrs = make_node "pro-komunikować" "pro" c.weight 0 [] in + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field OBJ "Theme" Forward [Phrase (Lex "</or1>")]]] (["<sentence>"(*"or"*)]) {d with orth=""} batrs; + LCGrenderer.make_frame_simple [] ["<dummy>"] d ( (make_node "„s" "interp" c.weight 0 []))]*) | "”s" -> [] (*let t = (["</or1>"]) in - let batrs = (make_node "”s" "interp" d.weight 0 []) in + let batrs = (make_node "”s" "interp" c.weight 0 []) in let schema_list = [[schema_field SENTENCE "Sentence" Backward [Multi[Lex "<sentence>"](*Phrase(Lex "s")*)]]] in - [LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs; - LCGrenderer.make_frame_simple [] ["<dummy>"] d ( (make_node "”s" "interp" d.weight 0 []))]*) + [LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs; + LCGrenderer.make_frame_simple [] ["<dummy>"] d ( (make_node "”s" "interp" c.weight 0 []))]*) | "«s" -> [] (* FIXME *) | "»s" -> [] (* FIXME *) | ":" -> 
- [LCGrenderer.make_frame_simple [] ["or"] d (LCGrenderer.make_pro_komunikat tokens)] + [LCGrenderer.make_frame_simple [] ["or"] c (LCGrenderer.make_pro_komunikat tokens lex_sems)] | ":s" -> - let batrs = make_node "pro-komunikować" "pro" d.weight 0 [] in - [LCGrenderer.make_frame x_flag tokens [] [[schema_field SUBJ "Initiator" Backward [Phrase (Lex "<speaker>")]]] (["<colon>"]) {d with orth=""} batrs; - LCGrenderer.make_frame x_flag tokens [] [[schema_field SUBJ "Initiator" Backward [Phrase (Lex "<speaker>")]];[schema_field OBJ "Theme" Forward [Phrase (Lex "</query>")]]] (["<colon>"]) {d with orth=""} batrs] - (*| "<or>" -> [LCGrenderer.make_frame x_flag tokens [] [[nosem_schema_field Forward [Phrase (Lex "</or>")]]] (["or"]) {d with orth=""} (make_node "<or>" "interp" d.weight 0 [])]*) + let batrs = make_node "pro-komunikować" "pro" c.weight 0 [] in + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SUBJ "Initiator" Backward [Phrase (Lex "<speaker>")]]] (["<colon>"]) {c with orth=""} batrs; + LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SUBJ "Initiator" Backward [Phrase (Lex "<speaker>")]];[schema_field OBJ "Theme" Forward [Phrase (Lex "</query>")]]] (["<colon>"]) {c with orth=""} batrs] + (*| "<or>" -> [LCGrenderer.make_frame x_flag tokens lex_sems [] [[nosem_schema_field Forward [Phrase (Lex "</or>")]]] (["or"]) {d with orth=""} (make_node "<or>" "interp" c.weight 0 [])]*) | "<or>" -> [] | "<or-sentence>" -> (* FIXME: dodać mówcę jako pro *) - let batrs = make_node "pro-komunikować" "pro" d.weight 0 [] in - [LCGrenderer.make_frame x_flag tokens [] [[schema_field ARG ""(*"Theme"*) Forward [Phrase (Lex "s")]]] ["<root>"] {d with orth=""} batrs; - LCGrenderer.or_frame (make_node "<sentence>" "interp" d.weight 0 [])] + let batrs = make_node "pro-komunikować" "pro" c.weight 0 [] in + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field ARG ""(*"Theme"*) Forward [Phrase (Lex "s")]]] ["<root>"] {c with orth=""} batrs; + 
LCGrenderer.or_frame (make_node "<sentence>" "interp" c.weight 0 [])] | "</or-sentence>" -> let t = ["or2"] in - let batrs = (make_node "</or-sentence>" "interp" d.weight 0 []) in + let batrs = (make_node "</or-sentence>" "interp" c.weight 0 []) in let schema_list = [[schema_field CLAUSE "Clause" Backward [Multi[IP;CP(Int,CompUndef);NP(Case "voc");Lex "interj"](*Phrase IP;Phrase(CP(Int,CompUndef));Phrase(NP(Case "voc"))*)]](*;[WalFrames.schema_field NOSEM "" Backward [Phrase(Lex "<sentence>")]]*)] in - [LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs] + [LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs] | "</or>" -> [] (*let t = (["</or>"]) in - let batrs = (make_node "</or>" "interp" d.weight 0 []) in + let batrs = (make_node "</or>" "interp" c.weight 0 []) in let schema_list = [[schema_field SENTENCE "Sentence" Backward [Multi[Lex "<sentence>"](*Phrase(Lex "s")*)]]] in - [LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs] (* FIXME: semantyka *)*) + [LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs] (* FIXME: semantyka *)*) | "<sentence>" -> - [LCGrenderer.make_frame x_flag tokens [] [[arg_schema_field Forward [Phrase (Lex "s")]]] ["<root>"] {d with orth=""} (make_node "<sentence>" "interp" d.weight 0 []); - LCGrenderer.make_frame x_flag tokens [] [[arg_schema_field Forward [Phrase (NP(Case "nom"))]];[nosem_schema_field Forward [Phrase (Lex "</speaker>")]]] (["<speaker>"]) {d with orth=""} (make_node "<speaker>" "interp" d.weight 0 [])] + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[arg_schema_field Forward [Phrase (Lex "s")]]] ["<root>"] {c with orth=""} (make_node "<sentence>" "interp" c.weight 0 []); + LCGrenderer.make_frame x_flag tokens lex_sems [] [[arg_schema_field Forward [Phrase (NP(Case "nom"))]];[nosem_schema_field Forward [Phrase (Lex "</speaker>")]]] (["<speaker>"]) {c with orth=""} (make_node "<speaker>" "interp" c.weight 0 [])] | "</sentence>" -> let t = ["s"] in - let 
batrs = (make_node "</sentence>" "interp" d.weight 0 []) in + let batrs = (make_node "</sentence>" "interp" c.weight 0 []) in let schema_list = [[schema_field CLAUSE "Clause" Backward [Multi[IP;CP(Int,CompUndef);NP(Case "voc");Lex "interj"](*Phrase IP;Phrase(CP(Int,CompUndef));Phrase(NP(Case "voc"))*)]](*;[WalFrames.schema_field NOSEM "" Backward [Phrase(Lex "<sentence>")]]*)] in - [LCGrenderer.make_frame_simple [] ["</speaker>"] d ( (make_node "</speaker>" "interp" d.weight 0 [])); - LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs] + [LCGrenderer.make_frame_simple [] ["</speaker>"] c ( (make_node "</speaker>" "interp" c.weight 0 [])); + LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs] | lemma -> raise Not_found in let get_labels () = { @@ -1242,26 +1245,26 @@ let get_labels () = { } in (* create_entries *) - match d with + match c with {token = Interp "<clause>"} -> [BracketSet(Forward),Dot] | {token = Interp "</clause>"} -> [BracketSet(Backward),Dot] | {token = Interp lemma} -> (try - Xlist.fold (process_bracket_lemma d lemma) [] (fun l (symbol,sem) -> (Bracket(true,true,symbol),sem) :: l) + Xlist.fold (process_bracket_lemma c d lemma) [] (fun l (symbol,sem) -> (Bracket(true,true,symbol),sem) :: l) with Not_found -> (* print_endline ("x"^lemma^"x"); *) - let entries = process_interp d (lemma,"interp",[]) in + let entries = process_interp c d (lemma,"interp",[]) in Xlist.map entries (fun (symbol,sem) -> Bracket(false,false,symbol),sem)) | {token = Lemma(lemma,"sinterj",[[]])} -> let t = ["interj"] in - let batrs = make_node lemma "sinterj" d.weight 0 [] in - let symbol,sem = LCGrenderer.make_frame_simple [] t d ( batrs) in + let batrs = make_node lemma "sinterj" c.weight 0 [] in + let symbol,sem = LCGrenderer.make_frame_simple [] t c ( batrs) in [Bracket(true,true,symbol),sem] | {token = Lemma(lemma,pos,interp)} -> (* print_endline (lemma ^ " " ^ pos); *) Xlist.fold interp [] (fun l tags -> let d = {d with e=get_labels (); 
valence=LCGrenderer.make_controll d.valence} in - let entries = process_interp d (lemma,pos,tags) in + let entries = process_interp c d (lemma,pos,tags) in Xlist.map entries (fun (symbol,sem) -> Bracket(false,false,symbol),sem) @ l) | _ -> [] @@ -1280,10 +1283,11 @@ let create (paths,last) tokens lex_sems = uni_weight := 0.; let chart = LCGchart.make last in let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) -> - let t = ExtArray.get tokens id in + let c = ExtArray.get tokens id in + let d = ExtArray.get lex_sems id in (* if t.weight < -0.9 || Xlist.mem t.attrs "notvalidated proper" || Xlist.mem t.attrs "lemmatized as lowercase" then chart else *) - let chart = LCGchart.add_inc chart lnode rnode (Tensor[Atom ("[" ^ t.orth ^ "]")], Dot) 0 in - LCGchart.add_inc_list chart lnode rnode (create_entries tokens id (t:PreTypes.token_record) false) 0) in + let chart = LCGchart.add_inc chart lnode rnode (Tensor[Atom ("[" ^ c.orth ^ "]")], Dot) 0 in + LCGchart.add_inc_list chart lnode rnode (create_entries tokens lex_sems id (c:ENIAMtokenizerTypes.token_record) d false) 0) in let set = Xlist.fold paths IntIntSet.empty (fun set (_,lnode,rnode) -> IntIntSet.add set (lnode,rnode)) in let chart = IntIntSet.fold set chart (fun chart (i,j) -> LCGchart.make_unique chart i j) in chart @@ -1299,13 +1303,14 @@ let rec dep_create_rec nodes sons conll_id = (* Printf.printf "dep_create_rec [%s] %d [%s]\n" (String.concat ";" (Xlist.map left string_of_int)) conll_id (String.concat ";" (Xlist.map right string_of_int)); *) DepNode(conll_id, Xlist.map left (dep_create_rec nodes sons), node, Xlist.map right (dep_create_rec nodes sons)) -let dep_create paths tokens = +let dep_create paths tokens lex_sems = uni_weight := 0.; let sons = Int.fold 1 (Array.length paths - 1) IntMap.empty (fun sons i -> let _,super,_ = paths.(i) in IntMap.add_inc sons super [i] (fun l -> i :: l)) in let nodes = Int.fold 0 (Array.length paths - 1) IntMap.empty (fun nodes i -> let id,_,_ = paths.(i) in - let 
t = ExtArray.get tokens id in - IntMap.add nodes i (create_entries tokens id t true)) in + let c = ExtArray.get tokens id in + let d = ExtArray.get lex_sems id in + IntMap.add nodes i (create_entries tokens lex_sems id c d true)) in dep_create_rec nodes sons 0 diff --git a/parser/LCGrenderer.ml b/parser/LCGrenderer.ml index 88a37f7..bcfb9e8 100644 --- a/parser/LCGrenderer.ml +++ b/parser/LCGrenderer.ml @@ -17,8 +17,9 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. *) -open WalTypes -open PreTypes +open ENIAMtokenizerTypes +open ENIAMwalTypes +open ENIAMlexSemanticsTypes open Xstd let dir_of_dir = function @@ -189,7 +190,7 @@ let make_arg_phrase = function | Null -> One | X -> Tensor[Atom "X"] | Lex lex -> Tensor[Atom lex] - | phrase -> failwith ("make_arg_phrase: " ^ WalStringOf.phrase phrase) + | phrase -> failwith ("make_arg_phrase: " ^ ENIAMwalStringOf.phrase phrase) let make_arg_pos = function (* wprowadzam uzgodnienia a nie wartości cech, bo wartości cech są wprowadzane przez leksem a uzgodnienia wiążą je z wartościami u nadrzędnika *) | SUBST(_,Case case) -> [Atom "subst"; Top; Atom case; Top; Top] @@ -226,7 +227,7 @@ let make_arg_pos = function (* wprowadzam uzgodnienia a nie wartości cech, bo w | COMPAR -> [Atom "TODO"] (* FIXME: todo *) | COMP ctype -> [Atom "comp"; arg_of_ctype ctype] | PERS _ -> [Atom "TODO"] (* FIXME: todo *) - | pos -> failwith ("make_arg_pos: " ^ WalStringOf.pos pos) + | pos -> failwith ("make_arg_pos: " ^ ENIAMwalStringOf.pos pos) let rec make_arg quant = function Phrase phrase -> make_arg_phrase phrase @@ -234,7 +235,7 @@ let rec make_arg quant = function | LexArg(id,arg,lex) -> Tensor([Atom "lex";Atom id;Atom lex] @ make_arg_pos arg) (* | LexRealization(arg,lex) -> (match make_arg arg with Tensor l -> Tensor([Atom "lexr";Atom lex] @ l) | _ -> failwith "make_arg") *) | Raised(arg1,dir,arg2) -> Imp(Tensor(make_tensor_type quant arg1),dir_of_dir dir,Tensor(make_tensor_type quant arg2)) - | morf -> failwith 
("make_arg: " ^ WalStringOf.morf morf) + | morf -> failwith ("make_arg: " ^ ENIAMwalStringOf.morf morf) let empty_schema_field = {gf=NOGF; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]} @@ -252,19 +253,22 @@ let get_pro_id () = incr pro_id_counter; !pro_id_counter*) -let make_pro tokens = - let t = {empty_token with token=Lemma("pro","pro",[]); senses=["pro",["0"],0.]} in +let make_pro tokens lex_sems = + let t = {empty_token with token=Lemma("pro","pro",[])} in let id = ExtArray.add tokens t in + let _ = ExtArray.add lex_sems {empty_lex_sem with senses=["pro",["0"],0.]} in Node{empty_node with pred="pro"; cat="pro"; weight=0.; id=id; attrs=[]; args=Dot} -let make_prong tokens = - let t = {empty_token with token=Lemma("pro","pro",[]); senses=["pro",["0"],0.]} in +let make_prong tokens lex_sems = + let t = {empty_token with token=Lemma("pro","pro",[])} in let id = ExtArray.add tokens t in + let _ = ExtArray.add lex_sems {empty_lex_sem with senses=["pro",["0"],0.]} in Node{empty_node with pred="pro"; cat="pro"; weight=0.; id=id; attrs=["NUM",SubstVar "number";"GEND",SubstVar "gender";"PERS",SubstVar "person"]; args=Dot} -let make_pro_komunikat tokens = - let t = {empty_token with token=Lemma("pro-komunikat","pro",[]); senses=["pro-komunikat",["0"],0.]} in +let make_pro_komunikat tokens lex_sems = + let t = {empty_token with token=Lemma("pro-komunikat","pro",[])} in let id = ExtArray.add tokens t in + let _ = ExtArray.add lex_sems {empty_lex_sem with senses=["pro-komunikat",["0"],0.]} in {empty_node with pred="pro-komunikat"; cat="pro"; weight=10.; id=id; attrs=[]; args=Dot} let make_var vars gf = @@ -299,7 +303,7 @@ let make_var vars gf = | CLAUSE -> "clause" | SENTENCE -> "sentence"*) -let make_args tokens quant var_map v = function +let make_args tokens lex_sems quant var_map v = function {gf=RAISED; morfs=[arg]} as s -> let arg = make_arg quant arg in ((dir_of_dir s.dir,arg),v,[],[Var v]),var_map @@ -325,10 +329,10 @@ let make_args tokens 
quant var_map v = function | s -> (* FIXME: argument pusty występuje tyle razy ile jest preferencji, a chyba powinien jeden raz *) let args2 = Xlist.map s.morfs (fun morf -> make_arg quant morf, morf) in let sem_args = Xlist.map args2 (function - One, Phrase Pro -> SetAttr("MORF",Morf(Phrase Pro),make_pro tokens) (*s.sel_prefs*) - | One, Phrase ProNG -> SetAttr("MORF",Morf(Phrase ProNG),make_prong tokens) (*s.sel_prefs*) - | One, E Pro -> SetAttr("MORF",Morf(E Pro ),make_pro tokens) (*s.sel_prefs*) - | One, E ProNG -> SetAttr("MORF",Morf(E ProNG),make_prong tokens) (*s.sel_prefs*) + One, Phrase Pro -> SetAttr("MORF",Morf(Phrase Pro),make_pro tokens lex_sems) (*s.sel_prefs*) + | One, Phrase ProNG -> SetAttr("MORF",Morf(Phrase ProNG),make_prong tokens lex_sems) (*s.sel_prefs*) + | One, E Pro -> SetAttr("MORF",Morf(E Pro ),make_pro tokens lex_sems) (*s.sel_prefs*) + | One, E ProNG -> SetAttr("MORF",Morf(E ProNG),make_prong tokens lex_sems) (*s.sel_prefs*) | One, Phrase Null -> Dot | One, _ -> failwith "make_args 3" | _,morf -> SetAttr("MORF",Morf morf,Var "q")) in @@ -340,14 +344,14 @@ let make_args tokens quant var_map v = function ((dir_of_dir s.dir,Plus(Xlist.map args2 fst)),v, [Case(Var v,Xlist.map sem_args (function Dot -> "q",Dot | t -> "q",Cut(SetAttr("AROLE",Val s.role,SetAttr("GF",Gf s.gf,(*SetElem*) t)))))],[]),var_map -let make_args2 tokens quant var_map s = +let make_args2 tokens lex_sems quant var_map s = let v,var_map = make_var var_map (String.lowercase s.role) (*gf*) in (* let s = {s with morfs=List.flatten (Xlist.map s.morfs (function E l -> Xlist.map l (fun p -> E[p]) | m -> [m]))} in *) - make_args tokens quant var_map v s + make_args tokens lex_sems quant var_map v s -let make_schema tokens quant schema var_map = +let make_schema tokens lex_sems quant schema var_map = let schema,_,var_map = Xlist.fold schema ([],StringMap.empty,var_map) (fun (schema,labels,var_map) s -> - let schema_pos,var_map = make_args2 tokens quant var_map s in + let 
schema_pos,var_map = make_args2 tokens lex_sems quant var_map s in schema_pos :: schema, labels, var_map) in Xlist.fold schema ([],[],[],[]) (fun (args,vars,sem_args,raised_args) (arg,var,sem_arg,raised_arg) -> arg :: args, var :: vars, sem_arg @ sem_args, raised_arg @ raised_args), var_map @@ -356,11 +360,11 @@ let add_x_args schema_list = [{gf=ADJUNCT; role="Unknown Backward"; role_attr="Backward"; sel_prefs=[]; cr=[]; ce=[]; dir=Backward; morfs=[Multi[X]]}; {gf=ADJUNCT; role="Unknown Forward"; role_attr="Forward"; sel_prefs=[]; cr=[]; ce=[]; dir=Forward; morfs=[Multi[X]]}] :: schema_list -let make_frame x_flag tokens quant schema_list tl d node = (* UWAGA: to zadziała, gdy jest conajwyżej jeden podniesiony typ *) +let make_frame x_flag tokens lex_sems quant schema_list tl d node = (* UWAGA: to zadziała, gdy jest conajwyżej jeden podniesiony typ *) let schema_list = if x_flag then add_x_args schema_list else schema_list in let args_vars_list,sem_args,raised_args,_ = Xlist.fold schema_list ([],[],[],StringMap.empty) (fun (args_vars_list,sem_args,raised_args,var_map) schema -> -(* print_endline (WalStringOf.schema schema); *) - let (args,vars,sem_arg,raised_arg),var_map = make_schema tokens quant schema var_map in +(* print_endline (ENIAMwalStringOf.schema schema); *) + let (args,vars,sem_arg,raised_arg),var_map = make_schema tokens lex_sems quant schema var_map in (args,vars) :: args_vars_list, sem_arg @ sem_args, raised_arg @ raised_args, var_map) in let t = Tensor(make_tensor_type quant tl) in let at = Xlist.fold schema_list tl (fun at schema -> @@ -379,9 +383,9 @@ let make_frame x_flag tokens quant schema_list tl d node = (* UWAGA: to zadział simplify_impset (ImpSet(t,args),LambdaSet(vars,sem))) in make_type_quantification quant (t,sem) -let make_frame_raised tokens quant schema_list tl d node sem_mods = +let make_frame_raised tokens lex_sems quant schema_list tl d node sem_mods = let args_vars_list,sem_args,raised_args,_ = Xlist.fold schema_list 
([],[],[],StringMap.empty) (fun (args_vars_list,sem_args,raised_args,var_map) schema -> - let (args,vars,sem_arg,raised_arg),var_map = make_schema tokens quant schema var_map in + let (args,vars,sem_arg,raised_arg),var_map = make_schema tokens lex_sems quant schema var_map in (args,vars) :: args_vars_list, sem_arg @ sem_args, raised_arg @ raised_args, var_map) in let t = Tensor(make_tensor_type quant tl) in let at = Xlist.fold (List.rev schema_list) tl (fun at schema -> @@ -400,6 +404,7 @@ let make_frame_raised tokens quant schema_list tl d node sem_mods = | _ -> failwith "make_frame_raised: raised_args" in let sem = Xlist.fold sem_mods sem (fun sem (e,t) -> SetAttr(e,t,sem)) in let id = ExtArray.add tokens {empty_token with token=Lemma("raised","raised",[])} in (* FIXME: czy raised to jest to co tu być powinno? *) + let _ = ExtArray.add lex_sems empty_lex_sem in let sem = Node{empty_node with args = Cut(SetAttr("GF",Gf CORE,sem)); id=id; gs=make_gs quant tl} in let t,sem = Xlist.fold args_vars_list (t,sem) (fun (t,sem) (args,vars) -> simplify_impset (ImpSet(t,args),LambdaSet(vars,sem))) in diff --git a/parser/LCGstringOf.ml b/parser/LCGstringOf.ml index 4c69b75..825748f 100644 --- a/parser/LCGstringOf.ml +++ b/parser/LCGstringOf.ml @@ -56,8 +56,8 @@ let rec linear_term c = function "[" ^ (String.concat "; " (Xlist.map (["PRED",Val t.pred;"CAT",Val t.cat;"ID",Val (string_of_int t.id);"WEIGHT",Val (string_of_float t.weight);"GS",t.gs;"ARGS",t.args] @ t.attrs) (fun (e,t) -> e ^ ": " ^ (linear_term 0 t)))) ^ "]" - | Morf m -> WalStringOf.morf m - | Gf s -> WalStringOf.gf s + | Morf m -> ENIAMwalStringOf.morf m + | Gf s -> ENIAMwalStringOf.gf s | Ref i -> "ref " ^ string_of_int i | Cut t -> "cut(" ^ linear_term 0 t ^ ")" | Choice choices -> "choice(" ^ String.concat ";" (StringMap.fold choices [] (fun l ei t -> (sprintf "%s: %s" ei (linear_term 0 t)) :: l)) ^ ")" diff --git a/parser/LCGtypes.ml b/parser/LCGtypes.ml index 61b2117..49193e9 100644 --- a/parser/LCGtypes.ml 
+++ b/parser/LCGtypes.ml @@ -29,14 +29,14 @@ type node = { weight: float; id: int; gs: linear_term; - agf: WalTypes.gf; - amorf: WalTypes.morf; + agf: ENIAMwalTypes.gf; + amorf: ENIAMwalTypes.morf; arole: string; arole_attr: string; meaning: string; hipero: StringSet.t; meaning_weight: float; - position: WalTypes.schema_field; + position: ENIAMwalTypes.schema_field; attrs: (string * linear_term) list; args: linear_term} @@ -72,8 +72,8 @@ and linear_term = | Apply of linear_term | Insert of linear_term * linear_term | Node of node - | Morf of WalTypes.morf - | Gf of WalTypes.gf + | Morf of ENIAMwalTypes.morf + | Gf of ENIAMwalTypes.gf | Choice of linear_term StringMap.t (*string * string * linear_term*) (* etykieta * indeks * term *) | Concept of concept | Context of context diff --git a/parser/LCGvalence.ml b/parser/LCGvalence.ml index 1a1740c..5d33d3e 100644 --- a/parser/LCGvalence.ml +++ b/parser/LCGvalence.ml @@ -17,7 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*) -open WalTypes +open ENIAMwalTypes open LCGtypes open Printf open Xstd @@ -56,29 +56,30 @@ let extract_roles = function | _ -> failwith "extract_roles" let get_lemma = function - PreTypes.Lemma(lemma,cat,_) -> lemma,cat - | PreTypes.Interp lemma -> lemma,"interp" + ENIAMtokenizerTypes.Lemma(lemma,cat,_) -> lemma,cat + | ENIAMtokenizerTypes.Interp lemma -> lemma,"interp" | _ -> "","" -let prepare_valence tokens = +let prepare_valence tokens lex_sems = let valence = Array.make (ExtArray.size tokens) [] in Int.iter 1 (ExtArray.size tokens - 1) (fun id -> - let d = ExtArray.get tokens id in - let lemma,cat = get_lemma d.PreTypes.token in + let c = ExtArray.get tokens id in + let d = ExtArray.get lex_sems id in + let lemma,cat = get_lemma c.ENIAMtokenizerTypes.token in let lemma = if lemma = "<or-sentence>" (*|| lemma = ":s" || lemma = "„s"*) then "pro-komunikować" else lemma in if lemma = "" then () else let prep_valence = if cat = "prep" then (* (0,lemma,StringSet.empty,0.,"NOSEM","",Frame(EmptyAtrs[],[])) :: *) - match d.PreTypes.semantics with - PreTypes.Normal -> [] - | PreTypes.PrepSemantics l -> - Xlist.rev_map l (fun (lrole,lrole_attr,hipero,sel_prefs) -> + match d.ENIAMlexSemanticsTypes.semantics with + ENIAMlexSemanticsTypes.Normal -> [] + | ENIAMlexSemanticsTypes.PrepSemantics l -> (* FIXME: uzgadnianie cases *) + Xlist.rev_map l (fun (case,lrole,lrole_attr,hipero,sel_prefs) -> 0,lemma,hipero,0.,lrole,lrole_attr,Frame(EmptyAtrs[],[])) | _ -> failwith "prepare_valence" else [] in - let valence2 = if d.PreTypes.valence = [] then [0,Frame(EmptyAtrs[],[])] else d.PreTypes.valence in - let lrole,lrole_attr = d.PreTypes.lroles in + let valence2 = if d.ENIAMlexSemanticsTypes.valence = [] then [0,Frame(EmptyAtrs[],[])] else d.ENIAMlexSemanticsTypes.valence in + let lrole,lrole_attr = d.ENIAMlexSemanticsTypes.lroles in valence.(id) <- prep_valence @ List.flatten (Xlist.map valence2 (function fnum,Frame(attrs,schema) -> let meanings,lemma,attrs = extract_meaning 
lemma attrs in @@ -86,14 +87,14 @@ let prepare_valence tokens = if cat = "pact" || cat = "ppas" then extract_roles attrs else if cat = "pcon" then "Con","" else if cat = "pant" then "Ant","" else - d.PreTypes.lroles in - Xlist.map (prepare_senses lemma meanings d.PreTypes.senses) (fun (meaning,hipero,weight) -> + d.ENIAMlexSemanticsTypes.lroles in + Xlist.map (prepare_senses lemma meanings d.ENIAMlexSemanticsTypes.senses) (fun (meaning,hipero,weight) -> let hipero = if cat = "conj" then ["0"] else hipero in fnum,meaning,StringSet.of_list hipero,weight,lrole,lrole_attr, Frame(attrs,Xlist.map schema (fun s -> (* let s = if s.sel_prefs=[] then (print_endline ("prepare_valence empty sel_prefs: " ^ lemma ^ " " ^ cat); {s with sel_prefs=["ALL"]}) else s in *) if s.role="" && s.gf <> ADJUNCT && s.gf <> NOSEM then ( - printf "%d: %s\n%!" fnum (WalStringOf.frame lemma (Frame(attrs,schema))); + printf "%d: %s\n%!" fnum (ENIAMwalStringOf.frame lemma (Frame(attrs,schema))); failwith ("prepare_valence empty role: " ^ lemma ^ " " ^ cat)) else {s with morfs=List.sort compare s.morfs}))) | fnum,(LexFrame _ as frame) -> [fnum,"lex",StringSet.empty,0.,lrole,lrole_attr,frame] @@ -194,9 +195,9 @@ let match_args_pos modifications nodes e i schema t = let schema,selected = if morfs = [] then schema,[] else let morfs = List.sort compare morfs in -(* printf "gf=%s morfs=%s\n%!" (WalStringOf.gf gf) (String.concat ";" (Xlist.map morfs WalStringOf.morf)); *) +(* printf "gf=%s morfs=%s\n%!" (ENIAMwalStringOf.gf gf) (String.concat ";" (Xlist.map morfs ENIAMwalStringOf.morf)); *) Xlist.fold schema ([],[]) (fun (schema,selected) pos -> -(* printf "pos.gf=%s pos.morfs=%s\n%!" (WalStringOf.gf pos.gf) (String.concat ";" (Xlist.map pos.morfs WalStringOf.morf)); *) +(* printf "pos.gf=%s pos.morfs=%s\n%!" 
(ENIAMwalStringOf.gf pos.gf) (String.concat ";" (Xlist.map pos.morfs ENIAMwalStringOf.morf)); *) if gf = pos.gf || (gf = ADJUNCT && pos.gf=ARG) then if match_position (morfs,(*mark_sem_morfs*) pos.morfs) then schema, pos :: selected else pos :: schema, selected else pos :: schema, selected) in @@ -288,17 +289,17 @@ let rec propagate_nosem_selprefs modifications ei = function if (t.cat = "prep" && t.arole = "NOSEM") || t.cat = "num" then let refs = IntSet.of_list (get_arg_refs [] t.args) in IntSet.iter refs (fun r -> - modifications.(r) <- StringMap.add_inc modifications.(r) ei t.position.WalTypes.sel_prefs (fun l -> - if l = t.position.WalTypes.sel_prefs then l else failwith ("propagate_nosem_selprefs 1: [" ^ String.concat ";" l ^ "] [" ^ String.concat ";" t.position.WalTypes.sel_prefs ^ "]"))); - Node{t with position= {t.position with WalTypes.sel_prefs = []}} + modifications.(r) <- StringMap.add_inc modifications.(r) ei t.position.ENIAMwalTypes.sel_prefs (fun l -> + if l = t.position.ENIAMwalTypes.sel_prefs then l else failwith ("propagate_nosem_selprefs 1: [" ^ String.concat ";" l ^ "] [" ^ String.concat ";" t.position.ENIAMwalTypes.sel_prefs ^ "]"))); + Node{t with position= {t.position with ENIAMwalTypes.sel_prefs = []}} else Node t | _ -> failwith "propagate_nosem_selprefs 2" let rec apply_modifications2_rec mods = function Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, apply_modifications2_rec mods t)) | Node t -> - if t.position.WalTypes.sel_prefs <> [] then failwith "apply_modifications2_rec" else - Node{t with position={t.position with WalTypes.sel_prefs=mods}} + if t.position.ENIAMwalTypes.sel_prefs <> [] then failwith "apply_modifications2_rec" else + Node{t with position={t.position with ENIAMwalTypes.sel_prefs=mods}} | _ -> failwith "apply_modifications2_rec" let apply_modifications2 modifications references = @@ -310,9 +311,9 @@ let apply_modifications2 modifications references = try apply_modifications2_rec (StringMap.find modifications.(r) 
ei) t with Not_found -> t)) | _ -> failwith "apply_modifications2") -let assign_frames_and_senses tokens references = +let assign_frames_and_senses tokens lex_sems references = let modifications = Array.make (Array.length references) StringMap.empty in - let valence = prepare_valence tokens in + let valence = prepare_valence tokens lex_sems in let nodes = Array.map get_nodes references in let references = Array.map (assign_frames_and_senses_rec modifications valence nodes) nodes in apply_modifications (*tokens*) modifications nodes references; diff --git a/parser/disambSelPref.ml b/parser/disambSelPref.ml index e53687d..067e2bb 100644 --- a/parser/disambSelPref.ml +++ b/parser/disambSelPref.ml @@ -16,85 +16,85 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. *) - -open WalTypes + +open ENIAMwalTypes open LCGtypes open Printf open Xstd - + let fit_node1 t args w = - let w = - if t.agf = ADJUNCT || t.agf = CORE || t.agf = NOSEM || t.agf = CLAUSE || t.agf = SENTENCE then w else + let w = + if t.agf = ADJUNCT || t.agf = CORE || t.agf = NOSEM || t.agf = CLAUSE || t.agf = SENTENCE then w else (* if is_nosem_node t then fit_sel_prefs_nosem_node disamb ei t + w else *) if t.position.role = "" && (t.agf = SUBJ || t.agf = OBJ || t.agf = ARG) then w + 20 else - let b = + let b = if StringSet.mem t.hipero "0" then true else Xlist.fold t.position.sel_prefs false (fun b s -> StringSet.mem t.hipero s || b) in - (if b then 0 else 1) + w in + (if b then 0 else 1) + w in Node{t with args=args},w - + let fit_node2 t args w = - let b = Xlist.fold t.position.WalTypes.sel_prefs false (fun b s -> StringSet.mem t.hipero s || b) in + let b = Xlist.fold t.position.sel_prefs false (fun b s -> StringSet.mem t.hipero s || b) in let t = {t with args=args} in - if b then Node t,w else - (match t.agf, t.position.WalTypes.gf with - WalTypes.ADJUNCT,_ -> (* FIXME: można dodać tuszowanie braków w walentym *) 
- let pos = + if b then Node t,w else + (match t.agf, t.position.gf with + ADJUNCT,_ -> (* FIXME: można dodać tuszowanie braków w walentym *) + let pos = (* let r,a = paths_array.(t.id).PreTypes.lroles in if r <> "" then (* FIXME: pomijam to, że role dla rzeczowników dotyczą tylko inst *) - {t.position with WalTypes.role=r; WalTypes.role_attr=a} else*) - {t.position with WalTypes.role=t.arole; WalTypes.role_attr=t.arole_attr} in + {t.position with role=r; role_attr=a} else*) + {t.position with role=t.arole; role_attr=t.arole_attr} in Node{t with position=pos}, w+1 - | WalTypes.CLAUSE,WalTypes.NOGF -> Node t,w+0 - | WalTypes.SENTENCE,WalTypes.NOGF -> Node t,w+0 - | WalTypes.ARG,WalTypes.NOGF -> Node t,w+1 - | WalTypes.CORE,WalTypes.NOGF -> - let pos = {t.position with WalTypes.role=t.arole; WalTypes.role_attr=t.arole_attr} in + | CLAUSE,NOGF -> Node t,w+0 + | SENTENCE,NOGF -> Node t,w+0 + | ARG,NOGF -> Node t,w+1 + | CORE,NOGF -> + let pos = {t.position with role=t.arole; role_attr=t.arole_attr} in Node{t with position=pos}, w+0 - | WalTypes.OBJ,WalTypes.NOGF -> Node t,w+0 - | WalTypes.SUBJ,WalTypes.NOGF -> Node t,w+0 - | WalTypes.SUBJ,WalTypes.SUBJ -> Node t,w+2 - | WalTypes.OBJ,WalTypes.OBJ -> Node t,w+2 - | WalTypes.ARG,WalTypes.ARG -> Node t,w+1 - | WalTypes.NOSEM,WalTypes.NOGF -> Node t,w+0 - | WalTypes.NOGF,WalTypes.NOGF -> Node t,w+0 - | WalTypes.NOSEM,WalTypes.NOSEM -> Node t,w+0 -(* | WalTypes.,WalTypes. -> 0 *) + | OBJ,NOGF -> Node t,w+0 + | SUBJ,NOGF -> Node t,w+0 + | SUBJ,SUBJ -> Node t,w+2 + | OBJ,OBJ -> Node t,w+2 + | ARG,ARG -> Node t,w+1 + | NOSEM,NOGF -> Node t,w+0 + | NOGF,NOGF -> Node t,w+0 + | NOSEM,NOSEM -> Node t,w+0 +(* | , -> 0 *) | a,g ->(* printf "fit_sel_prefs_rec: pred=%s agf=%s pos.gf=%s\n%!" 
t.pred (WalStringOf.gf a) (WalStringOf.gf g);*) Node t,w+1) - -let rec fit_sel_prefs_choice fit_node_fun references disamb satisfaction r = function - Choice choice -> + +let rec fit_sel_prefs_choice fit_node_fun references disamb satisfaction r = function + Choice choice -> let choice,sat = StringMap.fold choice (StringMap.empty,StringMap.empty) (fun (choice,sat) ei t -> let t,w = fit_sel_prefs_variant fit_node_fun references disamb satisfaction t in StringMap.add choice ei t, StringMap.add sat ei w) in satisfaction.(r) <- sat; Choice choice | _ -> failwith "fit_sel_prefs_choice" - -and fit_sel_prefs_variant fit_node_fun references disamb satisfaction = function - Variant(e,l) -> + +and fit_sel_prefs_variant fit_node_fun references disamb satisfaction = function + Variant(e,l) -> let l,min_w = Xlist.fold l ([],max_int) (fun (l,min_w) (i,t) -> let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction (e ^ i) t in if w = min_w then (i,t) :: l, min_w else if w < min_w then [i,t],w else l,min_w) in Variant(e, List.rev l),min_w | _ -> failwith "fit_sel_prefs_variant" - -and fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei = function - Node t -> + +and fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei = function + Node t -> let args,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t.args in fit_node2 t args w - | Tuple l -> + | Tuple l -> let l,sum_w = Xlist.fold l ([],0) (fun (l,sum_w) t -> let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t in t :: l, sum_w + w) in Tuple(List.rev l), sum_w - | Variant(e,l) as t -> + | Variant(e,l) as t -> let l,min_w = Xlist.fold l ([],max_int) (fun (l,min_w) (i,t) -> let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t in if w = min_w then (i,t) :: l, min_w else if w < min_w then [i,t],w else l,min_w) in - let l = + let l = let map = Xlist.fold l TermSet.empty (fun map (_,t) -> TermSet.add map t) in fst (TermSet.fold map 
([],1) (fun (l,i) t -> (string_of_int i,t) :: l, i+1)) in (match l with @@ -103,7 +103,7 @@ and fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei = function | _ -> Variant(e, List.rev l),min_w) | Dot -> Dot, 0 | Val s -> Val s, 0 - | Ref i -> + | Ref i -> if disamb.(i) = Dot then (disamb.(i) <- fit_sel_prefs_choice fit_node_fun references disamb satisfaction i references.(i)); Ref i, (try StringMap.find satisfaction.(i) ei with Not_found -> failwith ("fit_sel_prefs_rec 3: r=" ^ string_of_int i ^ " ei=" ^ ei)) | t -> failwith ("fit_sel_prefs_rec 2: " ^ LCGstringOf.linear_term 0 t) @@ -113,8 +113,5 @@ let fit_sel_prefs fit_node_fun references = let satisfaction = Array.make (Array.length references) StringMap.empty in disamb.(0) <- fst (fit_sel_prefs_variant fit_node_fun references disamb satisfaction references.(0)); disamb - -(***************************************************************************************) - - +(***************************************************************************************) diff --git a/parser/exec.ml b/parser/exec.ml index f7d0290..a0b2320 100644 --- a/parser/exec.ml +++ b/parser/exec.ml @@ -33,6 +33,8 @@ let empty_result = { selected_sent_text=RawText ""; semantic_text=RawText ""; selected_semantic_text=RawText ""; + tokens=ExtArray.make 1 ENIAMtokenizerTypes.empty_token; + lex_sems=ExtArray.make 1 ENIAMlexSemanticsTypes.empty_lex_sem; } let empty_eniam_parse_result = { @@ -157,7 +159,7 @@ let eniam_parse_sentence timeout test_only_flag paths last tokens lex_sems = let result = {result with parse_time=time4 -. 
time3; chart_size=LCGchart.get_no_entries chart} in if LCGchart.is_parsed chart then try - let term = LCGchart.get_parsed_term tokens chart in + let term = LCGchart.get_parsed_term tokens lex_sems chart in let dependency_tree = LCGreductions.reduce term references in let time5 = time_fun () in let result = if test_only_flag then result else {result with dependency_tree=dependency_tree} in @@ -216,7 +218,7 @@ let conll_parse_sentence timeout test_only_flag paths tokens lex_sems = let result = {result with parse_time=time4 -. time3} in if LCGchart.is_dep_parsed parsed_dep_chart then try - let term = LCGchart.get_dep_parsed_term tokens parsed_dep_chart in + let term = LCGchart.get_dep_parsed_term tokens lex_sems parsed_dep_chart in (* LCGlatexOf.print_dependency_tree "dep_dependency_tree1" dependency_tree; *) let dependency_tree = LCGreductions.reduce term references in let time5 = time_fun () in @@ -322,12 +324,12 @@ let rec parse_sentence timeout test_only_flag mode file_prefix tokens lex_sems = if not Paths.config.Paths.mate_parser_enabled then DepSentence paths else ( print_endline "parse_sentence 1"; (* print_endline (Visualization.html_of_dep_sentence tokens paths); *) - let conll = CONLL.string_of_paths ENIAMsubsyntaxTypes.Mate tokens paths in + let conll = ENIAM_CONLL.string_of_paths ENIAMsubsyntaxTypes.Mate tokens paths in print_endline "parse_sentence 2"; (* printf "|%s|\n" conll; *) Printf.fprintf mate_out "%s%!" 
conll; print_endline "parse_sentence 3"; - let new_paths = get_paths paths (CONLL.load_sentence mate_in) in + let new_paths = get_paths paths (ENIAM_CONLL.load_sentence mate_in) in print_endline "parse_sentence 4"; (* print_endline (Visualization.html_of_dep_sentence tokens new_paths); *) let result = conll_parse_sentence timeout test_only_flag new_paths tokens lex_sems in @@ -364,7 +366,7 @@ let rec parse_text timeout test_only_flag mode tokens lex_sems = function | StructText paragraphs -> let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> parse_paragraph timeout test_only_flag mode tokens lex_sems paragraph) in - StructText(List.rev paragraphs, tokens) + StructText(List.rev paragraphs) | AltText l -> AltText(Xlist.map l (fun (mode,text) -> mode, parse_text timeout test_only_flag mode tokens lex_sems text)) @@ -420,18 +422,18 @@ let rec select_sentences_paragraph = function let rec select_sentences_text = function RawText s -> RawText s - | StructText(paragraphs,tokens) -> + | StructText paragraphs -> let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> select_sentences_paragraph paragraph) in - StructText(List.rev paragraphs, tokens) + StructText(List.rev paragraphs) | AltText l -> AltText(Xlist.map l (fun (mode,text) -> mode, select_sentences_text text)) -let semantic_processing timeout test_only_flag file_prefix tokens max_n dependency_tree = +let semantic_processing timeout test_only_flag file_prefix tokens lex_sems max_n dependency_tree = let time5 = time_fun () in let result = {empty_semantic_processing_result with file_prefix=file_prefix} in try - let (*dependency_tree2*)(*sem*)disamb = LCGvalence.assign_frames_and_senses tokens dependency_tree in + let (*dependency_tree2*)(*sem*)disamb = LCGvalence.assign_frames_and_senses tokens lex_sems dependency_tree in let disamb(*sem*) = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node1 (*dependency_tree2*)disamb in let (*sem*)disamb = DisambLemma.disambiguate_nodes (*dependency_tree*)(*sem*)disamb 
in let (*sem*)disamb = DisambLemma.remove_unused(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in @@ -441,7 +443,7 @@ let semantic_processing timeout test_only_flag file_prefix tokens max_n dependen let sem = DisambLemma.disambiguate_meanings (*dependency_tree*)sem in let sem(*disamb*) = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)sem(*disamb*) in let result = if test_only_flag then result else {result with sem=sem} in - let sem2 = SemGraph.translate tokens (*disamb*)sem in + let sem2 = SemGraph.translate tokens lex_sems (*disamb*)sem in let result = if test_only_flag then result else {result with sem2=sem2} in let sem3(*disamb*) = SemGraph.make_tree(*disambiguate_nodes*) (*dependency_tree*)sem2(*disamb*) in let sem3(*disamb*) = SemGraph.simplify_tree(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in @@ -469,41 +471,41 @@ let semantic_processing timeout test_only_flag file_prefix tokens max_n dependen {result with status=SemError; msg=Printexc.to_string e; sem_time=time6 -. 
time5} -let rec semantic_processing_sentence timeout test_only_flag tokens max_n = function +let rec semantic_processing_sentence timeout test_only_flag tokens lex_sems max_n = function RawSentence s -> RawSentence s - | ENIAMSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens max_n result.dependency_tree) - | CONLLSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens max_n result.dependency_tree) + | ENIAMSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens lex_sems max_n result.dependency_tree) + | CONLLSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens lex_sems max_n result.dependency_tree) | QuotedSentences sentences -> let sentences = Xlist.rev_map sentences (fun p -> - let sentence = semantic_processing_sentence timeout test_only_flag tokens max_n p.psentence in + let sentence = semantic_processing_sentence timeout test_only_flag tokens lex_sems max_n p.psentence in {p with psentence=sentence}) in QuotedSentences(List.rev sentences) | AltSentence l -> let l = Xlist.rev_map l (fun (mode,sentence) -> - mode, semantic_processing_sentence timeout test_only_flag tokens max_n sentence) in + mode, semantic_processing_sentence timeout test_only_flag tokens lex_sems max_n sentence) in AltSentence(List.rev l) | _ -> failwith "semantic_processing_sentence" -let rec semantic_processing_paragraph timeout test_only_flag tokens max_n = function +let rec semantic_processing_paragraph timeout test_only_flag tokens lex_sems max_n = function RawParagraph s -> RawParagraph s | StructParagraph sentences -> let sentences = Xlist.rev_map sentences (fun p -> - let sentence = semantic_processing_sentence timeout test_only_flag tokens max_n p.psentence in + let sentence = semantic_processing_sentence timeout test_only_flag tokens lex_sems max_n p.psentence in {p with psentence=sentence}) in 
StructParagraph(List.rev sentences) | AltParagraph l -> let l = Xlist.rev_map l (fun (mode,paragraph) -> - mode, semantic_processing_paragraph timeout test_only_flag tokens max_n paragraph) in + mode, semantic_processing_paragraph timeout test_only_flag tokens lex_sems max_n paragraph) in AltParagraph(List.rev l) -let rec semantic_processing_text timeout test_only_flag max_n = function +let rec semantic_processing_text timeout test_only_flag tokens lex_sems max_n = function RawText s -> RawText s - | StructText(paragraphs,tokens) -> + | StructText paragraphs -> let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> - semantic_processing_paragraph timeout test_only_flag tokens max_n paragraph) in - StructText(List.rev paragraphs, tokens) + semantic_processing_paragraph timeout test_only_flag tokens lex_sems max_n paragraph) in + StructText(List.rev paragraphs) | AltText l -> AltText(Xlist.map l (fun (mode,text) -> - mode, semantic_processing_text timeout test_only_flag max_n text)) + mode, semantic_processing_text timeout test_only_flag tokens lex_sems max_n text)) let rec extract_query_text = function RawText s -> s @@ -512,7 +514,7 @@ let rec extract_query_text = function let process_query pre_in pre_out timeout test_only_flag id full_query max_n = (* print_endline "process_query 0"; *) - let result = {empty_result with input_text=translate_text full_query} in + let result = {empty_result with input_text=translate_text (fst full_query)} in let time1 = time_fun () in (* print_endline "process_query 1"; *) Marshal.to_channel pre_out full_query []; @@ -523,11 +525,11 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n = ENIAMtokenizerTypes.token_record ExtArray.t * ENIAMlexSemanticsTypes.lex_sem ExtArray.t * string * float) in let time2 = time_fun () in - let result = if test_only_flag then result else {result with pre_text=translate_text pre_text} in + let result = if test_only_flag then result else {result with 
pre_text=translate_text pre_text; tokens=tokens; lex_sems=lex_sems} in let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1} in if msg <> "" then {result with status=PreprocessingError; msg=msg} else ( (* print_endline "process_query 3"; *) - let parsed_text = parse_text timeout test_only_flag Struct (translate_text pre_text) in + let parsed_text = parse_text timeout test_only_flag Struct tokens lex_sems (translate_text pre_text) in (* print_endline "process_query 4"; *) let time3 = time_fun () in let result = if test_only_flag then result else {result with status=Parsed; parsed_text=parsed_text} in @@ -538,7 +540,7 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n = else select_sentences_text parsed_text in (* print_endline "process_query 6"; *) let result = if test_only_flag then result else {result with status=Parsed; selected_sent_text=selected_sent_text} in - let semantic_text = semantic_processing_text timeout test_only_flag max_n selected_sent_text in + let semantic_text = semantic_processing_text timeout test_only_flag tokens lex_sems max_n selected_sent_text in (* print_endline "process_query 7"; *) let selected_semantic_text = if not Paths.config.Paths.sentence_selection_enabled then semantic_text diff --git a/parser/execTypes.ml b/parser/execTypes.ml index 5a6c910..c9a15fb 100644 --- a/parser/execTypes.ml +++ b/parser/execTypes.ml @@ -92,7 +92,7 @@ and paragraph = type text = RawText of string - | StructText of paragraph list + | StructText of paragraph list | AltText of (mode * text) list @@ -109,6 +109,8 @@ type result = { selected_sent_text: text; semantic_text: text; selected_semantic_text: text; + tokens: ENIAMtokenizerTypes.token_record ExtArray.t; + lex_sems: ENIAMlexSemanticsTypes.lex_sem ExtArray.t; } type sum_result = { diff --git a/parser/makefile b/parser/makefile index 49459d3..150a1b7 100755 --- a/parser/makefile +++ b/parser/makefile @@ -1,13 +1,13 @@ OCAMLC=ocamlc OCAMLOPT=ocamlopt 
OCAMLDEP=ocamldep -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../tokenizer -I ../subsyntax -I ../walenty -I ../lexSemantics -I ../corpora +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../tokenizer -I ../subsyntax -I ../walenty -I ../lexSemantics -I ../integration #INCLUDES=-I +xml-light -I +xlib -I ../pre OCAMLFLAGS=$(INCLUDES) -g OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa latexvis.cmxa #nkjp.cmxa #OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa -PRE= ../pre/paths.ml ../walenty/ENIAMwalTypes.ml ../tokenizer/ENIAMtokenizerTypes.ml ../subsyntax/ENIAMsubsyntaxTypes.ml ../lexSemantics/ENIAMlexSemanticsTypes.ml ../walenty/ENIAMwalStringOf.ml ../corpora/CONLL.ml +PRE= ../pre/paths.ml ../tokenizer/ENIAMtokenizerTypes.ml ../subsyntax/ENIAMsubsyntaxTypes.ml ../walenty/ENIAMwalTypes.ml ../lexSemantics/ENIAMlexSemanticsTypes.ml ../walenty/ENIAMwalStringOf.ml ../integration/ENIAM_CONLL.ml LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml #LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml DISAMB= disambSelPref.ml disambLemma.ml @@ -16,9 +16,9 @@ SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml EXEC= execTypes.ml visualization.ml exec.ml all: -# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml - $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml - $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml 
LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml + $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml + # $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml + # $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml # $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml # $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml # $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml @@ -55,4 +55,4 @@ swigra_test: swigra_test.ml $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< clean: - rm -f *~ *.cm[oix] *.o eniam eniam.distr eniam.worker server2 parser2.cgi + rm -f *~ *.cm[oix] *.o eniam eniam.distr eniam.worker server2 parser2.cgi pipe diff --git a/parser/pipe.ml b/parser/pipe.ml index a329b5e..a9e4a8a 100644 --- a/parser/pipe.ml +++ b/parser/pipe.ml @@ -30,7 +30,7 @@ let get_sock_addr host_name port = let get_paths query = let i,o = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in Printf.fprintf o "%s\n%!" 
query; - let paths,msg,time = (Marshal.from_channel i : ((int * int * PreTypes.token_record) list * int * int) * string * float) in + let paths,msg,time = (Marshal.from_channel i : ((int * int * ENIAMtokenizerTypes.token_record) list * int * int) * string * float) in Printf.fprintf o "\n%!"; let _ = Unix.shutdown_connection i in paths,msg,time @@ -39,21 +39,21 @@ let get_paths query = let simple_disambiguate (paths,last) = Xlist.fold paths [] (fun paths (i,j,t) -> - if Xlist.mem t.PreTypes.attrs "notvalidated proper" || Xlist.mem t.PreTypes.attrs "lemma not validated" then paths else (i,j,t) :: paths),last + if Xlist.mem t.ENIAMtokenizerTypes.attrs "notvalidated proper" || Xlist.mem t.ENIAMtokenizerTypes.attrs "lemma not validated" then paths else (i,j,t) :: paths),last (* FIXME: przerobić koordynację *) let lcg_process query = let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in - let result = Exec.process_query ic oc 30. false "x" (PreTypes.RawText query,ExtArray.make 1 ENIAMtokenizerTypes.empty_token) 10 in + let result = Exec.process_query ic oc 30. 
false "x" (ENIAMsubsyntaxTypes.RawText query,ExtArray.make 1 ENIAMtokenizerTypes.empty_token) 10 in let path = "results/" in - Visualization.print_html_text path "input_text" result.input_text; - Visualization.print_html_text path "pre_text" result.pre_text; - Visualization.print_html_text path "parsed_text" result.parsed_text; - Visualization.print_html_text path "selected_sent_text" result.selected_sent_text; - Visualization.print_html_text path "semantic_text" result.semantic_text; - Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text; - Visualization.print_main_result_text "aaa/" (path ^ "main/") "xxxx" result.selected_semantic_text; + Visualization.print_html_text path "input_text" result.input_text result.tokens result.lex_sems; + Visualization.print_html_text path "pre_text" result.pre_text result.tokens result.lex_sems; + Visualization.print_html_text path "parsed_text" result.parsed_text result.tokens result.lex_sems; + Visualization.print_html_text path "selected_sent_text" result.selected_sent_text result.tokens result.lex_sems; + Visualization.print_html_text path "semantic_text" result.semantic_text result.tokens result.lex_sems; + Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text result.tokens result.lex_sems; + Visualization.print_main_result_text "aaa/" (path ^ "main/") "xxxx" result.tokens result.selected_semantic_text; Exec.print_result stdout result; (*Visualization.print_paths "results/" "paths" result.paths; Visualization.print_paths_latex "paths" result.paths; @@ -117,7 +117,7 @@ let lcg_process query = LatexMain.latex_compile_and_clean "results/" "chart"*) | _ -> ());*) (* Printf.fprintf oc "\n%!"; *) - Marshal.to_channel oc (PreTypes.RawText "",ExtArray.make 1 ENIAMtokenizerTypes.empty_token) []; + Marshal.to_channel oc (ENIAMsubsyntaxTypes.RawText "",ExtArray.make 1 ENIAMtokenizerTypes.empty_token) []; flush oc; let _ = Unix.shutdown_connection ic in () @@ -162,7 
+162,7 @@ let lcg_process_file filename result_path result_name = failwith "lcg_process_fi (* let _ = LCGexec.process_file_id "data/sentences-składnica-with-trees.tab" "results/sentences-składnica-with-trees.eff" 100. *) (* Przetwarzanie korpusów w formacie CONLL *) - +(* let id_counter = ref 0 let get_id () = @@ -187,7 +187,7 @@ let process_id s = Xstring.check_prefix "morph_" c && Xstring.check_sufix "-s" c then Xstring.cut_prefix "NKJP_1M_" a ^ "." ^ Xstring.cut_sufix "-s" (Xstring.cut_prefix "morph_" c) else failwith ("process_id: " ^ s) - +*) (* FIXME let process_conll_corpus filename = let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in @@ -232,13 +232,13 @@ let _ = *) - +(* let has_pos pos (paths,_,_) = Xlist.fold paths false (fun b (_,_,t) -> match t.PreTypes.token with PreTypes.Lemma(_,cat,_) -> if cat = pos then true else b | _ -> b) - +*) (* Wydobycie zdań zawierających symbole *) (*let _ = let i,o = Unix.open_connection (get_sock_addr host port) in @@ -346,7 +346,7 @@ let print_stats n stats = stats.adj_sense (float stats.adj_sense /. float n) (float stats.adj_sense /. float stats.adj) stats.adj_valence (float stats.adj_valence /. float n) (float stats.adj_valence /. 
float stats.adj); () - +(* let get_stats stats (paths,_) = Xlist.fold paths stats (fun stats (_,_,t) -> (* if Xlist.mem t.PreTypes.attrs "notvalidated proper" || Xlist.mem t.PreTypes.attrs "lemma not validated" then stats else *) @@ -364,7 +364,7 @@ let get_stats stats (paths,_) = adj_valence=if t.PreTypes.valence=[] then stats.adj_valence else stats.adj_valence+1} | _ -> stats) | _ -> stats) - +*) (* Test pokrycia słowosieci i walentego *) (*let _ = let sentences = File.load_lines "data/sentences-składnica.txt" in diff --git a/parser/semGraph.ml b/parser/semGraph.ml index 1b6d372..3e91aec 100644 --- a/parser/semGraph.ml +++ b/parser/semGraph.ml @@ -61,7 +61,7 @@ let rec get_person = function | _ :: l -> get_person l | [] -> "" -let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = +let rec create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = let sem_args = if t.cat = "pro" then match get_person t.attrs with "pri" -> ["indexical"] @@ -70,13 +70,13 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = | "" -> ["indexical";"coreferential";"deictic"] | _ -> failwith "create_normal_concept: pro" else sem_args in (* FIXME: przesunąć to do rozszerzania path_array *) - if t.agf = WalTypes.NOSEM then t.args else + if t.agf = ENIAMwalTypes.NOSEM then t.args else let c = {empty_concept with c_sense = Val t.meaning; c_relations=(*create_concepts tokens*) t.args; c_quant=make_sem_args sem_args; c_variable=string_of_int t.id,""; - c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).PreTypes.beg; + c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).ENIAMtokenizerTypes.beg; c_local_quant=true} in if t.cat = "subst" || t.cat = "depr" || t.cat = "ger" || t.cat = "unk" || StringSet.mem symbols t.cat then (* FIXME: wykrywanie plurale tantum *) let c = {c with c_local_quant=false} in @@ -103,9 +103,10 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args 
= | e,t -> failwith ("create_normal_concept noun: " ^ e)) in let c = if t.cat = "depr" then {c with c_relations=Tuple[c.c_relations;SingleRelation(Val "depr")]} else c in if cx_flag then - let id = ExtArray.add tokens PreTypes.empty_token in + let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token in + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in Context{empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos} - else Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else + else Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else if t.cat = "fin" || t.cat = "bedzie" || t.cat = "praet" || t.cat = "winien" || t.cat = "impt" || t.cat = "imps" || t.cat = "pred" || t.pred = "pro-komunikować" then let c = {c with c_local_quant=false} in let c = Xlist.fold t.attrs c (fun c -> function @@ -122,10 +123,11 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} | e,t -> failwith ("create_normal_concept verb: " ^ e)) in let c = if t.pred = "pro-komunikować" then {c with c_relations=Relation(Val "Theme",Val "",c.c_relations)} else c in (* FIXME: to by trzeba przesunąć na wcześniej *) - let id = ExtArray.add tokens PreTypes.empty_token in + let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token in + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos} in - if t.position.WalTypes.role <> "" || t.position.WalTypes.role_attr <> "" then failwith "create_normal_concept: verb" else -(* Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Context cx) else *) + if t.position.ENIAMwalTypes.role <> "" || t.position.ENIAMwalTypes.role_attr <> "" then failwith "create_normal_concept: verb" else +(* Relation(Val 
t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Context cx) else *) Context cx else if t.cat = "inf" then let c = {c with c_local_quant=false} in @@ -134,9 +136,10 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = | "TENSE",t -> {c with c_relations=Tuple[c.c_relations;SingleRelation t]} | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} | e,t -> failwith ("create_normal_concept verb: " ^ e)) in - let id = ExtArray.add tokens PreTypes.empty_token in + let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token in + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos} in - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Context cx) else + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Context cx) else if t.cat = "adj" || t.cat = "adjc" || t.cat = "adjp" || t.cat = "adja" || t.cat = "pact" || t.cat = "ppas" || t.cat = "ordnum" || t.cat = "roman-adj" then let c = if t.cat = "pact" || t.cat = "ppas" then {c with c_local_quant=false} else c in let c = Xlist.fold t.attrs c (fun c -> function @@ -157,8 +160,8 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = | "LEX",_ -> c (* FIXME *) | e,t -> failwith ("create_normal_concept adj: " ^ e)) in if t.cat = "pact" || t.cat = "ppas" then - RevRelation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) - else Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else + RevRelation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) + else Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else if t.cat = "adv" || t.cat = "pcon" || t.cat = "pant" then let c = if t.cat = "pcon" || t.cat = "pant" then {c with c_local_quant=false} else c in let c = Xlist.fold t.attrs c (fun c -> function @@
-169,7 +172,7 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = | "TYPE",_ -> c | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} | e,t -> failwith ("create_normal_concept adv: " ^ e)) in - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else if t.cat = "pro" || t.cat = "ppron12" || t.cat = "ppron3" || t.cat = "siebie" then (* FIXME: indexicalność *) let c = {c with c_local_quant=false} in let c = Xlist.fold t.attrs c (fun c -> function @@ -180,13 +183,13 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = | "SYN",_ -> c | "NSEM",_ -> c | e,t -> failwith ("create_normal_concept pron: " ^ e)) in - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else if t.cat = "prep" then - if t.arole = "NOSEM" then Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,t.args) else + if t.arole = "NOSEM" then Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,t.args) else let c = Xlist.fold t.attrs c (fun c -> function | "CASE",_ -> c | e,t -> failwith ("create_normal_concept prep: " ^ e)) in - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else if t.cat = "num" || t.cat = "intnum" || t.cat = "realnum" || t.cat = "intnum-interval" || t.cat = "realnum-interval" then let c = Xlist.fold t.attrs c (fun c -> function (* "MEANING",t -> {c with c_sense=Tuple[c.c_sense;t]} *) @@ -197,20 +200,20 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = | "PERS",_ -> c | "TYPE",_ -> c | e,t -> failwith ("create_normal_concept num: " ^ e)) in - Relation(Val 
t.position.WalTypes.role,Val t.position.WalTypes.role_attr,(*Quantifier*)(Concept c)) else + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,(*Quantifier*)(Concept c)) else if t.cat = "qub" && t.pred="się" then let c = {c with c_quant=Tuple[c.c_quant;Val "coreferential"]} in - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,(*Quantifier*)(Concept c)) else + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,(*Quantifier*)(Concept c)) else if t.cat = "qub" && (t.pred="czy" || t.pred="gdyby") then - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,SetContextName(t.meaning,t.args)) else + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,SetContextName(t.meaning,t.args)) else if t.cat = "qub" then let c = Xlist.fold t.attrs c (fun c -> function (* | "TYPE",Val "int" -> {c with c_quant=Tuple[c.c_quant;Val "interrogative"]} | "TYPE",_ -> c*) | e,t -> failwith ("create_normal_concept qub: " ^ e)) in - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else if t.cat = "comp" then - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,SetContextName(t.meaning,t.args)) else + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,SetContextName(t.meaning,t.args)) else if t.cat = "conj" then let c = {empty_context with cx_sense=Val t.meaning; cx_contents=RemoveRelation t.args; cx_variable=string_of_int t.id,""; cx_pos=c.c_pos} in let c = Xlist.fold t.attrs c (fun c -> function @@ -219,7 +222,7 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = | "GEND",_ -> c | "PERS",_ -> c | e,t -> failwith ("create_normal_concept conj: " ^ e)) in - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Context c) else + Relation(Val 
t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Context c) else if t.cat = "interj" then Node t else if t.cat = "sinterj" then let c = Xlist.fold t.attrs c (fun c -> function @@ -240,7 +243,7 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = if t.cat = "interp" && t.pred = "</query>" then let l = List.rev (make_args_list t.args) in let x = Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(RemoveRelation t,"Next","Sentence",RemoveRelation s)) in - if t.agf = WalTypes.OBJ then Relation(Val t.arole,Val t.arole_attr,x) else x else + if t.agf = ENIAMwalTypes.OBJ then Relation(Val t.arole,Val t.arole_attr,x) else x else if t.cat = "interp" && t.pred = "<query1>" then t.args else if t.cat = "interp" && t.pred = "<query2>" then t.args else if t.cat = "interp" && t.pred = "<query4>" then t.args else @@ -252,46 +255,46 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(RemoveRelation t,"Next","Sentence",RemoveRelation s)) else if t.cat = "interp" && t.pred = "?" 
then SingleRelation(Val "int") else if t.cat = "interp" && t.pred = "„" then - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,RemoveRelation t.args) else - if t.cat = "interp" || t.pred = "</or-sentence>" then Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,t.args) else ( + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,RemoveRelation t.args) else + if t.cat = "interp" || t.pred = "</or-sentence>" then Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,t.args) else ( if t.cat = "interp" then Node t else - if t.cat = "" then Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,t.args) else + if t.cat = "" then Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,t.args) else (* print_endline t.pred; *) Node t) -and create_concepts tokens = function +and create_concepts tokens lex_sems = function Node t -> (* print_endline ("cc " ^ t.pred); *) (* let agf = t.agf in *) let attrs = remove_unimportant_attrs t.attrs in (* let attrs,roles,role_attrs = get_roles attrs in *) let t = {t with attrs=attrs} in - (match (ExtArray.get tokens t.id).PreTypes.semantics with - PreTypes.Normal -> - let t = create_normal_concept tokens t [] in -(* if agf = WalTypes.CORE then Core t else *) t - | PreTypes.PrepSemantics _ -> - let t = create_normal_concept tokens t [] in -(* if agf = WalTypes.CORE then Core t else *) t - | PreTypes.Special l -> - let t = create_normal_concept tokens t l in -(* if agf = WalTypes.CORE then Core t else *) t -(* | PreTypes.SpecialNoun(lemma,_) -> + (match (ExtArray.get lex_sems t.id).ENIAMlexSemanticsTypes.semantics with + ENIAMlexSemanticsTypes.Normal -> + let t = create_normal_concept tokens lex_sems t [] in +(* if agf = ENIAMwalTypes.CORE then Core t else *) t + | ENIAMlexSemanticsTypes.PrepSemantics _ -> + let t = create_normal_concept tokens lex_sems t [] in +(* if agf = ENIAMwalTypes.CORE 
then Core t else *) t + | ENIAMlexSemanticsTypes.Special l -> + let t = create_normal_concept tokens lex_sems t l in +(* if agf = ENIAMwalTypes.CORE then Core t else *) t +(* | ENIAMlexSemanticsTypes.SpecialNoun(lemma,_) -> let t = create_normal_concept tokens t in*) -(* if agf = WalTypes.CORE then Core t else t*) +(* if agf = ENIAMwalTypes.CORE then Core t else t*) (*| _ -> failwith "create_concepts: ni"*)) - | Tuple l -> Tuple(Xlist.map l (create_concepts tokens)) - | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts tokens t)) + | Tuple l -> Tuple(Xlist.map l (create_concepts tokens lex_sems)) + | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts tokens lex_sems t)) | Dot -> Dot | Ref i -> Ref i - | Choice choices -> Choice(StringMap.map choices (create_concepts tokens)) + | Choice choices -> Choice(StringMap.map choices (create_concepts tokens lex_sems)) | t -> failwith ("create_concepts: " ^ LCGstringOf.linear_term 0 t) -let translate tokens term = +let translate tokens lex_sems term = let sem = Array.copy term in Int.iter 0 (Array.length sem - 1) (fun i -> - sem.(i) <- create_concepts tokens sem.(i)); + sem.(i) <- create_concepts tokens lex_sems sem.(i)); sem (***************************************************************************************) diff --git a/parser/visualization.ml b/parser/visualization.ml index e6c4d4c..c834176 100644 --- a/parser/visualization.ml +++ b/parser/visualization.ml @@ -20,7 +20,7 @@ open LCGtypes open Xstd open Printf -open PreTypes +open ENIAMtokenizerTypes let string_of_interps interps = String.concat "|" (Xlist.map interps (fun interp -> @@ -28,46 +28,46 @@ let string_of_interps interps = (String.concat "." 
interp2)))))) let rec string_of_token = function - PreTypes.SmallLetter orth -> sprintf "SmallLetter(%s)" orth - | PreTypes.CapLetter(orth,lc) -> sprintf "CapLetter(%s,%s)" orth lc - | PreTypes.AllSmall orth -> sprintf "AllSmall(%s)" orth - | PreTypes.AllCap(orth,lc,lc2) -> sprintf "AllCap(%s,%s,%s)" orth lc lc2 - | PreTypes.FirstCap(orth,lc,cl,ll) -> sprintf "FirstCap(%s,%s,%s,%s)" orth lc cl ll - | PreTypes.SomeCap orth -> sprintf "SomeCap(%s)" orth - | PreTypes.RomanDig(v,t) -> sprintf "RomanDig(%s,%s)" v t - | PreTypes.Interp orth -> sprintf "Interp(%s)" orth - | PreTypes.Symbol orth -> sprintf "Symbol(%s)" orth - | PreTypes.Dig(v,t) -> sprintf "Dig(%s,%s)" v t - | PreTypes.Other2 orth -> sprintf "Other(%s)" orth - | PreTypes.Lemma(lemma,cat,interps) -> sprintf "Lemma(%s,%s,%s)" lemma cat (string_of_interps interps) - | PreTypes.Proper(lemma,cat,interps,senses) -> sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps) (String.concat "|" senses) - | PreTypes.Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token)) - | PreTypes.Tokens(cat,l) -> sprintf "Tokens(%s,%s)" cat (String.concat ";" (Xlist.map l string_of_int)) + SmallLetter orth -> sprintf "SmallLetter(%s)" orth + | CapLetter(orth,lc) -> sprintf "CapLetter(%s,%s)" orth lc + | AllSmall orth -> sprintf "AllSmall(%s)" orth + | AllCap(orth,lc,lc2) -> sprintf "AllCap(%s,%s,%s)" orth lc lc2 + | FirstCap(orth,lc,cl,ll) -> sprintf "FirstCap(%s,%s,%s,%s)" orth lc cl ll + | SomeCap orth -> sprintf "SomeCap(%s)" orth + | RomanDig(v,t) -> sprintf "RomanDig(%s,%s)" v t + | Interp orth -> sprintf "Interp(%s)" orth + | Symbol orth -> sprintf "Symbol(%s)" orth + | Dig(v,t) -> sprintf "Dig(%s,%s)" v t + | Other orth -> sprintf "Other(%s)" orth + | Lemma(lemma,cat,interps) -> sprintf "Lemma(%s,%s,%s)" lemma cat (string_of_interps interps) + | Proper(lemma,cat,interps,senses) -> sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps) (String.concat 
"|" senses) + | Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token)) + | Tokens(cat,l) -> sprintf "Tokens(%s,%s)" cat (String.concat ";" (Xlist.map l string_of_int)) let lemma_of_token = function - PreTypes.SmallLetter orth -> orth - | PreTypes.CapLetter(orth,lc) -> orth - | PreTypes.AllSmall orth -> orth - | PreTypes.AllCap(orth,lc,lc2) -> orth - | PreTypes.FirstCap(orth,lc,cl,ll) -> orth - | PreTypes.SomeCap orth -> orth - | PreTypes.RomanDig(v,t) -> v - | PreTypes.Interp orth -> orth - | PreTypes.Symbol orth -> orth - | PreTypes.Dig(v,t) -> v - | PreTypes.Other2 orth -> orth - | PreTypes.Lemma(lemma,cat,interps) -> lemma - | PreTypes.Proper(lemma,cat,interps,senses) -> lemma - | PreTypes.Compound(sense,l) -> "Compound" - | PreTypes.Tokens(cat,l) -> "Tokens" + SmallLetter orth -> orth + | CapLetter(orth,lc) -> orth + | AllSmall orth -> orth + | AllCap(orth,lc,lc2) -> orth + | FirstCap(orth,lc,cl,ll) -> orth + | SomeCap orth -> orth + | RomanDig(v,t) -> v + | Interp orth -> orth + | Symbol orth -> orth + | Dig(v,t) -> v + | Other orth -> orth + | Lemma(lemma,cat,interps) -> lemma + | Proper(lemma,cat,interps,senses) -> lemma + | Compound(sense,l) -> "Compound" + | Tokens(cat,l) -> "Tokens" let rec spaces i = if i = 0 then "" else " " ^ spaces (i-1) - +(* let rec string_of_tokens i = function - PreTypes.Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;weight=%.2f;attrs=[%s];\n%s senses=[%s];\n%s valence=[%s];\n%s simple_valence=[%s];lroles=%s,%s}" (spaces i) t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (string_of_token t.PreTypes.token) - t.PreTypes.weight (String.concat ";" t.PreTypes.attrs) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s[%s]%.2f" sense (String.concat "," hipero) weight))) - (spaces i) (String.concat ";" (Xlist.map t.PreTypes.valence (WalStringOf.fnum_frame ""))) (spaces i) (String.concat ";" (Xlist.map 
t.PreTypes.simple_valence (WalStringOf.fnum_frame ""))) (fst t.lroles) (snd t.lroles) + Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;weight=%.2f;attrs=[%s];\n%s senses=[%s];\n%s valence=[%s];\n%s simple_valence=[%s];lroles=%s,%s}" (spaces i) t.orth t.beg t.len t.next (string_of_token t.token) + t.weight (String.concat ";" t.PreTypes.attrs) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s[%s]%.2f" sense (String.concat "," hipero) weight))) + (spaces i) (String.concat ";" (Xlist.map t.PreTypes.valence (ENIAMwalStringOf.fnum_frame ""))) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.simple_valence (ENIAMwalStringOf.fnum_frame ""))) (fst t.lroles) (snd t.lroles) | PreTypes.Variant l -> sprintf "%sVariant[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1)))) | PreTypes.Seq l -> sprintf "%sSeq[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1)))) @@ -75,7 +75,7 @@ let paths_to_string_indexed (paths,last,next_id) = String.concat "\n" (Xlist.map paths (fun (i,j,t) -> Printf.sprintf "%2d %2d %s" i j (string_of_tokens 0 (PreTypes.Token t)))) ^ Printf.sprintf "\nlast=%d next_id=%d" last next_id - +*) (*let string_of_token_record1 t = sprintf "{orth=%s;beg=%d;len=%d;next=%d;token=%s;id=%d;lnode=%d;rnode=%d;conll_id=%s;conll_super=%s;conll_label=%s;attrs=[%s]}" t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (string_of_token t.PreTypes.token) @@ -104,8 +104,8 @@ let string_of_status = function let rec xml_of_dependency_tree = function Node t -> Xml.Element("node",["pred",t.pred;"cat",t.cat;"weight",string_of_float t.weight;"id",string_of_int t.id],[ Xml.Element("gs",[],[xml_of_dependency_tree t.gs]); - Xml.Element("agf",[],[Xml.PCData (WalStringOf.gf t.agf)]); - Xml.Element("amorf",[],[Xml.PCData (WalStringOf.morf t.amorf)]); + Xml.Element("agf",[],[Xml.PCData (ENIAMwalStringOf.gf t.agf)]); + Xml.Element("amorf",[],[Xml.PCData 
(ENIAMwalStringOf.morf t.amorf)]); Xml.Element("attrs",[],Xlist.map t.attrs (fun (e,t) -> Xml.Element("attr",["label",e],[xml_of_dependency_tree t]))); Xml.Element("args",[],[xml_of_dependency_tree t.args])]) | Concept c -> Xml.Element("concept",["var",fst c.c_variable ^ snd c.c_variable;"pos",string_of_int c.c_pos],[ @@ -168,8 +168,8 @@ let string_of_node t = "PRED",Val t.pred;"CAT",Val t.cat;"ID",Val (string_of_int t.id);"WEIGHT",Val (string_of_float t.weight);"GS",t.gs; "AGF",Gf t.agf;"AMORF",Morf t.amorf;"AROLE",Val t.arole;"AROLE-ATTR",Val t.arole_attr; "MEANING",Val t.meaning;"HIPERO",Tuple(Xlist.map (StringSet.to_list t.hipero) (fun s -> Val s));"MEANING-WEIGHT",Val (string_of_float t.meaning_weight); - "ROLE",Val t.position.WalTypes.role;"ROLE-ATTR",Val t.position.WalTypes.role_attr;"SEL-PREFS",Tuple(Xlist.map t.position.WalTypes.sel_prefs (fun s -> Val s)); - "GF",Gf t.position.WalTypes.gf] @ t.attrs in + "ROLE",Val t.position.ENIAMwalTypes.role;"ROLE-ATTR",Val t.position.ENIAMwalTypes.role_attr;"SEL-PREFS",Tuple(Xlist.map t.position.ENIAMwalTypes.sel_prefs (fun s -> Val s)); + "GF",Gf t.position.ENIAMwalTypes.gf] @ t.attrs in "{ " ^ String.concat " | " (Xlist.map l (fun (e,t) -> "{ " ^ e ^ " | " ^ escape_string (LCGstringOf.linear_term 0 t) ^ " }")) ^ " }" let single_rel_id_count = ref 0 @@ -447,16 +447,16 @@ let print_graph2 path name query t = Xlist.iter (Str.split (Str.regexp path) path) (fun _ -> Sys.chdir "..")*) let rec get_lemma = function - PreTypes.Interp orth -> orth - | PreTypes.Lemma(lemma,cat,_) -> lemma ^ "\n" ^ cat + ENIAMtokenizerTypes.Interp orth -> orth + | ENIAMtokenizerTypes.Lemma(lemma,cat,_) -> lemma ^ "\n" ^ cat | _ -> "" let print_paths path name paths = File.file_out (path ^ name ^ ".gv") (fun file -> fprintf file "digraph G {\n"; Array.iter (fun t -> - let lemma = get_lemma t.PreTypes.token in - if lemma <> "" then fprintf file " %d -> %d [label=\"%s\\n%s\"]\n" t.PreTypes.beg t.PreTypes.next t.PreTypes.orth lemma) paths; + let 
lemma = get_lemma t.ENIAMtokenizerTypes.token in + if lemma <> "" then fprintf file " %d -> %d [label=\"%s\\n%s\"]\n" t.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.next t.ENIAMtokenizerTypes.orth lemma) paths; fprintf file "}\n"); Sys.chdir path; ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png")); @@ -474,7 +474,7 @@ let rec print_simplified_dependency_tree_rec2 file tokens edge upper = function let rec print_simplified_dependency_tree_rec file tokens edge upper id = function Node t -> - let orth = if t.id = 0 then "" else (ExtArray.get tokens t.id).PreTypes.orth in + let orth = if t.id = 0 then "" else (ExtArray.get tokens t.id).ENIAMtokenizerTypes.orth in fprintf file " %s [label=\"%s\\n%s\\n%s:%s\\n%f\"]\n" id (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat t.weight; print_edge file edge upper id; print_simplified_dependency_tree_rec2 file tokens "" id t.args @@ -495,7 +495,7 @@ let print_simplified_dependency_tree path name tokens dependency_tree = Int.iter 0 (Array.length dependency_tree - 1) (fun i -> print_simplified_dependency_tree_rec file tokens "" "" ("x" ^ string_of_int i) dependency_tree.(i)); (* match dependency_tree.(i) with Node t -> - let orth = if t.id = 0 then "" else tokens.(t.id).PreTypes.orth in + let orth = if t.id = 0 then "" else tokens.(t.id).ENIAMtokenizerTypes.orth in fprintf file " %d [label=\"%s\\n%s\\n%s:%s\"]\n" i (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat; let refs = get_refs [] t.args in Xlist.iter refs (fun r -> @@ -510,14 +510,14 @@ let print_simplified_dependency_tree path name tokens dependency_tree = File.file_out filename (fun file -> fprintf file "digraph G {\n"; let set = Xlist.fold paths IntSet.empty (fun set t -> - IntSet.add (IntSet.add set t.PreTypes.beg) t.PreTypes.next) in + IntSet.add (IntSet.add set t.ENIAMtokenizerTypes.beg) t.ENIAMtokenizerTypes.next) in IntSet.iter set (fun i -> fprintf file " %d [width=0; height=0; label=\"\"]\n" i); Xlist.iter paths (fun t -> - let lemma = 
get_lemma t.PreTypes.token in + let lemma = get_lemma t.ENIAMtokenizerTypes.token in if lemma <> "" then ( - let s = if t.PreTypes.orth = "" then lemma else t.PreTypes.orth ^ "\n" ^ lemma in - fprintf file " %d -> i%d -> %d [arrowhead=none]\n" t.PreTypes.beg t.PreTypes.id t.PreTypes.next; - fprintf file " i%d [label=\"%s\"]\n" t.PreTypes.id s)); + let s = if t.ENIAMtokenizerTypes.orth = "" then lemma else t.ENIAMtokenizerTypes.orth ^ "\n" ^ lemma in + fprintf file " %d -> i%d -> %d [arrowhead=none]\n" t.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.id t.ENIAMtokenizerTypes.next; + fprintf file " i%d [label=\"%s\"]\n" t.ENIAMtokenizerTypes.id s)); fprintf file "}\n"); Sys.chdir "results"; ignore (Sys.command "dot -Tpng tree.gv -o tree.png"); @@ -527,13 +527,13 @@ let print_simplified_dependency_tree path name tokens dependency_tree = File.file_out filename (fun file -> fprintf file "digraph G {\n"; fprintf file " subgraph {\n ordering=out\n"; - let same = Xlist.fold (Xlist.sort paths (fun s t -> compare s.PreTypes.beg t.PreTypes.beg)) [] (fun same t -> - let lemma = get_lemma t.PreTypes.token in + let same = Xlist.fold (Xlist.sort paths (fun s t -> compare s.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.beg)) [] (fun same t -> + let lemma = get_lemma t.ENIAMtokenizerTypes.token in if lemma <> "" then ( - let s = if t.PreTypes.orth = "" then lemma else t.PreTypes.orth ^ "\n" ^ lemma in - fprintf file " i%d -> out [arrowhead=none]\n" t.PreTypes.id; - fprintf file " i%d [label=\"%s\"]\n" t.PreTypes.id s; - t.PreTypes.id :: same) + let s = if t.ENIAMtokenizerTypes.orth = "" then lemma else t.ENIAMtokenizerTypes.orth ^ "\n" ^ lemma in + fprintf file " i%d -> out [arrowhead=none]\n" t.ENIAMtokenizerTypes.id; + fprintf file " i%d [label=\"%s\"]\n" t.ENIAMtokenizerTypes.id s; + t.ENIAMtokenizerTypes.id :: same) else same) in fprintf file " }\n"; fprintf file " { rank = same; %s }\n" (String.concat "; " (Xlist.map same (fun i -> sprintf "\"i%d\"" i))); @@ -555,19 +555,19 @@ 
let rec schema_latex schema = "\\begin{tabular}{l}" ^ String.concat "\\\\" (Xlist.map schema (fun s -> LatexMain.escape_string (String.concat "," ( - (if s.WalTypes.gf = WalTypes.ARG then [] else [WalStringOf.gf s.WalTypes.gf])@ - (if s.WalTypes.role = "" then [] else [s.WalTypes.role])@ - (if s.WalTypes.role_attr = "" then [] else [s.WalTypes.role_attr])@ - s.WalTypes.sel_prefs@(WalStringOf.controllers s.WalTypes.cr)@(WalStringOf.controllees s.WalTypes.ce)) ^ WalStringOf.direction s.WalTypes.dir ^ "{" ^ String.concat ";" (Xlist.map s.WalTypes.morfs WalStringOf.morf) ^ "}"))) ^ + (if s.ENIAMwalTypes.gf = ENIAMwalTypes.ARG then [] else [ENIAMwalStringOf.gf s.ENIAMwalTypes.gf])@ + (if s.ENIAMwalTypes.role = "" then [] else [s.ENIAMwalTypes.role])@ + (if s.ENIAMwalTypes.role_attr = "" then [] else [s.ENIAMwalTypes.role_attr])@ + s.ENIAMwalTypes.sel_prefs@(ENIAMwalStringOf.controllers s.ENIAMwalTypes.cr)@(ENIAMwalStringOf.controllees s.ENIAMwalTypes.ce)) ^ ENIAMwalStringOf.direction s.ENIAMwalTypes.dir ^ "{" ^ String.concat ";" (Xlist.map s.ENIAMwalTypes.morfs ENIAMwalStringOf.morf) ^ "}"))) ^ "\\end{tabular}" let fnum_frame_latex = function - fnum,WalTypes.Frame(atrs,s) -> - Printf.sprintf "%d: %s: %s" fnum (LatexMain.escape_string (WalStringOf.frame_atrs atrs)) (schema_latex s) - | fnum,WalTypes.LexFrame(id,p,r,s) -> - Printf.sprintf "%d: %s: %s: %s: %s" fnum id (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s) - | fnum,WalTypes.ComprepFrame(le,p,r,s) -> - Printf.sprintf "%d: %s: %s: %s: %s" fnum le (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s) + fnum,ENIAMwalTypes.Frame(atrs,s) -> + Printf.sprintf "%d: %s: %s" fnum (LatexMain.escape_string (ENIAMwalStringOf.frame_atrs atrs)) (schema_latex s) + | fnum,ENIAMwalTypes.LexFrame(id,p,r,s) -> + Printf.sprintf "%d: %s: %s: %s: %s" fnum id (LatexMain.escape_string (ENIAMwalStringOf.pos p)) (ENIAMwalStringOf.restr r) (schema_latex s) + | 
fnum,ENIAMwalTypes.ComprepFrame(le,p,r,s) -> + Printf.sprintf "%d: %s: %s: %s: %s" fnum le (LatexMain.escape_string (ENIAMwalStringOf.pos p)) (ENIAMwalStringOf.restr r) (schema_latex s) (*let print_paths_latex name paths = LatexMain.latex_file_out "results/" name "a0" false (fun file -> @@ -575,11 +575,11 @@ let fnum_frame_latex = function Int.iter 0 (Array.length paths - 1) (fun i -> let t = paths.(i) in fprintf file "%s & %d & %d & %d & %s & %d & %.4f & %s & %s %s &\\begin{tabular}{l|l|p{4cm}}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular}\\\\\n\\hline\n" - t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (LatexMain.escape_string (string_of_token t.PreTypes.token)) t.PreTypes.id t.PreTypes.weight - (String.concat ";" t.PreTypes.attrs) (fst t.PreTypes.lroles) (snd t.PreTypes.lroles) - (String.concat "\\\\\n" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s & %.2f & %s" sense weight (String.concat "," hipero)))) - (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.simple_valence (fun x -> fnum_frame_latex x))) - (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.valence (fun x -> fnum_frame_latex x)))); + t.ENIAMtokenizerTypes.orth t.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.len t.ENIAMtokenizerTypes.next (LatexMain.escape_string (string_of_token t.ENIAMtokenizerTypes.token)) t.ENIAMtokenizerTypes.id t.ENIAMtokenizerTypes.weight + (String.concat ";" t.ENIAMtokenizerTypes.attrs) (fst t.ENIAMtokenizerTypes.lroles) (snd t.ENIAMtokenizerTypes.lroles) + (String.concat "\\\\\n" (Xlist.map t.ENIAMtokenizerTypes.senses (fun (sense,hipero,weight) -> sprintf "%s & %.2f & %s" sense weight (String.concat "," hipero)))) + (String.concat "\\\\\n\\hline\n" (Xlist.map t.ENIAMtokenizerTypes.simple_valence (fun x -> fnum_frame_latex x))) + (String.concat "\\\\\n\\hline\n" (Xlist.map t.ENIAMtokenizerTypes.valence (fun x -> fnum_frame_latex x)))); fprintf file "\\end{longtable}"); 
LatexMain.latex_compile_and_clean "results/" name*) @@ -715,7 +715,7 @@ let html_of_struct_sentence tokens paths last = String.concat "\n" (Xlist.map (List.sort compare paths) (fun (id,lnode,rnode) -> let t = ExtArray.get tokens id in sprintf "<tr><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>" - t.PreTypes.orth (escape_html (string_of_token t.PreTypes.token)) id lnode rnode)) ^ + t.ENIAMtokenizerTypes.orth (escape_html (string_of_token t.ENIAMtokenizerTypes.token)) id lnode rnode)) ^ sprintf "<tr><td></td><td></td><td></td><td>%d</td><td></td></tr>" last ^ "</table>" @@ -725,7 +725,7 @@ let html_of_dep_sentence tokens paths = let id,super,label = paths.(conll_id) in let t = ExtArray.get tokens id in (sprintf "<tr><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%s</td></tr>" - t.PreTypes.orth (escape_html (string_of_token t.PreTypes.token)) id conll_id super label) :: l))) ^ + t.ENIAMtokenizerTypes.orth (escape_html (string_of_token t.ENIAMtokenizerTypes.token)) id conll_id super label) :: l))) ^ "</table>" let html_of_tokens tokens = @@ -733,26 +733,28 @@ let html_of_tokens tokens = String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size tokens - 1) [] (fun l id -> let t = ExtArray.get tokens id in (sprintf "<tr><td>%d</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td></tr>" - id t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (escape_html (string_of_token t.PreTypes.token)) - (String.concat "; " t.PreTypes.attrs)) :: l))) ^ + id t.ENIAMtokenizerTypes.orth t.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.len t.ENIAMtokenizerTypes.next (escape_html (string_of_token t.ENIAMtokenizerTypes.token)) + (String.concat "; " t.ENIAMtokenizerTypes.attrs)) :: l))) ^ "</table>" -let html_of_tokens_simple_valence tokens = +let html_of_tokens_simple_valence tokens lex_sems = "<table><tr><td><b>id</b></td><td><b>orth</b></td><td><b>simple_valence</b></td></tr>" ^ String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size 
tokens - 1) [] (fun l id -> let t = ExtArray.get tokens id in - Xlist.fold t.simple_valence l (fun l (fnum,frame) -> + let d = ExtArray.get lex_sems id in + Xlist.fold d.ENIAMlexSemanticsTypes.simple_valence l (fun l (fnum,frame) -> (sprintf "<tr><td>%d</td><td>%s</td><td>%s</td></tr>" - id t.PreTypes.orth (WalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^ + id t.ENIAMtokenizerTypes.orth (ENIAMwalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^ "</table>" -let html_of_tokens_valence tokens = +let html_of_tokens_valence tokens lex_sems = "<table><tr><td><b>id</b></td><td><b>orth</b></td><td><b>simple_valence</b></td></tr>" ^ String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size tokens - 1) [] (fun l id -> let t = ExtArray.get tokens id in - Xlist.fold t.valence l (fun l (fnum,frame) -> + let d = ExtArray.get lex_sems id in + Xlist.fold d.ENIAMlexSemanticsTypes.valence l (fun l (fnum,frame) -> (sprintf "<tr><td>%d</td><td>%s</td><td>%s</td></tr>" - id t.PreTypes.orth (WalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^ + id t.ENIAMtokenizerTypes.orth (ENIAMwalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^ "</table>" let create_latex_chart path name chart =