(* * ENIAM_LCGparser, a parser for Logical Categorial Grammar formalism * Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> * Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences * * This library is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. *) open Xstd open ENIAM_LCGtypes open Printf (* open ENIAMtokenizerTypes open ENIAMlexSemanticsTypes *) let make size = Array.make_matrix (size+1) (size+1) ([],0) let last_node chart = Array.length chart - 1 let add chart i j v layer = chart.(i).(j) <- [v],layer; chart let add_list chart i j l layer = chart.(i).(j) <- l,layer; chart let add_inc chart i j v layer = let l,layer2 = chart.(i).(j) in chart.(i).(j) <- v :: l, max layer layer2; chart let add_inc_list chart i j l layer = let l2,layer2 = chart.(i).(j) in chart.(i).(j) <- l @ l2, max layer layer2; chart let find chart i j = fst chart.(i).(j) let layer chart i j = snd chart.(i).(j) let fold chart s f = Int.fold 0 (last_node chart) s (fun s i -> Int.fold 0 (last_node chart) s (fun s j -> let layer = layer chart i j in Xlist.fold (find chart i j) s (fun s (symbol,sem) -> f s (symbol,i,j,sem,layer)))) let rec find_paths_rec chart last i = if i = last then [[]] else Int.fold (i+1) last [] (fun paths j -> if chart.(i).(j) = [] then paths else let tails = find_paths_rec chart last j in if Xlist.size tails > 1000000 then failwith "find_paths_rec: to many paths" else Xlist.fold tails paths (fun paths tail -> (chart.(i).(j) :: tail) :: paths)) let find_paths chart = let last = last_node chart - 1 in find_paths_rec chart last 0 let get_no_entries chart = Int.fold 0 (last_node chart) 0 (fun n i -> Int.fold 0 (last_node chart) n (fun n j -> n + (Xlist.size (find chart i j)))) (* Pod referencją 0 będzie trzymany korzeń termu *) let lazify chart = let new_chart = make (last_node chart) in let references = ExtArray.make (2 * last_node chart) Dot in let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *) let new_chart = fold chart new_chart (fun new_chart (symbol,i,j,sem,layer) -> let n = ExtArray.add references sem in add_inc new_chart i j (symbol,Ref n) layer) in new_chart, references let rec dep_lazify_rec references (DepNode(id,left,l,right)) = (* Printf.printf "dep_lazify_rec %s\n" id; *) let l = Xlist.rev_map l ENIAM_LCGrules.flatten_functor in let l = Xlist.fold l [] (fun l (symbol,sem) -> let n = ExtArray.add references sem in (symbol,Ref n) :: l) in let left = Xlist.map left (dep_lazify_rec references) in let right = Xlist.map right (dep_lazify_rec references) in DepNode(id,left,l,right) let dep_lazify dep_chart = let references = ExtArray.make 100 Dot in let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *) dep_lazify_rec references dep_chart, references let merge_sems l = (* FIXME: dodać warianty *) let map = Xlist.fold l SymbolMap.empty (fun map (t,sem) -> SymbolMap.add_inc map t [sem] (fun l -> sem :: l)) in SymbolMap.fold map [] (fun l t sems -> (t,ENIAM_LCGrules.make_variant sems) :: l) let make_unique chart i j = let l,layer = chart.(i).(j) in let l = merge_sems l in chart.(i).(j) <- l, layer; chart let parse chart references timeout time_fun = (* print_endline "parse 1"; *) (* ENIAM_LCGrules.references := refs; ENIAM_LCGrules.next_reference := next_ref; *) let start_time = time_fun () in let last_node = last_node chart in let chart = Int.fold 2 last_node chart (fun chart len -> Int.fold 0 (last_node - len) chart (fun chart i -> let k = i + len in Int.fold 1 (len - 1) chart (fun chart d -> let time = time_fun () in if time -. start_time > timeout then raise (Timeout(time -. start_time)) else let j = i + d in let l,lay = Xlist.fold ENIAM_LCGrules.rules (find chart i k,layer chart i k) (fun (l,lay) rule -> (rule references (find chart i j) (find chart j k)) @ l, (* Xlist.fold (find chart i j) l (fun l a -> Xlist.fold (find chart j k) l (fun l b -> (rule (a,b)) @ l)),*) max lay ((max (layer chart i j) (layer chart j k)) + 1)) in (* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *) (* let l = LCGreductions.merge_symbols l in *) (* if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *) make_unique (add_list chart i k l lay) i k))) in (* print_endline "parse 2"; *) chart let assign_not_parsed left right (t,sem) = let sem = if left = [] then sem else (print_endline "assign_not_parsed: ni 1"; sem) in let sem = if right = [] then sem else (print_endline "assign_not_parsed: ni 2"; sem) in t, sem let rec dep_parse_rec references start_time timeout time_fun (DepNode(id,left,funct,right)) = (* printf "dep_parse_rec 1 id=%d\n%!" id; *) let time = time_fun () in if time -. start_time > timeout then raise (Timeout(time -. start_time)) else let left = Xlist.map left (dep_parse_rec references start_time timeout time_fun) in let right = Xlist.map right (dep_parse_rec references start_time timeout time_fun) in (* printf "dep_parse_rec 2 id=%d\n%!" id; *) let funct,left = Xlist.fold left (funct,[]) (fun (funct,left) arg -> (* printf "larg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg); printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list funct); *) match ENIAM_LCGrules.backward_application_conll references arg funct with [] -> (*printf "NOT PARSED\n";*) funct, arg :: left | funct -> merge_sems funct, left) in (* printf "dep_parse_rec 3 |right|=%d \n%!" (Xlist.size right); *) let funct,right = Xlist.fold right (funct,[]) (fun (funct,right) arg -> (* printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list funct); printf "rarg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg); *) match ENIAM_LCGrules.forward_application_conll references funct arg with [] -> (*printf "NOT PARSED\n";*) funct, arg :: right | funct -> merge_sems funct, right) in (* printf "dep_parse_rec 4\n%!"; *) if left = [] && right = [] then funct else ( let xleft = Xlist.rev_map left (fun arg -> Xlist.rev_map arg ENIAM_LCGrules.set_x_type) in let xright = Xlist.rev_map right (fun arg -> Xlist.rev_map arg ENIAM_LCGrules.set_x_type) in (* printf "dep_parse_rec 5\n%!"; *) let xfunct,xleft = Xlist.fold xleft (funct,[]) (fun (xfunct,left) arg -> (* printf "larg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg); printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list xfunct); *) match ENIAM_LCGrules.backward_application_conll references arg xfunct with [] -> (*printf "NOT PARSED\n";*) xfunct, arg :: left | xfunct -> merge_sems xfunct, left) in (* printf "dep_parse_rec 6\n%!"; *) let xfunct,xright = Xlist.fold xright (xfunct,[]) (fun (xfunct,right) arg -> (* printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list xfunct); printf "rarg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg); *) match ENIAM_LCGrules.forward_application_conll references xfunct arg with [] -> (*printf "NOT PARSED\n";*) xfunct, arg :: right | xfunct -> merge_sems xfunct, right) in (* printf "dep_parse_rec 7\n%!"; *) if xleft = [] && xright = [] then xfunct else raise (NotDepParsed(id,left,funct,right))) let dep_parse dep_chart references timeout time_fun = (* print_endline "dep_parse"; *) let start_time = time_fun () in let parsed_dep_chart = dep_parse_rec references start_time timeout time_fun dep_chart in parsed_dep_chart let is_parsed chart = let n = last_node chart in Xlist.fold (find chart 0 n) false (fun b -> function ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<root>"]), _ -> true (* | ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<ors-sentence>"]), _ -> true *) | _ -> false) (* try let _ = Xlist.assoc (find chart 0 n) (ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<sentence>"])) in (* FIXME: !!! *) true with Not_found -> false *) let is_dep_parsed = function [] -> false | [ENIAM_LCGtypes.Bracket(false,false,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<conll_root>"]),_] -> true | [ENIAM_LCGtypes.Bracket(false,false,ENIAM_LCGtypes.Imp(ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom("<conll_root>")],ENIAM_LCGtypes.Forward,ENIAM_LCGtypes.Maybe _)),sem]-> true | [t,_] -> print_endline @@ ENIAM_LCGstringOf.grammar_symbol_prime t; failwith "is_dep_parsed" | l -> failwith ("is_dep_parsed " ^ (string_of_int @@ List.length l)) let get_parsed_term chart = let n = last_node chart in let l = Xlist.fold (find chart 0 n) [] (fun l -> function ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<root>"]), sem -> (ENIAM_LCGtypes.Cut(ENIAM_LCGtypes.Tuple[sem])) :: l | ENIAM_LCGtypes.Bracket(false,false,ENIAM_LCGtypes.Imp(ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom("<conll_root>")],ENIAM_LCGtypes.Forward,ENIAM_LCGtypes.Maybe _)) as t,sem-> let sem = List.hd (ENIAM_LCGrules.deduce_optarg sem t) in (ENIAM_LCGtypes.Cut(ENIAM_LCGtypes.Tuple[sem])) :: l (* | ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<ors-sentence>"]), sem -> (ENIAM_LCGtypes.Cut (ENIAM_LCGtypes.Tuple[sem])) :: l *) | _ -> l) in Node{ENIAM_LCGrenderer.empty_node with lemma="<root>"; args=ENIAM_LCGrules.make_variant l} (* FIXME: poprawić poniższe *) (* let get_parsed_term tokens lex_sems chart = let n = last_node chart in let l = Xlist.fold (find chart 0 n) [] (fun l -> function ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<root>"]), sem -> (ENIAM_LCGtypes.Cut(ENIAM_LCGtypes.Tuple[sem])) :: l | ENIAM_LCGtypes.Bracket(false,false,ENIAM_LCGtypes.Imp(ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom("<conll_root>")],ENIAM_LCGtypes.Forward,ENIAM_LCGtypes.Maybe _)) as t,sem-> let sem = List.hd (ENIAM_LCGrules.deduce_optarg sem t) in (ENIAM_LCGtypes.Cut(ENIAM_LCGtypes.Tuple[sem])) :: l (* | ENIAM_LCGtypes.Bracket(true,true,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<ors-sentence>"]), sem -> (ENIAM_LCGtypes.Cut (ENIAM_LCGtypes.Tuple[sem])) :: l *) | _ -> l) in let id = ExtArray.add tokens {empty_token with token=Lemma("<root>","interp",[])} in let _ = ExtArray.add lex_sems empty_lex_sem in ENIAM_LCGtypes.Node{LCGrenderer.empty_node with ENIAM_LCGtypes.pred="<root>"; ENIAM_LCGtypes.cat="interp"; ENIAM_LCGtypes.id=id; ENIAM_LCGtypes.agf=ENIAMwalTypes.NOSEM; ENIAM_LCGtypes.args=ENIAM_LCGrules.make_variant l} let get_dep_parsed_term tokens lex_sems = function [ENIAM_LCGtypes.Bracket(false,false,ENIAM_LCGtypes.Tensor[ENIAM_LCGtypes.Atom "<conll_root>"]),sem] -> let id = ExtArray.add tokens {empty_token with token=Lemma("<root>","interp",[])} in let _ = ExtArray.add lex_sems empty_lex_sem in let l = [ENIAM_LCGtypes.Cut (ENIAM_LCGtypes.Tuple[sem])] in ENIAM_LCGtypes.Node{LCGrenderer.empty_node with ENIAM_LCGtypes.pred="<root>"; ENIAM_LCGtypes.cat="interp"; ENIAM_LCGtypes.id=id; ENIAM_LCGtypes.agf=ENIAMwalTypes.NOSEM; ENIAM_LCGtypes.args=ENIAM_LCGrules.make_variant l} | _ -> failwith "get_dep_parsed_term" *) (*FIXME: Bębni na maszynie do pisania. Na myśl o czym brykasz?*)