LCGchart.ml 10.5 KB
(*
 *  ENIAM: Categorial Syntactic-Semantic Parser for Polish
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open Xstd
open LCGtypes
open Printf

let make size = Array.make_matrix (size+1) (size+1) ([],0)

let last_node chart = Array.length chart - 1

let add chart i j v layer =
  chart.(i).(j) <- [v],layer;
  chart

let add_list chart i j l layer =
  chart.(i).(j) <- l,layer;
  chart

let add_inc chart i j v layer =
  let l,layer2 = chart.(i).(j) in
  chart.(i).(j) <- v :: l, max layer layer2;
  chart

let add_inc_list chart i j l layer =
  let l2,layer2 = chart.(i).(j) in
  chart.(i).(j) <- l @ l2, max layer layer2;
  chart

let find chart i j = fst chart.(i).(j)
let layer chart i j = snd chart.(i).(j)

let fold chart s f =
  Int.fold 0 (last_node chart) s (fun s i ->
    Int.fold 0 (last_node chart) s (fun s j ->
      let layer = layer chart i j in
      Xlist.fold (find chart i j) s (fun s (symbol,sem) ->
        f s (symbol,i,j,sem,layer))))

let rec find_paths_rec chart last i =
  if i = last then [[]] else
  Int.fold (i+1) last [] (fun paths j ->
    if chart.(i).(j) = [] then paths else
    let tails = find_paths_rec chart last j in
    if Xlist.size tails > 1000000 then failwith "find_paths_rec: to many paths" else
    Xlist.fold tails paths (fun paths tail -> (chart.(i).(j) :: tail) :: paths))

let find_paths chart =
  let last = last_node chart - 1 in
  find_paths_rec chart last 0

let get_no_entries chart =
  Int.fold 0 (last_node chart) 0 (fun n i ->
    Int.fold 0 (last_node chart) n (fun n j ->
      n + (Xlist.size (find chart i j))))

(* Pod referencją 0 będzie trzymany korzeń termu *)
let lazify chart =
  let new_chart = make (last_node chart) in
  let references = ExtArray.make (2 * last_node chart) Dot in
  let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *)
  let new_chart = fold chart new_chart (fun new_chart (symbol,i,j,sem,layer) ->
    let n = ExtArray.add references sem in
    add_inc new_chart i j (symbol,Ref n) layer) in
  new_chart, references

let rec dep_lazify_rec references (DepNode(id,left,l,right)) =
  (* Printf.printf "dep_lazify_rec %s\n" id; *)
  let l = Xlist.rev_map l LCGrules.flatten_functor in
  let l = Xlist.fold l [] (fun l (symbol,sem) ->
    let n = ExtArray.add references sem in
    (symbol,Ref n) :: l) in
  let left = Xlist.map left (dep_lazify_rec references) in
  let right = Xlist.map right (dep_lazify_rec references) in
  DepNode(id,left,l,right)

let dep_lazify dep_chart =
  let references = ExtArray.make 100 Dot in
  let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *)
  dep_lazify_rec references dep_chart, references

let merge_sems l = (* FIXME: dodać warianty *)
  let map = Xlist.fold l SymbolMap.empty (fun map (t,sem) -> SymbolMap.add_inc map t [sem] (fun l -> sem :: l)) in
  SymbolMap.fold map [] (fun l t sems -> (t,LCGrules.make_variant sems) :: l)

let make_unique chart i j =
  let l,layer = chart.(i).(j) in
  let l = merge_sems l in
  chart.(i).(j) <- l, layer;
  chart

let parse chart references timeout time_fun =
  (* print_endline "parse 1"; *)
  (* LCGrules.references := refs;
  LCGrules.next_reference := next_ref; *)
  let start_time = time_fun () in
  let last_node = last_node chart in
  let chart = Int.fold 2 last_node chart (fun chart len ->
    Int.fold 0 (last_node - len) chart (fun chart i ->
      let k = i + len in
      Int.fold 1 (len - 1) chart (fun chart d ->
        let time = time_fun () in
        if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
        let j = i + d in
        let l,lay = Xlist.fold LCGrules.rules (find chart i k,layer chart i k) (fun (l,lay) rule ->
          (rule references (find chart i j) (find chart j k)) @ l,
(*          Xlist.fold (find chart i j) l (fun l a ->
             Xlist.fold (find chart j k) l (fun l b ->
               (rule (a,b)) @ l)),*)
          max lay ((max (layer chart i j) (layer chart j k)) + 1)) in
        (* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *)
(*         let l = LCGreductions.merge_symbols l in *)
(*          if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *)
        make_unique (add_list chart i k l lay) i k))) in
  (* print_endline "parse 2"; *)
  chart

let assign_not_parsed left right (t,sem) =
  let sem = if left = [] then sem else (print_endline "assign_not_parsed: ni 1"; sem) in
  let sem = if right = [] then sem else (print_endline "assign_not_parsed: ni 2"; sem) in
  t, sem

let rec dep_parse_rec references start_time timeout time_fun (DepNode(id,left,funct,right)) =
  (* printf "dep_parse_rec 1 id=%d\n%!" id; *)
  let time = time_fun () in
  if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
  let left = Xlist.map left (dep_parse_rec references start_time timeout time_fun) in
  let right = Xlist.map right (dep_parse_rec references start_time timeout time_fun) in
  (* printf "dep_parse_rec 2 id=%d\n%!" id; *)
  let funct,left = Xlist.fold left (funct,[]) (fun (funct,left) arg ->
    (* printf "larg: %s\n" (LCGstringOf.symbol_sem_list arg);
    printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct); *)
    match LCGrules.backward_application_conll references arg funct with
      [] -> (*printf "NOT PARSED\n";*) funct, arg :: left
    | funct -> merge_sems funct, left) in
  (* printf "dep_parse_rec 3 |right|=%d \n%!" (Xlist.size right); *)
  let funct,right = Xlist.fold right (funct,[]) (fun (funct,right) arg ->
    (* printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct);
    printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg);  *)
    match LCGrules.forward_application_conll references funct arg with
      [] -> (*printf "NOT PARSED\n";*) funct, arg :: right
    | funct -> merge_sems funct, right) in
  (* printf "dep_parse_rec 4\n%!"; *)
  if left = [] && right = [] then funct else (
  let xleft = Xlist.rev_map left (fun arg -> Xlist.rev_map arg LCGrules.set_x_type) in
  let xright = Xlist.rev_map right (fun arg -> Xlist.rev_map arg LCGrules.set_x_type) in
  (* printf "dep_parse_rec 5\n%!"; *)
  let xfunct,xleft = Xlist.fold xleft (funct,[]) (fun (xfunct,left) arg ->
    (* printf "larg: %s\n" (LCGstringOf.symbol_sem_list arg);
    printf "funct: %s\n" (LCGstringOf.symbol_sem_list xfunct); *)
    match LCGrules.backward_application_conll references arg xfunct with
      [] -> (*printf "NOT PARSED\n";*) xfunct, arg :: left
    | xfunct -> merge_sems xfunct, left) in
  (* printf "dep_parse_rec 6\n%!"; *)
  let xfunct,xright = Xlist.fold xright (xfunct,[]) (fun (xfunct,right) arg ->
    (* printf "funct: %s\n" (LCGstringOf.symbol_sem_list xfunct);
    printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *)
    match LCGrules.forward_application_conll references xfunct arg with
      [] -> (*printf "NOT PARSED\n";*) xfunct, arg :: right
    | xfunct -> merge_sems xfunct, right) in
  (* printf "dep_parse_rec 7\n%!"; *)
  if xleft = [] && xright = [] then xfunct else
  raise (NotDepParsed(id,left,funct,right)))

let dep_parse dep_chart references timeout time_fun =
(*   print_endline "dep_parse"; *)
  let start_time = time_fun () in
  let parsed_dep_chart = dep_parse_rec references start_time timeout time_fun dep_chart in
  parsed_dep_chart

let is_parsed chart =
  let n = last_node chart in
  Xlist.fold (find chart 0 n) false (fun b -> function
      LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<root>"]), _ -> true
    (* | LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<ors-sentence>"]), _ -> true *)
    | _ -> false)
  (* try
    let _ = Xlist.assoc (find chart 0 n) (LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<sentence>"])) in (* FIXME: !!! *)
    true
  with Not_found -> false *)

let is_dep_parsed = function
    [] -> false
  | [LCGtypes.Bracket(false,false,LCGtypes.Tensor[LCGtypes.Atom "<conll_root>"]),_] -> true
  | [LCGtypes.Bracket(false,false,LCGtypes.Imp(LCGtypes.Tensor[LCGtypes.Atom("<conll_root>")],LCGtypes.Forward,LCGtypes.Maybe _)),sem]-> true
  | [t,_] -> print_endline @@ LCGstringOf.grammar_symbol_prime t; failwith "is_dep_parsed"
  | l -> failwith ("is_dep_parsed " ^ (string_of_int @@ List.length l))

let get_parsed_term tokens chart =
  let n = last_node chart in
  let l = Xlist.fold (find chart 0 n) [] (fun l -> function
      LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<root>"]), sem -> (LCGtypes.Cut(LCGtypes.Tuple[sem])) :: l
    | LCGtypes.Bracket(false,false,LCGtypes.Imp(LCGtypes.Tensor[LCGtypes.Atom("<conll_root>")],LCGtypes.Forward,LCGtypes.Maybe _)) as t,sem->
         let sem = List.hd (LCGrules.deduce_optarg sem t) in
         (LCGtypes.Cut(LCGtypes.Tuple[sem])) :: l
    (* | LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<ors-sentence>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l *)
    | _ -> l) in
  let id = ExtArray.add tokens {PreTypes.empty_token with PreTypes.token=PreTypes.Lemma("<root>","interp",[])} in
  LCGtypes.Node{LCGrenderer.empty_node with
    LCGtypes.pred="<root>";
    LCGtypes.cat="interp";
    LCGtypes.id=id;
    LCGtypes.agf=WalTypes.NOSEM;
    LCGtypes.args=LCGrules.make_variant l}

let get_dep_parsed_term tokens = function
    [LCGtypes.Bracket(false,false,LCGtypes.Tensor[LCGtypes.Atom "<conll_root>"]),sem] ->
       let id = ExtArray.add tokens {PreTypes.empty_token with PreTypes.token=PreTypes.Lemma("<root>","interp",[])} in
       let l = [LCGtypes.Cut (LCGtypes.Tuple[sem])] in
       LCGtypes.Node{LCGrenderer.empty_node with
         LCGtypes.pred="<root>";
         LCGtypes.cat="interp";
         LCGtypes.id=id;
         LCGtypes.agf=WalTypes.NOSEM;
         LCGtypes.args=LCGrules.make_variant l}
  | _ -> failwith "get_dep_parsed_term"

(*FIXME:  Bębni na maszynie do pisania.
          Na myśl o czym brykasz?*)