ENIAM_LCGchart.ml 11.7 KB
(*
 *  ENIAM_LCGparser, a parser for Logical Categorial Grammar formalism
 *  Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open Xstd
open ENIAM_LCGtypes
open Printf
(* open ENIAMtokenizerTypes
open ENIAMlexSemanticsTypes *)

let make size = Array.make_matrix (size+1) (size+1) ([],0)

let last_node chart = Array.length chart - 1

let add chart i j v layer =
  chart.(i).(j) <- [v],layer;
  chart

let add_list chart i j l layer =
  chart.(i).(j) <- l,layer;
  chart

let add_inc chart i j v layer =
  let l,layer2 = chart.(i).(j) in
  chart.(i).(j) <- v :: l, max layer layer2;
  chart

let add_inc_list chart i j l layer =
  let l2,layer2 = chart.(i).(j) in
  chart.(i).(j) <- l @ l2, max layer layer2;
  chart

let find chart i j = fst chart.(i).(j)
let layer chart i j = snd chart.(i).(j)

let fold chart s f =
  Int.fold 0 (last_node chart) s (fun s i ->
      Int.fold 0 (last_node chart) s (fun s j ->
          let layer = layer chart i j in
          Xlist.fold (find chart i j) s (fun s (symbol,sem) ->
              f s (symbol,i,j,sem,layer))))

let rec find_paths_rec chart last i =
  if i = last then [[]] else
    Int.fold (i+1) last [] (fun paths j ->
        if chart.(i).(j) = [] then paths else
          let tails = find_paths_rec chart last j in
          if Xlist.size tails > 1000000 then failwith "find_paths_rec: to many paths" else
            Xlist.fold tails paths (fun paths tail -> (chart.(i).(j) :: tail) :: paths))

let find_paths chart =
  let last = last_node chart - 1 in
  find_paths_rec chart last 0

let get_no_entries chart =
  Int.fold 0 (last_node chart) 0 (fun n i ->
      Int.fold 0 (last_node chart) n (fun n j ->
          n + (Xlist.size (find chart i j))))

(* Pod referencją 0 będzie trzymany korzeń termu *)
let lazify chart =
  let new_chart = make (last_node chart) in
  let references = ExtArray.make (2 * last_node chart) Dot in
  let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *)
  let new_chart = fold chart new_chart (fun new_chart (symbol,i,j,sem,layer) ->
      let n = ExtArray.add references sem in
      add_inc new_chart i j (symbol,Ref n) layer) in
  new_chart, references

let rec dep_lazify_rec references (DepNode(id,left,l,right)) =
  (* Printf.printf "dep_lazify_rec %s\n" id; *)
  let l = Xlist.rev_map l ENIAM_LCGrules.flatten_functor in
  let l = Xlist.fold l [] (fun l (symbol,sem) ->
      let n = ExtArray.add references sem in
      (symbol,Ref n) :: l) in
  let left = Xlist.map left (dep_lazify_rec references) in
  let right = Xlist.map right (dep_lazify_rec references) in
  DepNode(id,left,l,right)

let dep_lazify dep_chart =
  let references = ExtArray.make 100 Dot in
  let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *)
  dep_lazify_rec references dep_chart, references

let merge_sems l = (* FIXME: dodać warianty *)
  let map = Xlist.fold l SymbolMap.empty (fun map (t,sem) -> SymbolMap.add_inc map t [sem] (fun l -> sem :: l)) in
  SymbolMap.fold map [] (fun l t sems -> (t,ENIAM_LCGrules.make_variant sems) :: l)

let make_unique chart i j =
  let l,layer = chart.(i).(j) in
  let l = merge_sems l in
  chart.(i).(j) <- l, layer;
  chart

let parse chart references timeout time_fun =
  (* print_endline "parse 1"; *)
  (* ENIAM_LCGrules.references := refs;
     ENIAM_LCGrules.next_reference := next_ref; *)
  let start_time = time_fun () in
  let last_node = last_node chart in
  let chart = Int.fold 2 last_node chart (fun chart len ->
      Int.fold 0 (last_node - len) chart (fun chart i ->
          let k = i + len in
          Int.fold 1 (len - 1) chart (fun chart d ->
              let time = time_fun () in
              if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
                let j = i + d in
                let l,lay = Xlist.fold ENIAM_LCGrules.rules (find chart i k,layer chart i k) (fun (l,lay) rule ->
                    (rule references (find chart i j) (find chart j k)) @ l,
                    (*          Xlist.fold (find chart i j) l (fun l a ->
                                 Xlist.fold (find chart j k) l (fun l b ->
                                   (rule (a,b)) @ l)),*)
                    max lay ((max (layer chart i j) (layer chart j k)) + 1)) in
                (* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *)
                (*         let l = LCGreductions.merge_symbols l in *)
                (*          if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *)
                make_unique (add_list chart i k l lay) i k))) in
  (* print_endline "parse 2"; *)
  chart

let assign_not_parsed left right (t,sem) =
  let sem = if left = [] then sem else (print_endline "assign_not_parsed: ni 1"; sem) in
  let sem = if right = [] then sem else (print_endline "assign_not_parsed: ni 2"; sem) in
  t, sem

let rec dep_parse_rec references start_time timeout time_fun (DepNode(id,left,funct,right)) =
  (* printf "dep_parse_rec 1 id=%d\n%!" id; *)
  let time = time_fun () in
  if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
    let left = Xlist.map left (dep_parse_rec references start_time timeout time_fun) in
    let right = Xlist.map right (dep_parse_rec references start_time timeout time_fun) in
    (* printf "dep_parse_rec 2 id=%d\n%!" id; *)
    let funct,left = Xlist.fold left (funct,[]) (fun (funct,left) arg ->
        (* printf "larg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg);
           printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list funct); *)
        match ENIAM_LCGrules.backward_application_conll references arg funct with
          [] -> (*printf "NOT PARSED\n";*) funct, arg :: left
        | funct -> merge_sems funct, left) in
    (* printf "dep_parse_rec 3 |right|=%d \n%!" (Xlist.size right); *)
    let funct,right = Xlist.fold right (funct,[]) (fun (funct,right) arg ->
        (* printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list funct);
           printf "rarg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg);  *)
        match ENIAM_LCGrules.forward_application_conll references funct arg with
          [] -> (*printf "NOT PARSED\n";*) funct, arg :: right
        | funct -> merge_sems funct, right) in
    (* printf "dep_parse_rec 4\n%!"; *)
    if left = [] && right = [] then funct else (
      let xleft = Xlist.rev_map left (fun arg -> Xlist.rev_map arg ENIAM_LCGrules.set_x_type) in
      let xright = Xlist.rev_map right (fun arg -> Xlist.rev_map arg ENIAM_LCGrules.set_x_type) in
      (* printf "dep_parse_rec 5\n%!"; *)
      let xfunct,xleft = Xlist.fold xleft (funct,[]) (fun (xfunct,left) arg ->
          (* printf "larg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg);
             printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list xfunct); *)
          match ENIAM_LCGrules.backward_application_conll references arg xfunct with
            [] -> (*printf "NOT PARSED\n";*) xfunct, arg :: left
          | xfunct -> merge_sems xfunct, left) in
      (* printf "dep_parse_rec 6\n%!"; *)
      let xfunct,xright = Xlist.fold xright (xfunct,[]) (fun (xfunct,right) arg ->
          (* printf "funct: %s\n" (ENIAM_LCGstringOf.symbol_sem_list xfunct);
             printf "rarg: %s\n" (ENIAM_LCGstringOf.symbol_sem_list arg); *)
          match ENIAM_LCGrules.forward_application_conll references xfunct arg with
            [] -> (*printf "NOT PARSED\n";*) xfunct, arg :: right
          | xfunct -> merge_sems xfunct, right) in
      (* printf "dep_parse_rec 7\n%!"; *)
      if xleft = [] && xright = [] then xfunct else
        raise (NotDepParsed(id,left,funct,right)))

let dep_parse dep_chart references timeout time_fun =
  (*   print_endline "dep_parse"; *)
  let start_time = time_fun () in
  let parsed_dep_chart = dep_parse_rec references start_time timeout time_fun dep_chart in
  parsed_dep_chart

let is_parsed chart =
  let n = last_node chart in
  Xlist.fold (find chart 0 n) false (fun b -> function
        Bracket(true,true,Tensor[Atom "<root>"]), _ -> true
      (* | Bracket(true,true,Tensor[Atom "<ors-sentence>"]), _ -> true *)
      | _ -> false)
(* try
   let _ = Xlist.assoc (find chart 0 n) (Bracket(true,true,Tensor[Atom "<sentence>"])) in (* FIXME: !!! *)
   true
   with Not_found -> false *)

let is_dep_parsed = function
    [] -> false
  | [Bracket(false,false,Tensor[Atom "<conll_root>"]),_] -> true
  | [Bracket(false,false,Imp(Tensor[Atom("<conll_root>")],Forward,Maybe _)),sem]-> true
  | [t,_] -> print_endline @@ ENIAM_LCGstringOf.grammar_symbol_prime t; failwith "is_dep_parsed"
  | l -> failwith ("is_dep_parsed " ^ (string_of_int @@ List.length l))

let get_parsed_term chart =
  let n = last_node chart in
  let l = Xlist.fold (find chart 0 n) [] (fun l -> function
        Bracket(true,true,Tensor[Atom "<root>"]), sem -> (Cut(Tuple[sem])) :: l
      | Bracket(false,false,Imp(Tensor[Atom("<conll_root>")],Forward,Maybe _)) as t,sem->
        let sem = List.hd (ENIAM_LCGrules.deduce_optarg sem t) in
        (Cut(Tuple[sem])) :: l
      (* | Bracket(true,true,Tensor[Atom "<ors-sentence>"]), sem -> (Cut (Tuple[sem])) :: l *)
      | _ -> l) in
  Node{ENIAM_LCGrenderer.empty_node with lemma="<root>"; args=ENIAM_LCGrules.make_variant l}

let get_dep_parsed_term = function
    [Bracket(false,false,Tensor[Atom "<conll_root>"]),sem] ->
    let l = [Cut (Tuple[sem])] in
    Node{ENIAM_LCGrenderer.empty_node with lemma="<root>"; args=ENIAM_LCGrules.make_variant l}
  | _ -> failwith "get_dep_parsed_term"

(* FIXME: poprawić poniższe *)
(*
let get_parsed_term tokens lex_sems chart =
  let n = last_node chart in
  let l = Xlist.fold (find chart 0 n) [] (fun l -> function
        Bracket(true,true,Tensor[Atom "<root>"]), sem -> (Cut(Tuple[sem])) :: l
      | Bracket(false,false,Imp(Tensor[Atom("<conll_root>")],Forward,Maybe _)) as t,sem->
        let sem = List.hd (ENIAM_LCGrules.deduce_optarg sem t) in
        (Cut(Tuple[sem])) :: l
      (* | Bracket(true,true,Tensor[Atom "<ors-sentence>"]), sem -> (Cut (Tuple[sem])) :: l *)
      | _ -> l) in
  let id = ExtArray.add tokens {empty_token with token=Lemma("<root>","interp",[])} in
  let _ = ExtArray.add lex_sems empty_lex_sem in
  Node{LCGrenderer.empty_node with
                pred="<root>";
                cat="interp";
                id=id;
                agf=ENIAMwalTypes.NOSEM;
                args=ENIAM_LCGrules.make_variant l}

let get_dep_parsed_term tokens lex_sems = function
    [Bracket(false,false,Tensor[Atom "<conll_root>"]),sem] ->
    let id = ExtArray.add tokens {empty_token with token=Lemma("<root>","interp",[])} in
    let _ = ExtArray.add lex_sems empty_lex_sem in
    let l = [Cut (Tuple[sem])] in
    Node{LCGrenderer.empty_node with
                  pred="<root>";
                  cat="interp";
                  id=id;
                  agf=ENIAMwalTypes.NOSEM;
                  args=ENIAM_LCGrules.make_variant l}
  | _ -> failwith "get_dep_parsed_term"
*)
(*FIXME:  Bębni na maszynie do pisania.
          Na myśl o czym brykasz?*)