LCGchart.ml 8.47 KB
(*
 *  ENIAM: Categorial Syntactic-Semantic Parser for Polish
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open Xstd
open LCGtypes
open Printf

let make size = Array.make_matrix (size+1) (size+1) ([],0)

let last_node chart = Array.length chart - 1

let add chart i j v layer =
  chart.(i).(j) <- [v],layer;
  chart

let add_list chart i j l layer =
  chart.(i).(j) <- l,layer;
  chart

let add_inc chart i j v layer =
  let l,layer2 = chart.(i).(j) in
  chart.(i).(j) <- v :: l, max layer layer2;
  chart

let add_inc_list chart i j l layer =
  let l2,layer2 = chart.(i).(j) in
  chart.(i).(j) <- l @ l2, max layer layer2;
  chart

let find chart i j = fst chart.(i).(j)
let layer chart i j = snd chart.(i).(j)

let fold chart s f =
  Int.fold 0 (last_node chart) s (fun s i ->
    Int.fold 0 (last_node chart) s (fun s j ->
      let layer = layer chart i j in
      Xlist.fold (find chart i j) s (fun s (symbol,sem) ->
        f s (symbol,i,j,sem,layer))))

let rec find_paths_rec chart last i =
  if i = last then [[]] else
  Int.fold (i+1) last [] (fun paths j ->
    if chart.(i).(j) = [] then paths else
    let tails = find_paths_rec chart last j in
    if Xlist.size tails > 1000000 then failwith "find_paths_rec: to many paths" else
    Xlist.fold tails paths (fun paths tail -> (chart.(i).(j) :: tail) :: paths))

let find_paths chart =
  let last = last_node chart - 1 in
  find_paths_rec chart last 0

let get_no_entries chart =
  Int.fold 0 (last_node chart) 0 (fun n i ->
    Int.fold 0 (last_node chart) n (fun n j ->
      n + (Xlist.size (find chart i j))))

(* Pod referencją 0 będzie trzymany korzeń termu *)
let lazify chart =
  let new_chart = make (last_node chart) in
  let references = ExtArray.make (2 * last_node chart) Dot in
  let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *)
  let new_chart = fold chart new_chart (fun new_chart (symbol,i,j,sem,layer) ->
    let n = ExtArray.add references sem in
    add_inc new_chart i j (symbol,Ref n) layer) in
  new_chart, references

let rec dep_lazify_rec references (DepNode(id,left,l,right)) =
  (* Printf.printf "dep_lazify_rec %s\n" id; *)
  let l = Xlist.rev_map l LCGrules.flatten_functor in
  let l = Xlist.fold l [] (fun l (symbol,sem) ->
    let n = ExtArray.add references sem in
    (symbol,Ref n) :: l) in
  let left = Xlist.map left (dep_lazify_rec references) in
  let right = Xlist.map right (dep_lazify_rec references) in
  DepNode(id,left,l,right)

let dep_lazify dep_graph =
  let references = ExtArray.make 100 Dot in
  let _ = ExtArray.add references Dot in (* to jest potrzebne by na pozycji 0 umieścić korzeń termu *)
  dep_lazify_rec references dep_graph, references

let merge_sems l = (* FIXME: dodać warianty *)
  let map = Xlist.fold l SymbolMap.empty (fun map (t,sem) -> SymbolMap.add_inc map t [sem] (fun l -> sem :: l)) in
  SymbolMap.fold map [] (fun l t sems -> (t,LCGrules.make_variant sems) :: l)

let make_unique chart i j =
  let l,layer = chart.(i).(j) in
  let l = merge_sems l in
  chart.(i).(j) <- l, layer;
  chart

let parse chart references timeout time_fun =
  (* print_endline "parse 1"; *)
  (* LCGrules.references := refs;
  LCGrules.next_reference := next_ref; *)
  let start_time = time_fun () in
  let last_node = last_node chart in
  let chart = Int.fold 2 last_node chart (fun chart len ->
    Int.fold 0 (last_node - len) chart (fun chart i ->
      let k = i + len in
      Int.fold 1 (len - 1) chart (fun chart d ->
        let time = time_fun () in
        if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
        let j = i + d in
        let l,lay = Xlist.fold LCGrules.rules (find chart i k,layer chart i k) (fun (l,lay) rule ->
          (rule references (find chart i j) (find chart j k)) @ l,
(*          Xlist.fold (find chart i j) l (fun l a ->
             Xlist.fold (find chart j k) l (fun l b ->
               (rule (a,b)) @ l)),*)
          max lay ((max (layer chart i j) (layer chart j k)) + 1)) in
        (* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *)
(*         let l = LCGreductions.merge_symbols l in *)
(*          if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *)
        make_unique (add_list chart i k l lay) i k))) in
  (* print_endline "parse 2"; *)
  chart

(* let rec dep_parse2 funct = function
    larg :: left, rarg :: right ->
      (dep_parse2 (LCGrules.forward_application funct rarg) (larg :: left, right)) @
      (dep_parse2 (LCGrules.backward_application larg funct) (left, rarg :: right))
  | larg :: left, [] -> dep_parse2 (LCGrules.backward_application larg funct) (left, [])
  | [], rarg :: right -> dep_parse2 (LCGrules.forward_application funct rarg) ([], right)
  | [], [] -> funct *)

let assign_not_parsed left right (t,sem) =
  let sem = if left = [] then sem else (print_endline "assign_not_parsed: ni 1"; sem) in
  let sem = if right = [] then sem else (print_endline "assign_not_parsed: ni 2"; sem) in
  t, sem

let rec dep_parse_rec references timeout time_fun (DepNode(id,left,l,right)) =
  let left = Xlist.map left (dep_parse_rec references timeout time_fun) in
  let right = Xlist.map right (dep_parse_rec references timeout time_fun) in
  (* printf "dep_parse_rec id=%s\n" id; *)
  let l,left = Xlist.fold left (l,[]) (fun (funct,left) arg ->
    (* printf "larg: %s\n" (LCGstringOf.symbol_sem_list arg);
    printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct); *)
    match LCGrules.backward_application references arg funct with
      [] -> (*printf "NOT PARSED\n";*) l, arg :: left
    | l -> merge_sems l, left) in
  let l,right = Xlist.fold right (l,[]) (fun (funct,right) arg ->
    (* printf "funct: %s\n" (LCGstringOf.symbol_sem_list funct);
    printf "rarg: %s\n" (LCGstringOf.symbol_sem_list arg); *)
    match LCGrules.forward_application references funct arg with
      [] -> (*printf "NOT PARSED\n";*) l, arg :: right
    | l -> merge_sems l, right) in
  Xlist.rev_map l (assign_not_parsed left right)
  (* merge_sems (dep_parse2 l (left,right)) *)

let dep_parse dep_graph references timeout time_fun =
(*   print_endline "dep_parse"; *)
  (* LCGrules.references := refs;
  LCGrules.next_reference := next_ref; *)
  let start_time = time_fun () in
  let dep_graph = dep_parse_rec references timeout time_fun dep_graph in
  dep_graph

let is_parsed chart =
  let n = last_node chart in
  try
    let _ = Xlist.assoc (find chart 0 n) (LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom (*"<query>"*)"<sentence>"])) in (* FIXME: !!! *)
    true
  with Not_found -> false

let is_dep_parsed = function
    [] -> false
  | [LCGtypes.Bracket(false,false,LCGtypes.Tensor[LCGtypes.Atom "<conll_root>"]),_] -> true
  | _ -> failwith "is_dep_parsed"

let get_parsed_term chart =
  let n = last_node chart in
  let l = Xlist.fold (find chart 0 n) [] (fun l -> function
      LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom (*"<query>"*)"<sentence>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l (* FIXME: !!! *)
    | _ -> l) in
  LCGtypes.Node{LCGrenderer.empty_node with
    LCGtypes.pred="<root>";
    LCGtypes.cat="interp";
    LCGtypes.id=LCGrenderer.get_pro_id ();
    LCGtypes.agf=WalTypes.NOSEM;
    LCGtypes.args=LCGrules.make_variant l}

let get_dep_parsed_term = function
    [LCGtypes.Bracket(false,false,LCGtypes.Tensor[LCGtypes.Atom "<conll_root>"]),sem] ->
       let l = [LCGtypes.Cut (LCGtypes.Tuple[sem])] in
       LCGtypes.Node{LCGrenderer.empty_node with
         LCGtypes.pred="<root>";
         LCGtypes.cat="interp";
         LCGtypes.id=LCGrenderer.get_pro_id ();
         LCGtypes.agf=WalTypes.NOSEM;
         LCGtypes.args=LCGrules.make_variant l}
  | _ -> failwith "get_dep_parsed_term"

(*FIXME:  Bębni na maszynie do pisania.
          Na myśl o czym brykasz?*)