LCGchart.ml 5.02 KB
(*
 *  ENIAM: Categorial Syntactic-Semantic Parser for Polish
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open Xstd 
open LCGtypes
open Printf

let make size = Array.make_matrix (size+1) (size+1) ([],0)

let last_node graph = Array.length graph - 1

let add graph i j v layer = 
  graph.(i).(j) <- [v],layer;
  graph
    
let add_list graph i j l layer = 
  graph.(i).(j) <- l,layer;
  graph
    
let add_inc graph i j v layer = 
  let l,layer2 = graph.(i).(j) in
  graph.(i).(j) <- v :: l, max layer layer2;
  graph
    
let add_inc_list graph i j l layer = 
  let l2,layer2 = graph.(i).(j) in
  graph.(i).(j) <- l @ l2, max layer layer2;
  graph
    
let find graph i j = fst graph.(i).(j)
let layer graph i j = snd graph.(i).(j)
  
let fold graph s f =
  Int.fold 0 (last_node graph) s (fun s i -> 
    Int.fold 0 (last_node graph) s (fun s j -> 
      let layer = layer graph i j in
      Xlist.fold (find graph i j) s (fun s (symbol,sem) ->
        f s (symbol,i,j,sem,layer))))

let rec find_paths_rec graph last i =
  if i = last then [[]] else
  Int.fold (i+1) last [] (fun paths j ->
    if graph.(i).(j) = [] then paths else
    let tails = find_paths_rec graph last j in
    if Xlist.size tails > 1000000 then failwith "find_paths_rec: to many paths" else
    Xlist.fold tails paths (fun paths tail -> (graph.(i).(j) :: tail) :: paths))
 
let find_paths graph =
  let last = last_node graph - 1 in
  find_paths_rec graph last 0
  
let get_no_entries graph =
  Int.fold 0 (last_node graph) 0 (fun n i -> 
    Int.fold 0 (last_node graph) n (fun n j -> 
      n + (Xlist.size (find graph i j))))
    
(* Pod referencją 0 będzie trzymany korzeń termu *)
let lazify graph =
  let new_graph = make (last_node graph) in
  fold graph (new_graph,[],1) (fun (new_graph,references,n) (symbol,i,j,sem,layer) ->
    add_inc new_graph i j (symbol,Ref n) layer,
    (n,sem) :: references,
    n+1)
    
let merge_sems l = (* FIXME: dodać warianty *)
  let map = Xlist.fold l SymbolMap.empty (fun map (t,sem) -> SymbolMap.add_inc map t [sem] (fun l -> sem :: l)) in
  SymbolMap.fold map [] (fun l t sems -> (t,LCGrules.make_variant sems) :: l)

let make_unique graph i j = 
  let l,layer = graph.(i).(j) in
  let l = merge_sems l in
  graph.(i).(j) <- l, layer;
  graph
  
let parse timeout graph refs next_ref time_fun = 
(*   print_endline "parse"; *)
  LCGrules.references := refs;
  LCGrules.next_reference := next_ref;
  let start_time = time_fun () in
  let last_node = last_node graph in
  let graph = Int.fold 2 last_node graph (fun graph len ->
    Int.fold 0 (last_node - len) graph (fun graph i ->
      let k = i + len in
      Int.fold 1 (len - 1) graph (fun graph d ->
        let time = time_fun () in
        if time -. start_time > timeout then raise (Timeout(time -. start_time)) else
        let j = i + d in
        let l,lay = Xlist.fold LCGrules.rules (find graph i k,layer graph i k) (fun (l,lay) rule ->
          (rule (find graph i j) (find graph j k)) @ l,
(*          Xlist.fold (find graph i j) l (fun l a ->
             Xlist.fold (find graph j k) l (fun l b ->
               (rule (a,b)) @ l)),*)
          max lay ((max (layer graph i j) (layer graph j k)) + 1)) in 
        (* print_int i; print_string " "; print_int j; print_string " "; print_int k; print_newline (); *)
(*         let l = LCGreductions.merge_symbols l in *)
(*          if Xlist.size l > 0 then Printf.printf "parse: %d-%d |l|=%d\n%!" i k (Xlist.size l); *)
        make_unique (add_list graph i k l lay) i k))) in
  graph, !LCGrules.references, !LCGrules.next_reference
  
let is_parsed graph =
  let n = last_node graph in
  try
    let _ = Xlist.assoc (find graph 0 n) (LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<query>"])) in 
    true
  with Not_found -> false

let get_parsed_term graph =
  let n = last_node graph in
  let l = Xlist.fold (find graph 0 n) [] (fun l -> function 
      LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<query>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l
    | _ -> l) in
  LCGtypes.Node{LCGrenderer.empty_node with 
    LCGtypes.pred="<root>"; 
    LCGtypes.cat="interp"; 
    LCGtypes.id=LCGrenderer.get_pro_id ();
    LCGtypes.agf=WalTypes.NOSEM; 
    LCGtypes.args=LCGrules.make_variant l}
    
    
(*FIXME:  Bębni na maszynie do pisania. 
          Na myśl o czym brykasz?*)