LCGreductions.ml 11.7 KB
(*
 *  ENIAM: Categorial Syntactic-Semantic Parser for Polish
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open LCGtypes
open Xstd 

let variant_label_ref = ref []

let rec add_variant_label = function
    [] -> ["A"]
  | "Z" :: l -> "A" :: add_variant_label l
  | s :: l -> String.make 1 (Char.chr (Char.code (String.get s 0) + 1)) :: l
  
let get_variant_label () =
  variant_label_ref := add_variant_label (!variant_label_ref);
  String.concat "" (List.rev (!variant_label_ref))
  
let prepare_references t references next_reference =  
  let a = Array.make next_reference Dot in
  Xlist.iter references (fun (i,t) -> a.(i) <- t);
  a.(0) <- t;
  a
      
let prepare_references2 t references next_reference =  
  let a = Array.make next_reference Dot in
  TermMap.iter references (fun t i -> a.(i) <- t);
  a.(0) <- t;
  a

let rec extract_nth n rev = function
    [] -> failwith "extract_nth"
  | s :: l -> 
      if n = 1 then s, (List.rev rev) @ l 
      else extract_nth (n-1) (s :: rev) l
    
let rec is_reduced_rec = function
    Tuple l -> Xlist.fold l true (fun b t -> b && is_reduced_rec t)
  | Variant(_,l) -> Xlist.fold l true (fun b (_,t) -> b && is_reduced_rec t)
  | Dot -> true
  | Val s -> true
  | Node t -> Xlist.fold t.attrs true (fun b (_,t) -> b && is_reduced_rec t) && is_reduced_rec t.gs && is_reduced_rec t.args
  | Morf m -> true
  | Cut t -> is_reduced_rec t
  | Ref i -> true
  | _ -> false
  
let is_reduced = (*function
    Triple(_,_,_) as t -> is_reduced_rec t
  | _ -> false*) is_reduced_rec
  
let is_reduced_references references =
  Int.fold 0 (Array.length references - 1) true (fun b i ->
    b && is_reduced references.(i))
      
let rec assign_labels_rec = function      
    Tuple l -> Tuple(Xlist.map l assign_labels_rec)
  | Variant(_,l) -> Variant(get_variant_label (), fst (Xlist.fold l ([],1) (fun (l,i) (_,t) -> 
        (string_of_int i, assign_labels_rec t) :: l,i+1)))
  | Dot -> Dot
  | Val s -> Val s
  | Node t -> Node{t with args=assign_labels_rec t.args}
  | Morf m -> Morf m
  | Cut t -> Cut(assign_labels_rec t)
  | Ref i -> Ref i
  | _ -> failwith "assign_labels_rec"
  
let assign_labels references =
  Int.iter 0 (Array.length references - 1) (fun i ->
    references.(i) <- assign_labels_rec references.(i))
      
let rec remove_cuts_rec = function      
    Tuple l -> Tuple(Xlist.map l remove_cuts_rec)
  | Variant(e,l) -> Variant(e, Xlist.map l (fun (i,t) -> i, remove_cuts_rec t))
  | Dot -> Dot
  | Val s -> Val s
  | Node t -> Node{t with args=remove_cuts_rec t.args}
  | Morf m -> Morf m
  | Cut t -> remove_cuts_rec t
  | Ref i -> Ref i
  | _ -> failwith "remove_cuts_rec"
  
let remove_cuts references =
  Int.iter 0 (Array.length references - 1) (fun i ->
    references.(i) <- remove_cuts_rec references.(i))
      
let linear_term_beta_reduction4 references =
  let reduced = Array.make (Array.length references) Dot in
  let size = ref 0 in
  let refs = ref TermMap.empty in
  let next_ref = ref 1 in
  
  let rec flatten_variant set = function
    Variant(_,l) -> Xlist.fold l set (fun set (_,t) -> flatten_variant set t)
  | Cut t -> TermSet.add set (Cut t)
  | _ -> failwith "flatten_variant" in      
  
  let rec simplify_args = function
    Tuple l -> Tuple(Xlist.map l simplify_args)
  | Dot -> Dot
  | Variant(e,l) -> (match TermSet.to_list (flatten_variant TermSet.empty (Variant(e,l))) with
        [] -> failwith "simplify_args 1"
      | [t] -> t
      | l -> Variant("",Xlist.map l (fun t -> ("0",t))))
  | Cut t -> Cut t
  | t -> failwith ("simplify_args 2: " ^ LCGstringOf.linear_term 0 t) in
  
  let rec create_cut_refs = function
    Node t -> 
        let t = {t with args=simplify_args t.args} in
        (try
          let i = TermMap.find !refs (Node t) in
          Cut(Ref i)
        with Not_found ->
          refs := TermMap.add !refs (Node t) !next_ref;
          let t = Cut(Ref !next_ref) in
          incr next_ref;
          t)
  | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i,create_cut_refs t)) 
  | _ -> failwith "create_cut_refs" in
  
  let rec linear_term_beta_reduction subst = function
    Var v -> (try StringMap.find subst v with Not_found -> Var v) (* zakladam, ze termy, ktore sa podstawiane na zmienne nie maja zmiennych wolnych *)
  | Tuple l -> 
      let l = Xlist.map l (linear_term_beta_reduction subst) in
      (match Xlist.fold l [] (fun l t -> if t = Dot then l else t :: l) with
         [] -> Dot
       | [t] -> t
       | l -> Tuple(List.rev l))
  | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i,linear_term_beta_reduction subst t))
  | VariantVar(v,t) -> VariantVar(v, linear_term_beta_reduction subst t)
  | ProjVar(v,t) -> 
      (match linear_term_beta_reduction subst t with
        VariantVar(v2,t2) -> if v = v2 then t2 else ProjVar(v,VariantVar(v2,t2))
      | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,s) -> i,linear_term_beta_reduction subst (ProjVar(v,s))))
      | t2 -> ProjVar(v,t2))
  | SubstVar v -> SubstVar v
  | Subst(s,v,t) -> 
      (match linear_term_beta_reduction subst s with
      | Tuple l -> Tuple(Xlist.map l (fun s -> linear_term_beta_reduction subst (Subst(s,v,t))))
      | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,s) -> i,linear_term_beta_reduction subst (Subst(s,v,t))))
      | Lambda(v2,s) -> Lambda(v2, linear_term_beta_reduction subst (Subst(s,v,t)))
      | Dot -> Dot
      | SetAttr(e,s1,s2) -> SetAttr(e,linear_term_beta_reduction subst (Subst(s1,v,t)),linear_term_beta_reduction subst (Subst(s2,v,t)))
      | Val s -> Val s
      | Var w -> if w = v then t else Subst(Var w,v,t)
      | SubstVar w -> if w = v then t else SubstVar w
      | Node s -> Node{s with attrs=Xlist.map s.attrs (fun (e,s) -> e, linear_term_beta_reduction subst (Subst(s,v,t)));
                              gs=linear_term_beta_reduction subst (Subst(s.gs,v,t));
                              args=linear_term_beta_reduction subst (Subst(s.args,v,t))}
      | Morf m -> Morf m
      | Gf s -> Gf s
      | Cut(Ref i) -> Cut(Ref i)
      | Cut s -> Cut(Subst(s,v,t))
      | s2 -> Subst(s2,v,t)) 
  | Inj(n,t) -> Inj(n,linear_term_beta_reduction subst t)
  | Case(t,l) -> 
      (match linear_term_beta_reduction subst t with
        Inj(n,t) -> 
          if Xlist.size l < n then Case(Inj(n,t),l) else 
          let v, r = List.nth l (n-1) in
          let subst = StringMap.add subst v t in
          linear_term_beta_reduction subst r
      | Variant(e,l2) -> linear_term_beta_reduction subst (Variant(e,Xlist.map l2 (fun (i,t2) -> i,Case(t2,l))))
      | t2 -> Case(t2,Xlist.map l (fun (v,t) -> v, linear_term_beta_reduction subst t))) (* FIXME alfa-konwersja i przykrywanie *)
  | Lambda(v,t) -> Lambda(v, linear_term_beta_reduction subst t)
  | LambdaSet(l,t) -> LambdaSet(l, linear_term_beta_reduction subst t)
  | LambdaRot(n,t) -> 
      (match linear_term_beta_reduction subst t with
        Lambda(v,t) -> if n = 1 then Lambda(v,t) else LambdaRot(n,Lambda(v,t))
      | LambdaSet([v],t) -> if n = 1 then Lambda(v,t) else LambdaRot(n,LambdaSet([v],t))
      | LambdaSet(l,t) -> 
          if Xlist.size l < n then LambdaRot(n,LambdaSet(l,t)) else 
          let s,l = extract_nth n [] l in
          Lambda(s,LambdaSet(l,t))
      | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,s) -> i,linear_term_beta_reduction subst (LambdaRot(n,s))))
      | t2 -> LambdaRot(n,t2))
  | App(s,t) -> 
      let t = linear_term_beta_reduction subst t in
      (match linear_term_beta_reduction subst s, t with
        Lambda(v,s),_ ->   
          let subst = StringMap.add subst v t in
          linear_term_beta_reduction subst s
      | LambdaSet([v],s),_ ->  (* FIXME ten przypadek nie powinien miec miejsca, jego wystepowanie wskazuje na brak rotacji przy maczowaniu *)
          let subst = StringMap.add subst v t in
          linear_term_beta_reduction subst s
      | Variant(e,l),_ -> linear_term_beta_reduction subst (Variant(e,Xlist.map l (fun (i,s) -> i,App(s,t))))
      | t2,_ -> App(t2,t))
  | Dot -> Dot
  | Fix(f,t) ->
      (match linear_term_beta_reduction subst f with
         Empty s -> linear_term_beta_reduction subst s
       | Apply s -> linear_term_beta_reduction subst (App(t,s))
       | Insert(s1,s2) -> Tuple[Fix(s1,t);Fix(s2,t)]
       | f -> Fix(f,linear_term_beta_reduction subst t))
  | Empty t -> Empty(linear_term_beta_reduction subst t)
  | Apply t -> Apply(linear_term_beta_reduction subst t)
  | Insert(s,t) -> Insert(linear_term_beta_reduction subst s,linear_term_beta_reduction subst t)
  | Val s -> Val s
  | SetAttr(e,s,t) -> 
      (match linear_term_beta_reduction subst t with
         Dot -> Dot
       | Tuple l -> linear_term_beta_reduction subst (Tuple(Xlist.map l (fun t -> SetAttr(e,s,t))))
       | Node t -> (match e,s with
             "GF",Gf gf ->  Node{t with agf=gf}
           | "MORF",Morf morf -> Node{t with amorf=morf}
           | "AROLE",Val arole ->  Node{t with arole=arole}
           | _ -> Node{t with attrs=(e,linear_term_beta_reduction subst s) :: t.attrs})
       | Variant(e2,l) -> Variant(e2,Xlist.map l (fun (i,t) -> i,linear_term_beta_reduction subst (SetAttr(e,s,t))))
       | t -> SetAttr(e,s,t))
  | Node t -> 
     if !size > Paths.lcg_no_nodes then raise SemTooBig;
     incr size;
     Node{t with attrs=Xlist.map t.attrs (fun (e,t) -> e, linear_term_beta_reduction subst t);
                          gs=linear_term_beta_reduction subst t.gs;
                          args=linear_term_beta_reduction subst t.args}
  | Morf m -> Morf m
  | Gf s -> Gf s
  | Choice _ -> failwith "linear_term_beta_reduction"
  | Concept _ -> failwith "linear_term_beta_reduction"
  | Context _ -> failwith "linear_term_beta_reduction"
  | Relation _ -> failwith "linear_term_beta_reduction"
  | RevRelation _ -> failwith "linear_term_beta_reduction"
  | SingleRelation _ -> failwith "linear_term_beta_reduction"
  | AddRelation _ -> failwith "linear_term_beta_reduction"
  | RemoveRelation _ -> failwith "linear_term_beta_reduction"
  | SetContextName _ -> failwith "linear_term_beta_reduction"
  | Ref i -> (* nie ma problemu przy wywoływaniu z różnymi podstawieniami, bo termy w poszczególnych referencjach nie mają zmiennych wolnych
                reduced zawiera termy bez zmiennych *)
     if reduced.(i) = Dot then (
       let t = linear_term_beta_reduction subst references.(i) in
       if is_reduced t then reduced.(i) <- t else references.(i) <- t;
       t)
     else reduced.(i)
  | Cut(Ref i) -> Cut(Ref i)
  | Cut t ->
      let t = linear_term_beta_reduction subst t in
      if is_reduced t then create_cut_refs t else Cut t
  in
  
  linear_term_beta_reduction StringMap.empty references.(0), !refs, !next_ref
   
(* dodać usuwanie jednorazowych etykiet i 
   zastąpić Cut(Ref i) przez coś innego *)
let reduce t references next_reference = 
  let references = prepare_references t references next_reference in
(*      print_references "references1" references;        *)
  let t,references,next_reference = linear_term_beta_reduction4 references in
  let references = prepare_references2 t references next_reference in
(*     print_references "references2" references;    *)
  references