disambSelPref.ml 5.84 KB
(*
 *  ENIAM: Categorial Syntactic-Semantic Parser for Polish
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)
 
open WalTypes
open LCGtypes
open Printf
open Xstd
            
let fit_node1 t args w =
      let w = 
        if t.agf = ADJUNCT || t.agf = CORE || t.agf = NOSEM || t.agf = CLAUSE || t.agf = SENTENCE then w else 
(*         if is_nosem_node t then fit_sel_prefs_nosem_node disamb ei t + w else   *)
        if t.position.role = "" && (t.agf = SUBJ || t.agf = OBJ || t.agf = ARG) then w + 20 else
        let b = 
          if StringSet.mem t.hipero "0" then true else
          Xlist.fold t.position.sel_prefs false (fun b s -> StringSet.mem t.hipero s || b) in
        (if b then 0 else 1) + w in   
      Node{t with args=args},w
      
let fit_node2 t args w =
      let b = Xlist.fold t.position.WalTypes.sel_prefs false (fun b s -> StringSet.mem t.hipero s || b) in
      let t = {t with args=args} in
      if b then Node t,w else 
        (match t.agf, t.position.WalTypes.gf with
          WalTypes.ADJUNCT,_ -> (* FIXME: można dodać tuszowanie braków w walentym *)
            let pos = 
(*              let r,a = paths_array.(t.id).PreTypes.lroles in
              if r <> "" then (* FIXME: pomijam to, że role dla rzeczowników dotyczą tylko inst *)
                {t.position with WalTypes.role=r; WalTypes.role_attr=a} else*)
              {t.position with WalTypes.role=t.arole; WalTypes.role_attr=t.arole_attr} in
            Node{t with position=pos}, w+1
        | WalTypes.CLAUSE,WalTypes.NOGF -> Node t,w+0
        | WalTypes.SENTENCE,WalTypes.NOGF -> Node t,w+0
        | WalTypes.ARG,WalTypes.NOGF -> Node t,w+1
        | WalTypes.CORE,WalTypes.NOGF -> 
            let pos =  {t.position with WalTypes.role=t.arole; WalTypes.role_attr=t.arole_attr} in
            Node{t with position=pos}, w+0
        | WalTypes.OBJ,WalTypes.NOGF -> Node t,w+0
        | WalTypes.SUBJ,WalTypes.NOGF -> Node t,w+0
        | WalTypes.SUBJ,WalTypes.SUBJ -> Node t,w+2
        | WalTypes.OBJ,WalTypes.OBJ -> Node t,w+2
        | WalTypes.ARG,WalTypes.ARG -> Node t,w+1
        | WalTypes.NOSEM,WalTypes.NOGF -> Node t,w+0 
        | WalTypes.NOGF,WalTypes.NOGF -> Node t,w+0 
        | WalTypes.NOSEM,WalTypes.NOSEM -> Node t,w+0
(*         | WalTypes.,WalTypes. -> 0  *)
        | a,g ->(* printf "fit_sel_prefs_rec: pred=%s agf=%s pos.gf=%s\n%!" t.pred (WalStringOf.gf a) (WalStringOf.gf g);*) Node t,w+1)
 
let rec fit_sel_prefs_choice fit_node_fun references disamb satisfaction r = function 
    Choice choice -> 
      let choice,sat = StringMap.fold choice (StringMap.empty,StringMap.empty) (fun (choice,sat) ei t ->
        let t,w = fit_sel_prefs_variant fit_node_fun references disamb satisfaction t in
        StringMap.add choice ei t, StringMap.add sat ei w) in
      satisfaction.(r) <- sat;
      Choice choice
  | _ -> failwith "fit_sel_prefs_choice"
  
and fit_sel_prefs_variant fit_node_fun references disamb satisfaction = function 
    Variant(e,l) -> 
      let l,min_w = Xlist.fold l ([],max_int) (fun (l,min_w) (i,t) ->
        let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction (e ^ i) t in
        if w = min_w then (i,t) :: l, min_w else
        if w < min_w then [i,t],w else l,min_w) in
    Variant(e, List.rev l),min_w
  | _ -> failwith "fit_sel_prefs_variant"
  
and fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei = function 
    Node t -> 
      let args,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t.args in
      fit_node2 t args w
  | Tuple l -> 
      let l,sum_w = Xlist.fold l ([],0) (fun (l,sum_w) t ->
        let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t in
        t :: l, sum_w + w) in
      Tuple(List.rev l), sum_w
  | Variant(e,l) as t -> 
      let l,min_w = Xlist.fold l ([],max_int) (fun (l,min_w) (i,t) ->
        let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t in
        if w = min_w then (i,t) :: l, min_w else
        if w < min_w then [i,t],w else l,min_w) in
      let l = 
        let map = Xlist.fold l TermSet.empty (fun map (_,t) -> TermSet.add map t) in
        fst (TermSet.fold map ([],1) (fun (l,i) t -> (string_of_int i,t) :: l, i+1)) in
      (match l with
        [] -> failwith ("fit_sel_prefs_rec 1" ^ LCGstringOf.linear_term 0 t)
      | [i,t] -> t,min_w
      | _ -> Variant(e, List.rev l),min_w)
  | Dot -> Dot, 0
  | Val s -> Val s, 0
  | Ref i -> 
       if disamb.(i) = Dot then (disamb.(i) <- fit_sel_prefs_choice fit_node_fun references disamb satisfaction i references.(i));
       Ref i, (try StringMap.find satisfaction.(i) ei with Not_found -> failwith ("fit_sel_prefs_rec 3: r=" ^ string_of_int i ^ " ei=" ^ ei))
  | t -> failwith ("fit_sel_prefs_rec 2: " ^ LCGstringOf.linear_term 0 t)

let fit_sel_prefs fit_node_fun references =
  let disamb = Array.make (Array.length references) Dot in
  let satisfaction = Array.make (Array.length references) StringMap.empty in
  disamb.(0) <- fst (fit_sel_prefs_variant fit_node_fun references disamb satisfaction references.(0));
  disamb
 
(***************************************************************************************)