disambSelPref.ml
5.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
(*
* ENIAM: Categorial Syntactic-Semantic Parser for Polish
* Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
* Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
open ENIAMwalTypes
open LCGtypes
open Printf
open Xstd
let fit_node1 t args w =
let w =
if t.agf = ADJUNCT || t.agf = CORE || t.agf = NOSEM || t.agf = CLAUSE || t.agf = SENTENCE then w else
(* if is_nosem_node t then fit_sel_prefs_nosem_node disamb ei t + w else *)
if t.position.role = "" && (t.agf = SUBJ || t.agf = OBJ || t.agf = ARG) then w + 20 else
let b =
if StringSet.mem t.hipero "0" then true else
Xlist.fold t.position.sel_prefs false (fun b s -> StringSet.mem t.hipero s || b) in
(if b then 0 else 1) + w in
Node{t with args=args},w
let fit_node2 t args w =
let b = Xlist.fold t.position.sel_prefs false (fun b s -> StringSet.mem t.hipero s || b) in
let t = {t with args=args} in
if b then Node t,w else
(match t.agf, t.position.gf with
ADJUNCT,_ -> (* FIXME: można dodać tuszowanie braków w walentym *)
let pos =
(* let r,a = paths_array.(t.id).PreTypes.lroles in
if r <> "" then (* FIXME: pomijam to, że role dla rzeczowników dotyczą tylko inst *)
{t.position with role=r; role_attr=a} else*)
{t.position with role=t.arole; role_attr=t.arole_attr} in
Node{t with position=pos}, w+1
| CLAUSE,NOGF -> Node t,w+0
| SENTENCE,NOGF -> Node t,w+0
| ARG,NOGF -> Node t,w+1
| CORE,NOGF ->
let pos = {t.position with role=t.arole; role_attr=t.arole_attr} in
Node{t with position=pos}, w+0
| OBJ,NOGF -> Node t,w+0
| SUBJ,NOGF -> Node t,w+0
| SUBJ,SUBJ -> Node t,w+2
| OBJ,OBJ -> Node t,w+2
| ARG,ARG -> Node t,w+1
| NOSEM,NOGF -> Node t,w+0
| NOGF,NOGF -> Node t,w+0
| NOSEM,NOSEM -> Node t,w+0
(* | , -> 0 *)
| a,g ->(* printf "fit_sel_prefs_rec: pred=%s agf=%s pos.gf=%s\n%!" t.pred (WalStringOf.gf a) (WalStringOf.gf g);*) Node t,w+1)
let rec fit_sel_prefs_choice fit_node_fun references disamb satisfaction r = function
Choice choice ->
let choice,sat = StringMap.fold choice (StringMap.empty,StringMap.empty) (fun (choice,sat) ei t ->
let t,w = fit_sel_prefs_variant fit_node_fun references disamb satisfaction t in
StringMap.add choice ei t, StringMap.add sat ei w) in
satisfaction.(r) <- sat;
Choice choice
| _ -> failwith "fit_sel_prefs_choice"
and fit_sel_prefs_variant fit_node_fun references disamb satisfaction = function
Variant(e,l) ->
let l,min_w = Xlist.fold l ([],max_int) (fun (l,min_w) (i,t) ->
let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction (e ^ i) t in
if w = min_w then (i,t) :: l, min_w else
if w < min_w then [i,t],w else l,min_w) in
Variant(e, List.rev l),min_w
| _ -> failwith "fit_sel_prefs_variant"
and fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei = function
Node t ->
let args,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t.args in
fit_node2 t args w
| Tuple l ->
let l,sum_w = Xlist.fold l ([],0) (fun (l,sum_w) t ->
let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t in
t :: l, sum_w + w) in
Tuple(List.rev l), sum_w
| Variant(e,l) as t ->
let l,min_w = Xlist.fold l ([],max_int) (fun (l,min_w) (i,t) ->
let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t in
if w = min_w then (i,t) :: l, min_w else
if w < min_w then [i,t],w else l,min_w) in
let l =
let map = Xlist.fold l TermSet.empty (fun map (_,t) -> TermSet.add map t) in
fst (TermSet.fold map ([],1) (fun (l,i) t -> (string_of_int i,t) :: l, i+1)) in
(match l with
[] -> failwith ("fit_sel_prefs_rec 1" ^ LCGstringOf.linear_term 0 t)
| [i,t] -> t,min_w
| _ -> Variant(e, List.rev l),min_w)
| Dot -> Dot, 0
| Val s -> Val s, 0
| Ref i ->
if disamb.(i) = Dot then (disamb.(i) <- fit_sel_prefs_choice fit_node_fun references disamb satisfaction i references.(i));
Ref i, (try StringMap.find satisfaction.(i) ei with Not_found -> failwith ("fit_sel_prefs_rec 3: r=" ^ string_of_int i ^ " ei=" ^ ei))
| t -> failwith ("fit_sel_prefs_rec 2: " ^ LCGstringOf.linear_term 0 t)
let fit_sel_prefs fit_node_fun references =
let disamb = Array.make (Array.length references) Dot in
let satisfaction = Array.make (Array.length references) StringMap.empty in
disamb.(0) <- fst (fit_sel_prefs_variant fit_node_fun references disamb satisfaction references.(0));
disamb
(***************************************************************************************)