ENIAMsemValence.ml
7.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
(*
* ENIAMexec implements ENIAM processing stream
* Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
* Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
*
* This library is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*)
open ENIAMexecTypes
open ENIAM_LCGtypes
open ENIAM_LCGlexiconTypes
open Xstd
type pos = {role: linear_term; role_attr: linear_term; selprefs: linear_term;
is_necessary: bool; morfs: StringSet.t}
let match_value v2 = function
Val v -> if v = v2 then Val v else raise Not_found
| _ -> failwith "match_value"
let rec apply_selector v2 = function
(_,[]) -> failwith "apply_selector"
| Negation,("NEGATION",v) :: l -> ("NEGATION",match_value v2 v) :: l
| Aspect,("ASPECT",v) :: l -> ("ASPECT",match_value v2 v) :: l
| Mood,("MOOD",v) :: l -> ("MOOD",match_value v2 v) :: l
| sel,(attr,v) :: l -> (*print_endline ("apply_selector: " ^ ENIAMcategoriesPL.string_of_selector sel ^ " " ^ attr);*) (attr,v) :: (apply_selector v2 (sel,l))
let rec apply_selectors attrs = function
[] -> attrs
| (sel,Eq,[v]) :: l -> apply_selectors (apply_selector v (sel,attrs)) l
| (sel,Neq,vals) :: l -> failwith "apply_selectors"
| _ -> failwith "apply_selectors"
let rec get_arg_symbols_variant arg_symbols = function
Ref i -> [arg_symbols.(i),Ref i]
| Variant(e,l) ->
let map = Xlist.fold l StringMap.empty (fun map (i,t) ->
Xlist.fold (get_arg_symbols_variant arg_symbols t) map (fun map (arg_symbol,t) ->
StringMap.add_inc map arg_symbol [i,t] (fun l -> (i,t) :: l))) in
StringMap.fold map [] (fun found arg_symbol l -> (arg_symbol,Variant(e,l)) :: found)
| t -> failwith ("get_arg_symbols_variant: " ^ ENIAM_LCGstringOf.linear_term 0 t)
let rec get_arg_symbols_tuple arg_symbols rev = function
Dot -> rev
| Tuple l -> Xlist.fold l rev (get_arg_symbols_tuple arg_symbols)
| t -> (get_arg_symbols_variant arg_symbols t) :: rev
let rec match_arg_positions arg rev = function
p :: positions ->
let l = Xlist.fold arg [] (fun l (arg_symbol,t) ->
if StringSet.mem p.morfs arg_symbol then t :: l else l) in
(match l with
[] -> match_arg_positions arg (p :: rev) positions
| [t] ->
let t = SetAttr("role",p.role,SetAttr("role_attr",p.role_attr,SetAttr("selprefs",p.selprefs,t))) in
(t, rev @ positions) :: (match_arg_positions arg (p :: rev) positions)
| _ -> failwith "match_arg_positions: ni")
| [] -> [] (*failwith "match_arg_positions"*) (* FIXME: to nie musi być błąd *)
(* Jeśli ta funkcja zwróci pustą listę, oznacza to, że argumentów nie dało się dopasować do pozycji *)
let rec match_args_positions_rec positions = function
arg :: args ->
Xlist.fold (match_arg_positions arg [] positions) [] (fun found (arg_pos,positions) ->
Xlist.fold (match_args_positions_rec positions args) found (fun found l -> (arg_pos :: l) :: found))
| [] ->
let b = Xlist.fold positions false (fun b p -> p.is_necessary || b) in
if b then [] else [[]] (* FIXME: miejsce na wstawianie pro? *)
let match_args_positions args positions =
Xlist.rev_map (match_args_positions_rec positions args) (function
[] -> Dot
| [t] -> t
| l -> Tuple l)
let translate_position p =
{role = Val p.ENIAMwalTypes.role;
role_attr = Val p.ENIAMwalTypes.role_attr;
selprefs = (match p.ENIAMwalTypes.sel_prefs with
[] -> Dot
| [s] -> Val s
| l -> Tuple(Xlist.rev_map l (fun s -> Val s)));
is_necessary = Xlist.fold p.ENIAMwalTypes.morfs true (fun b -> function LCG One -> false | _ -> b);
morfs = Xlist.fold position.ENIAMwalTypes.morfs StringSet.empty (fun morfs morf ->
StringSet.add morfs (string_of_morf morf))}
let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function
Ref i ->
if IntSet.mem visited i then Ref i,visited else
let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols (IntSet.add visited i) tree.(i) in
tree.(i) <- t;
Ref i,visited
| Node t ->
let args,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t.args in
let t = {t with args=args} in
let args = get_arg_symbols_tuple arg_symbols [] args in
let s = ExtArray.get lex_sems t.id in
let frames = Xlist.fold s.ENIAMlexSemanticsTypes.frames [] (fun frames (selectors,meanings,positions) ->
print_endline ("selectors: " ^ ENIAMcategoriesPL.string_of_selectors selectors);
try
let attrs = apply_selectors t.attrs selectors in
print_endline "passed";
(attrs,meanings,Xlist.rev_map positions translate_position) :: frames
with Not_found -> print_endline "rejected"; frames) in
if frames = [] then Node t,visited else (* FIXME: tu powinien być rzucany wyjątek *)
let e = ENIAM_LCGreductions.get_variant_label () in
let l,_ = Xlist.fold frames ([],1) (fun (l,n) (attrs,meanings,positions) ->
Xlist.fold meanings (l,n) (fun (l,n) (meaning,hipero,weight) ->
Xlist.fold (match_args_positions args positions) (l,n) (fun (l,n) args ->
(string_of_int n, Node{t with attrs=("meaning",Val meaning) :: t.attrs; args=args}) :: l,n+1))) in
Variant(e,l),visited
| Variant(e,l) ->
let l,visited = Xlist.fold l ([],visited) (fun (l,visited) (i,t) ->
let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t in
(i,t) :: l, visited) in
Variant(e,List.rev l),visited
| Tuple l ->
let l,visited = Xlist.fold l ([],visited) (fun (l,visited) t ->
let t,visited = assign_frames_rec tokens lex_sems tree arg_symbols visited t in
t :: l, visited) in
Tuple(List.rev l),visited
| Dot -> Dot,visited
| t -> failwith ("assign_frames_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t)
let string_of_arg_symbol = function
| t -> failwith ("string_of_arg_symbol: " ^ ENIAM_LCGstringOf.linear_term 0 t)
let rec get_arg_symbols = function
Node t -> string_of_arg_symbol t.arg_symbol
| t -> failwith ("get_arg_symbols: " ^ ENIAM_LCGstringOf.linear_term 0 t)
let assign_frames tokens lex_sems tree =
print_endline "assign_frames";
let tree = Array.copy tree in
let arg_symbols = Array.make (Array.length tree) "" in
Int.iter 0 (Array.length tree - 1) (fun i ->
arg_symbols.(i) <- get_arg_symbols tree.(i));
let _ = assign_frames_rec tokens lex_sems tree arg_symbols IntSet.empty (Ref 0) in
tree
let assign tokens lex_sems text =
map_text Struct (fun mode -> function
ENIAMSentence result ->
if result.status <> Parsed then ENIAMSentence result else
ENIAMSentence {result with dependency_tree6=assign_frames tokens lex_sems result.dependency_tree6}
| t -> t) text