Blame view

parser/LCGvalence.ml 16.5 KB
Wojciech Jaworski authored
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
(*
 *  ENIAM: Categorial Syntactic-Semantic Parser for Polish
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open WalTypes
open LCGtypes
open Printf
open Xstd

let rec list_assoc2 x = function 
    (s,a,b) :: l -> if x = s then a,b else list_assoc2 x l
  | [] -> raise Not_found  

let meaning_weight = -1.              

let prepare_senses lemma meanings senses =
  match meanings,senses with
    [],[] -> [lemma, ["ALL"],0.] (* FIXME *)
  | [],_ -> 
     Xlist.map senses (fun (sense,hipero,weight) -> 
       if hipero = ["0"] then sense,["0"],weight else 
       sense,(if hipero = [] then ["ALL"] else hipero),weight)
  | _,[] -> Xlist.map meanings (fun meaning -> meaning, ["ALL"],meaning_weight)
  | _,_ -> 
     Xlist.map meanings (fun meaning -> 
       let hipero,weight = try list_assoc2 meaning senses with Not_found -> [],meaning_weight in
       if hipero = ["0"] then meaning,["0"],weight else meaning,(if hipero = [] then ["ALL"] else hipero),weight)

let extract_meaning lemma = function
    DefaultAtrs(m,r,o,neg,p,a) -> m,lemma,DefaultAtrs([],r,o,neg,p,a)
  | EmptyAtrs m -> m,lemma,EmptyAtrs []
  | NounAtrs(m,nsyn,s(*,typ*)) -> m,lemma,NounAtrs([],nsyn,s(*,typ*))
  | AdjAtrs(m,c,adjsyn(*,adjsem,typ*)) -> m,lemma,AdjAtrs([],c,adjsyn(*,adjsem,typ*))
  | PersAtrs(m,le,neg,mo,t,au,a) -> m,le,PersAtrs([],le,neg,mo,t,au,a)
  | GerAtrs(m,le,neg,a) -> m,le,GerAtrs([],le,neg,a)
  | NonPersAtrs(m,le,role,role_attr,neg,a) -> m,le,NonPersAtrs([],le,role,role_attr,neg,a)
  | _ -> failwith "extract_meaning"

let extract_roles = function
    NonPersAtrs(m,le,role,role_attr,neg,a) -> role,role_attr
  | _ -> failwith "extract_roles"

let get_lemma = function
    PreTypes.Lemma(lemma,cat,_) -> lemma,cat
  | PreTypes.Interp lemma -> lemma,"interp"
  | _ -> "",""

let prepare_valence paths_array =
  let valence = Array.map (fun d ->
    let lemma,cat = get_lemma d.PreTypes.token in
    let lemma = if lemma = "<ors>" || lemma = ":s" || lemma = "„s" then "pro-komunikować" else lemma in
    if lemma = "" then [] else
    let prep_valence = 
      if cat = "prep" then 
(*         (0,lemma,StringSet.empty,0.,"NOSEM","",Frame(EmptyAtrs[],[])) :: *)
        match d.PreTypes.semantics with
          PreTypes.Normal -> []
        | PreTypes.PrepSemantics l -> 
            Xlist.rev_map l (fun (lrole,lrole_attr,hipero,sel_prefs) -> 
              0,lemma,hipero,0.,lrole,lrole_attr,Frame(EmptyAtrs[],[]))
        | _ -> failwith "prepare_valence"
      else [] in
    let valence = if d.PreTypes.valence = [] then [0,Frame(EmptyAtrs[],[])] else d.PreTypes.valence in
    let lrole,lrole_attr = d.PreTypes.lroles in
    prep_valence @ List.flatten (Xlist.map valence (function
        fnum,Frame(attrs,schema) ->
          let meanings,lemma,attrs = extract_meaning lemma attrs in
          let lrole,lrole_attr = 
            if cat = "pact" || cat = "ppas" then extract_roles attrs else 
            if cat = "pcon" then "Con","" else 
            if cat = "pant" then "Ant","" else
            d.PreTypes.lroles in
          Xlist.map (prepare_senses lemma meanings d.PreTypes.senses) (fun (meaning,hipero,weight) ->
            let hipero = if cat = "conj" then ["0"] else hipero in
            fnum,meaning,StringSet.of_list hipero,weight,lrole,lrole_attr,
            Frame(attrs,Xlist.map schema (fun s -> 
(*               let s = if s.sel_prefs=[] then (print_endline ("prepare_valence empty sel_prefs: " ^ lemma ^ " " ^ cat); {s with sel_prefs=["ALL"]}) else s in *)
              if s.role="" && s.gf <> ADJUNCT && s.gf <> NOSEM then (
                printf "%d: %s\n%!" fnum (WalStringOf.frame lemma (Frame(attrs,schema)));
                failwith ("prepare_valence empty role: " ^ lemma ^ " " ^ cat)) else
              {s with morfs=List.sort compare s.morfs})))
      | fnum,(LexFrame _ as frame) -> [fnum,"lex",StringSet.empty,0.,lrole,lrole_attr,frame]
      | fnum,(ComprepFrame _ as frame) -> [fnum,"comprep",StringSet.empty,0.,lrole,lrole_attr,frame]))) paths_array in
  valence

(*let create_pro_frames t =
  [0,t.pred,StringSet.singleton "0",0.,"","",Frame(EmptyAtrs[],[])]*)

let get_fnum t = 
  let x = try Xlist.assoc t.attrs "FNUM" with Not_found -> Val "0" in
  (match x with
        Val s -> (try int_of_string s with _ -> failwith "get_fnum 1")
      | _ -> failwith "get_fnum 2")

let select_frames l t = 
(*   printf "a1 pred=%s\n" t.pred; *)
  let fnum = get_fnum t in
  let l = Xlist.fold l [] (fun l (n,meaning,hipero,weight,lrole,lrole_attr,frame) ->
    if n = fnum then (meaning,hipero,weight,lrole,lrole_attr,frame) :: l else l) in
(*   printf "a2 pred=%s\n" t.pred; *)
  match l with 
    [] -> "",[]
  | [_,_,_,_,_,LexFrame _] -> "",[]
  | [_,_,_,_,_,ComprepFrame _] -> "",[]
  | _ -> LCGreductions.get_variant_label (),
         fst (Xlist.fold l ([],1) (fun (l,i) t -> (string_of_int i, t) :: l, i+1))

let rec get_arg_refs found = function
   Variant(_,l) -> Xlist.fold l found (fun found (_,t) -> get_arg_refs found t)
 | Ref i -> i :: found
 | t -> failwith ("get_arg_refs: " ^ LCGstringOf.linear_term 0 t)

let rec match_position = function
    a :: la, b :: lb -> 
      if a = b then match_position (la,b :: lb) else
      if a > b then match_position (a :: la,lb)
      else false
  | [],_ -> true
  | _,[] -> false

let mark_sem_morfs morfs =
  Xlist.map morfs (function
    | Phrase(PrepNP(_,prep,c)) -> Phrase(PrepNP(Sem,prep,c))
    | Phrase(PrepAdjP(_,prep,c)) -> Phrase(PrepAdjP(Sem,prep,c))
    | Phrase(PrepNumP(_,prep,c)) -> Phrase(PrepNumP(Sem,prep,c))
    | Phrase(ComprepNP(_,prep)) -> Phrase(ComprepNP(Sem,prep))
    | Phrase(ComparNP(_,prep,c)) -> Phrase(ComparNP(Sem,prep,c))
    | Phrase(ComparPP(_,prep)) -> Phrase(ComparPP(Sem,prep))
    | Phrase(PrepNCP(_,prep,c,ct,co)) -> Phrase(PrepNCP(Sem,prep,c,ct,co))
    | t -> t)

let mark_nosem_morf = function
      Phrase(PrepNP(_,prep,c)) -> Phrase(PrepNP(NoSem,prep,c))
    | Phrase(PrepAdjP(_,prep,c)) -> Phrase(PrepAdjP(NoSem,prep,c))
    | Phrase(PrepNumP(_,prep,c)) -> Phrase(PrepNumP(NoSem,prep,c))
    | Phrase(ComprepNP(_,prep)) -> Phrase(ComprepNP(NoSem,prep))
    | Phrase(ComparNP(_,prep,c)) -> Phrase(ComparNP(NoSem,prep,c))
    | Phrase(ComparPP(_,prep)) -> Phrase(ComparPP(NoSem,prep))
    | Phrase(PrepNCP(_,prep,c,ct,co)) -> Phrase(PrepNCP(NoSem,prep,c,ct,co))
    | t -> failwith "mark_nosem_morf"

let rec is_nosem_morf = function
    Phrase(PrepNP(NoSem,prep,c)) -> true
  | Phrase(PrepAdjP(NoSem,prep,c)) -> true
  | Phrase(PrepNumP(NoSem,prep,c)) -> true
  | Phrase(ComprepNP(NoSem,prep)) -> true
  | Phrase(ComparNP(NoSem,prep,c)) -> true
  | Phrase(ComparPP(NoSem,prep)) -> true
  | Phrase(PrepNCP(NoSem,prep,c,ct,co)) -> true
  | _ -> false

let rec is_sem_morf = function
    Phrase(PrepNP(Sem,prep,c)) -> true
  | Phrase(PrepAdjP(Sem,prep,c)) -> true
  | Phrase(PrepNumP(Sem,prep,c)) -> true
  | Phrase(ComprepNP(Sem,prep)) -> true
  | Phrase(ComparNP(Sem,prep,c)) -> true
  | Phrase(ComparPP(Sem,prep)) -> true
  | Phrase(PrepNCP(Sem,prep,c,ct,co)) -> true
  | _ -> false

let rec exclude_sem_morfs = function
    [] -> []
  | morf :: morfs -> if is_sem_morf morf then exclude_sem_morfs morfs else morf :: exclude_sem_morfs morfs

(* UWAGA: dopasowywane ramy są preselekcjonowane, więc wszystkie argumenty muszą się maczować *)  
let match_args_pos modifications nodes e i schema t =
(*    printf "match_args_pos\n";  *)
(*   if schema = [] then schema else *)
  let refs = get_arg_refs [] t in
  let gfl,morfs = Xlist.fold refs ([],[]) (fun (gfl,morfs) i -> nodes.(i).agf ::  gfl, nodes.(i).amorf :: morfs) in
  if gfl = [] then failwith "match_args_pos: empty gfl" else
  let gf = Xlist.fold (List.tl gfl) (List.hd gfl) (fun gf gf2 -> if gf = gf2 then gf else failwith "match_args_pos 2") in
(*  if gf = NOSEM || gf = NOGF then schema else
  if gf = CORE then schema else (* FIXME: semantyka dla core *)*)
  let morfs = exclude_sem_morfs morfs in
  let schema,selected = 
    if morfs = [] then schema,[] else
    let morfs = List.sort compare morfs in 
(*     printf "gf=%s morfs=%s\n%!" (WalStringOf.gf gf) (String.concat ";" (Xlist.map morfs WalStringOf.morf));   *)
    Xlist.fold schema ([],[]) (fun (schema,selected) pos ->
(*      printf "pos.gf=%s pos.morfs=%s\n%!" (WalStringOf.gf pos.gf) (String.concat ";" (Xlist.map pos.morfs WalStringOf.morf));  *)
      if gf = pos.gf || (gf = ADJUNCT && pos.gf=ARG) then
        if match_position (morfs,(*mark_sem_morfs*) pos.morfs) then schema, pos :: selected else pos :: schema, selected
      else pos :: schema, selected) in
  (match selected with
    [] -> (*if gf = ARG then failwith "match_args_pos 3" else*)
      Xlist.iter refs (fun r ->
        modifications.(r) <- StringMap.add modifications.(r) (e ^ i) 
          LCGrenderer.empty_schema_field(*{gf=ADJUNCT; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]}*) )
(* FIXME: przy kilku pasujących pozycjach wybieram pierwszą a nich, do poprawienia przy okazji porządków z walencją 
np walencja leksemu "godzina":
1: : : common: time: Poss,T|{null;np(gen);nump(gen)}+Arg,T|{null;np(gen);nump(gen)};
1: : : common: time: Temp,T|{null;np(gen);nump(gen)}]; *)
  | pos :: _ -> Xlist.iter refs (fun r -> (* FIXME: gdzieś tu trzeba wstawić uzupełnianie brakujących ról dla argumentów i adjunctów *) (* FIXME: tu pewnie trzeba będzie wstawić rolę dla adjunctów, które pojawią się w wyniku niespełniania SEL-PREFS *)
      modifications.(r) <- StringMap.add modifications.(r) (e ^ i) pos)
  (*| _ -> failwith "match_args_pos 4"*));
  schema

let rec match_args_tuple modifications nodes e i schema = function
    Tuple l -> 
      Xlist.fold l schema (fun schema t ->
        match_args_tuple modifications nodes e i schema t)
  | t -> match_args_pos modifications nodes e i schema t

let match_args modifications nodes e i t = function    
    Frame(_,schema) -> ignore (match_args_tuple modifications nodes e i schema t.args)
  | LexFrame _ -> failwith "match_args"
  | ComprepFrame _ -> failwith "match_args"

let rec assign_frames_and_senses_rec modifications valence nodes t = 
(*        printf "pred=%s id=%d\n" t.pred t.id;   *)
      if t.id = 0 then failwith ("assign_frames_and_senses_rec: t.id=0 pred=" ^ t.pred) else
      let e,node_valence = select_frames ((*if t.id >= Array.length valence then create_pro_frames t else*) valence.(t.id)) t in
      let node_valence = if node_valence <> [] then node_valence else ["1",(t.pred,StringSet.empty,0.,"","",Frame(EmptyAtrs[],[]))] in
      let l = Xlist.map node_valence (fun (i,(meaning,hipero,weight,lrole,lrole_attr,frame)) ->
        let t = if lrole = "" then t else {t with arole=lrole;arole_attr=lrole_attr} in (* FIXME: pomijam to, że role dla rzeczowników dotyczą tylko inst *)
(*         let t = if lrole = "NOSEM" then {t with amorf=mark_nosem_morf t.amorf} else t in *)
        if t.args <> Dot then match_args modifications nodes e i t frame;
(*          printf "meaning=%s\n" meaning;   *)
        i,Node{t with meaning=meaning;
                    hipero=hipero;
                    meaning_weight=weight}) in          
      Variant(e,l)

let rec is_nosem_morfs morfs = 
  let sem = Xlist.fold morfs false (fun b m -> b || is_sem_morf m) in
  let nosem = Xlist.fold morfs false (fun b m -> b || is_nosem_morf m) in
  nosem && not sem (* FIXME: pewne uproszczenie, ale liczę, że nie ma wielu ramek z xp koordynowanym z prep *)

let has_tuple = function
    Tuple _ -> true
  | _ -> false

let rec apply_modifications_rec (*paths_array*) pos = function
    Variant(e,l) -> Variant(e,List.rev (Xlist.fold l [] (fun l (i,t) -> 
      let t = apply_modifications_rec (*paths_array*) pos t in
      (*if t = Dot then l else*) (i, (*apply_modifications_rec (*paths_array*) pos*) t) :: l)))
  | Node t -> 
      if is_sem_morf t.amorf then Node t else  (* FIXME: czy to jest poprawne? *)
      let t = if is_nosem_morf t.amorf then {t with agf=ARG} else t in
      Node{t with position=pos}
  | _ -> failwith "apply_modifications_rec"

(* let empty_pos = {gf=ADJUNCT; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]} (* FIXME: jaka GF? *) *)

let apply_modifications (*paths_array*) modifications nodes references =
  Int.iter 1 (Array.length references - 1) (fun r ->
(*     if StringMap.is_empty modifications.(r) then failwith ("apply_modifications: " ^ nodes.(r).pred) else *)
    references.(r) <- Choice(StringMap.map modifications.(r) (fun pos -> 
      apply_modifications_rec (*paths_array*) pos references.(r))))

let rec extract_nosem rev = function
    [] -> List.rev rev, false
  | ("NOSEM",Val "+") :: l -> (List.rev rev) @ l, true
  | x :: l -> extract_nosem (x :: rev) l

let get_nodes = function
    Node t -> 
       let attrs,b = extract_nosem [] t.attrs in
       let t = if t.pred = "<query1>" || t.pred = "<query2>" || t.pred = "<query3>" || t.pred = "<query4>" || t.pred = "<query5>" || t.pred = "<query6>" then {t with agf=CORE} else t in
       if t.agf = NOGF then failwith ("get_nodes agf=NOGF: " ^ t.pred) else
       if b then {t with amorf=mark_nosem_morf t.amorf; attrs=attrs} else t
  | _ -> failwith "get_nodes"   

let rec propagate_nosem_selprefs modifications ei = function 
    Choice choice -> Choice(StringMap.map choice (propagate_nosem_selprefs modifications ""))
  | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, propagate_nosem_selprefs modifications (e ^ i) t))
  | Node t -> 
        if (t.cat = "prep" && t.arole = "NOSEM") || t.cat = "num" then 
          let refs = IntSet.of_list (get_arg_refs [] t.args) in
          IntSet.iter refs (fun r -> 
            modifications.(r) <- StringMap.add_inc modifications.(r) ei t.position.WalTypes.sel_prefs (fun l -> 
              if l = t.position.WalTypes.sel_prefs then l else failwith ("propagate_nosem_selprefs 1: [" ^ String.concat ";" l ^ "] [" ^ String.concat ";" t.position.WalTypes.sel_prefs ^ "]")));
          Node{t with position= {t.position with WalTypes.sel_prefs = []}}
        else Node t
  | _ -> failwith "propagate_nosem_selprefs 2"

let rec apply_modifications2_rec mods = function
    Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, apply_modifications2_rec mods t))
  | Node t -> 
      if t.position.WalTypes.sel_prefs <> [] then failwith "apply_modifications2_rec" else
      Node{t with position={t.position with WalTypes.sel_prefs=mods}}
  | _ -> failwith "apply_modifications2_rec"

let apply_modifications2 modifications references =
  Int.iter 1 (Array.length references - 1) (fun r ->
    if not (StringMap.is_empty modifications.(r)) then
    match references.(r) with
      Choice choice -> 
        references.(r) <- Choice(StringMap.mapi choice (fun ei t -> 
          try apply_modifications2_rec (StringMap.find modifications.(r) ei) t with Not_found -> t))
    | _ -> failwith "apply_modifications2")

let assign_frames_and_senses paths_array references = 
  let modifications = Array.make (Array.length references) StringMap.empty in
  let valence = prepare_valence paths_array in
  let nodes = Array.map get_nodes references in
  let references = Array.map (assign_frames_and_senses_rec modifications valence nodes) nodes in
  apply_modifications (*paths_array*) modifications nodes references; 
(*  let modifications = Array.make (Array.length references) StringMap.empty in
  Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r)); (* FIXME: propagowanie preferencji selekcyjnych więcej niż jeden poziom w głąb nie działa *)
  apply_modifications2 modifications references;   
  Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r));
  apply_modifications2 modifications references;   
  Int.iter 0 (Array.length references - 1) (fun r -> references.(r) <- propagate_nosem_selprefs modifications "" references.(r));
  apply_modifications2 modifications references;   *)
  references