Commit 1e3a756e036af57885dd04fe6c14f9c644be2d40

Authored by Wojciech Jaworski
1 parent 05112178

Redukcja drzew walencji semantycznej

LCGparser/ENIAM_LCGgraphOf.ml
... ... @@ -95,6 +95,7 @@ let rec print_simplified_dependency_tree_rec2 file edge upper = function
95 95 (* fprintf file " %s [shape=box,label=\"SetAttr(%s,%s)\"]\n" id a (ENIAM_LCGstringOf.linear_term 0 s);
96 96 print_edge file edge upper id; *)
97 97 (* print_simplified_dependency_tree_rec2 file "" id t *)
  98 + | Node t -> ()
98 99 | t -> failwith ("print_simplified_dependency_tree_rec2: " ^ ENIAM_LCGstringOf.linear_term 0 t)
99 100  
100 101 let rec print_simplified_dependency_tree_rec file edge upper id = function
... ...
exec/ENIAMsemValence.ml
... ... @@ -123,7 +123,14 @@ let rec match_args_positions_rec positions = function
123 123 Printf.printf "match_args_positions_rec: args=[] positions=[%s]\n%!" (String.concat "; " (Xlist.map positions string_of_position));
124 124 let b = Xlist.fold positions false (fun b p -> p.is_necessary || b) in
125 125 if b then print_endline "match_args_positions: not matched";
126   - if b then [] else [[]] (* FIXME: miejsce na wstawianie pro? *)
  126 + if b then [] else
  127 + [Xlist.fold positions [] (fun found p ->
  128 + if not p.is_pro then found else
  129 + let attrs = ["role",p.role; "role_attr",p.role_attr; "selprefs",p.selprefs; "gf",Val (ENIAMwalStringOf.gf p.gf)] in
  130 + let attrs = if p.is_prong then attrs else attrs in (* FIXME: dodać number, gender *)
  131 + let attrs = Xlist.fold p.cr attrs (fun attrs cr -> ("controller",Val cr) :: attrs) in
  132 + let attrs = Xlist.fold p.ce attrs (fun attrs ce -> ("controllee",Val ce) :: attrs) in
  133 + Node{ENIAM_LCGrenderer.empty_node with lemma="pro"; pos="pro"; attrs=attrs} :: found)]
127 134  
128 135 (* FIXME: opcjonalność podrzędników argumentów zleksykalizowanych *)
129 136  
... ... @@ -275,3 +282,69 @@ let assign tokens lex_sems text =
275 282 if result.status <> Parsed then ENIAMSentence result else
276 283 ENIAMSentence {result with dependency_tree6=assign_frames tokens lex_sems result.dependency_tree6}
277 284 | t -> t) text
  285 +
  (* cut_nodes result_tree t: replaces each Node found in t by a Ref.
     A Node is stored in result_tree through ExtArray.add and the value
     returned by that call becomes the payload of the Ref. Variant and Tuple
     are rebuilt from recursively processed elements, Dot is returned as is,
     and any other term raises Failure carrying the printed term. *)
  286 +let rec cut_nodes result_tree = function
  287 + | Node t ->
        (* presumably ExtArray.add returns the index of the new slot - TODO confirm *)
  288 + let i = ExtArray.add result_tree (Node t) in
  289 + Ref i
  290 + | Variant(e,l) ->
        (* rev_map followed by List.rev: presumably restores the original element
           order - assumes Xlist.rev_map reverses like List.rev_map *)
  291 + let l = Xlist.rev_map l (fun (i,t) -> i, cut_nodes result_tree t) in
  292 + Variant(e,List.rev l)
  293 + | Tuple l ->
  294 + let l = Xlist.rev_map l (cut_nodes result_tree) in
  295 + Tuple(List.rev l)
  296 + | Dot -> Dot
  297 + | t -> failwith ("cut_nodes: " ^ ENIAM_LCGstringOf.linear_term 0 t)
  298 +
  (* reduce_set_attr attr v t: pushes the pair (attr,v) onto the front of the
     attrs list of a Node. For a Variant the attribute is pushed into every
     alternative, keeping the alternative labels. Any other term raises
     Failure carrying the printed term. *)
  299 +let rec reduce_set_attr attr v = function
  300 + Node t -> Node{t with attrs=(attr,v) :: t.attrs}
  301 + | Variant(e,l) ->
        (* rev_map plus List.rev: presumably keeps the alternatives in their
           original order - assumes Xlist.rev_map reverses like List.rev_map *)
  302 + Variant(e,List.rev (Xlist.rev_map l (fun (i,t) ->
  303 + i, reduce_set_attr attr v t)))
  304 + | t -> failwith ("reduce_set_attr: " ^ ENIAM_LCGstringOf.linear_term 0 t)
  305 +
  (* reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t:
     recursively rewrites the term t taken from orig_tree.
     - Ref i is resolved through orig_tree.(i); the reduced term is cached in
       mid_tree.(i).
     - Node gets its args reduced, then every Node inside those args is moved
       into result_tree by cut_nodes and replaced by a Ref.
     - SetAttr is eliminated: its subterm is reduced and the attribute is
       stored in the resulting Node(s) by reduce_set_attr.
     - Variant and Tuple are rebuilt element by element; Dot stays Dot.
     Any other term raises Failure carrying the printed term.
     Side effects: writes to mid_tree, appends to result_tree, may append to
     tokens and lex_sems, and prints two trace lines per Node to stdout. *)
  306 +let rec reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree = function
  307 + Ref i ->
        (* Dot doubles as the not-yet-computed marker of the cache.
           NOTE(review): an entry whose reduced form is itself Dot is therefore
           recomputed on every visit. The test uses polymorphic inequality on
           whole terms. *)
  308 + if mid_tree.(i) <> Dot then mid_tree.(i) else
  309 + let t = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree orig_tree.(i) in
  310 + mid_tree.(i) <- t;
  311 + t
  312 + | Node t ->
  313 + let args = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t.args in
        (* NOTE(review): trace output on stdout, looks like leftover debugging *)
  314 + print_endline ("reduce_tree_rec 1: " ^ ENIAM_LCGstringOf.linear_term 0 args);
  315 + let args = cut_nodes result_tree args in
  316 + print_endline ("reduce_tree_rec 2: " ^ ENIAM_LCGstringOf.linear_term 0 args);
  317 + let id =
        (* A node with id 0 receives a fresh pro token; one entry is appended to
           tokens and one to lex_sems. Presumably id 0 marks nodes created
           without a token and both arrays stay index-aligned - TODO confirm *)
  318 + if t.id = 0 then
  319 + let id = ExtArray.add tokens {ENIAMtokenizerTypes.empty_token_env with ENIAMtokenizerTypes.token=ENIAMtokenizerTypes.Lemma("pro","pro",[[]])} in
  320 + let _ = ExtArray.add lex_sems empty_lex_sem in
  321 + id
  322 + else t.id in
  323 + Node{t with args=args; id=id}
  324 + | Variant(e,l) ->
  325 + let l = Xlist.rev_map l (fun (i,t) -> i, reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t) in
  326 + Variant(e,List.rev l)
  327 + | Tuple l ->
  328 + let l = Xlist.rev_map l (reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree) in
  329 + Tuple(List.rev l)
  330 + | Dot -> Dot
  331 + | SetAttr(attr,v,t) ->
  332 + let t = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t in
  333 + reduce_set_attr attr v t
  334 + | t -> failwith ("reduce_tree_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t)
  335 +
  (* reduce_tree tokens lex_sems orig_tree: builds a new tree array from
     orig_tree. The term at index 0 of orig_tree is reduced with
     reduce_tree_rec and written to slot 0 of the result; the nodes cut out
     during the reduction occupy the slots appended afterwards.
     Prints a trace line to stdout. Accessing orig_tree.(0) raises
     Invalid_argument when orig_tree is empty. *)
  336 +let reduce_tree tokens lex_sems orig_tree =
  337 + print_endline "reduce_tree";
        (* cache of already reduced entries, Dot meaning not computed yet *)
  338 + let mid_tree = Array.make (Array.length orig_tree) Dot in
  339 + let result_tree = ExtArray.make (Array.length orig_tree) Dot in
        (* placeholder added before the recursion, overwritten below via set 0;
           presumably this first add lands at index 0 - TODO confirm *)
  340 + let _ = ExtArray.add result_tree Dot in
  341 + let t = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree orig_tree.(0) in
  342 + ExtArray.set result_tree 0 t;
  343 + ExtArray.to_array result_tree
  344 +
  (* reduce tokens lex_sems text: applies reduce_tree to dependency_tree6 of
     every ENIAMSentence in text whose status is Parsed. Sentences with another
     status and all other sentence kinds are passed through unchanged.
     The traversal is delegated to map_text with the Struct mode; the mode
     argument handed to the callback is not used. *)
  345 +let reduce tokens lex_sems text =
  346 + map_text Struct (fun mode -> function
  347 + ENIAMSentence result ->
  348 + if result.status <> Parsed then ENIAMSentence result else
  349 + ENIAMSentence {result with dependency_tree6=reduce_tree tokens lex_sems result.dependency_tree6}
  350 + | t -> t) text
... ...
exec/TODO
... ... @@ -4,3 +4,5 @@ przetwarzanie biogramu do końca
4 4 przetwarzanie dialogów
5 5 przechwytywanie błędów subsyntax itp w parserze i semparserze
6 6 interfejs dla clarin
  7 +
  8 +przetwarzanie kontroli jako dodawanie pro/koreferencji, oraz uzgadnianie przypadków
... ...
exec/parser.ml
... ... @@ -97,6 +97,7 @@ let rec main_loop sub_in sub_out =
97 97 let text = if !select_sentence_modes_flag then ENIAMselectSent.select_sentence_modes_text text else text in
98 98 let text = if !select_sentences_flag then ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text else text in
99 99 let text = if !assign_semantic_valence_flag then ENIAMsemValence.assign tokens lex_sems text else text in
  100 + let text = if !assign_semantic_valence_flag then ENIAMsemValence.reduce tokens lex_sems text else text in
100 101 ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens);
101 102 prerr_endline "Done!";
102 103 main_loop sub_in sub_out)
... ...
lexSemantics/ENIAMadjuncts.ml
... ... @@ -270,13 +270,17 @@ let add_adjuncts preps compreps compars pos2 (selectors,schema) =
270 270  
271 271 open ENIAMlexSemanticsTypes
272 272  
  (* add_subj_cr cr positions: returns positions in which every position with
     gf = SUBJ has cr pushed onto the front of its cr list; all other
     positions are returned unchanged. *)
  273 +let add_subj_cr cr positions =
  274 + Xlist.map positions (fun p ->
  275 + if p.gf = SUBJ then {p with cr=cr :: p.cr} else p)
  276 +
273 277 let add_connected_adjuncts preps compreps compars pos2 frame =
274 278 let compreps = Xlist.rev_map compreps ENIAMwalRenderer.render_connected_comprep in
275 279 let prepnps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_connected_prepnp prep cases) in
276 280 let prepadjps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_connected_prepadjp prep cases) in
277 281 let compars = Xlist.rev_map compars ENIAMwalRenderer.render_connected_compar in
278 282 match pos2 with
279   - "verb" -> [{frame with positions=frame.positions @ ENIAMwalRenderer.verb_connected_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars}]
  283 + "verb" -> [{frame with positions=(add_subj_cr "3" frame.positions) @ ENIAMwalRenderer.verb_connected_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars}]
280 284 | "noun" -> [
281 285 {frame with selectors=[Nsyn,Eq,["proper"]] @ frame.selectors; positions=ENIAMwalRenderer.proper_noun_connected_adjuncts_simp @ prepnps @ compreps @ compars};
282 286 {frame with selectors=[Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ frame.selectors; positions=ENIAMwalRenderer.measure_noun_connected_adjuncts_simp @ prepnps @ compreps @ compars};
... ...
lexSemantics/ENIAMwalFrames.ml deleted
1   -(*
2   - * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information.
3   - * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
4   - * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences
5   - *
6   - * This library is free software: you can redistribute it and/or modify
7   - * it under the terms of the GNU Lesser General Public License as published by
8   - * the Free Software Foundation, either version 3 of the License, or
9   - * (at your option) any later version.
10   - *
11   - * This library is distributed in the hope that it will be useful,
12   - * but WITHOUT ANY WARRANTY; without even the implied warranty of
13   - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   - * GNU Lesser General Public License for more details.
15   - *
16   - * You should have received a copy of the GNU Lesser General Public License
17   - * along with this program. If not, see <http://www.gnu.org/licenses/>.
18   - *)
19   -
20   -open ENIAMwalTypes
21   -open Xstd
22   -
(* Module-level binding evaluated at load time: destructures the 5-tuple
   returned by ENIAMwalParser.load_realizations. *)
23   -let expands,compreps,comprep_reqs,subtypes,equivs = ENIAMwalParser.load_realizations ()
24   -(*let verb_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.verb_filename)
25   -let noun_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.noun_filename)
26   -let adj_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.adj_filename)
27   -let adv_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.adv_filename) *)
28   -
(* Module-level binding evaluated at load time: the value returned by
   ENIAMwalTEI.load_walenty2. The inline comment keeps the alternative
   StringMap.empty initialiser. *)
29   -let walenty = (*StringMap.empty*)ENIAMwalTEI.load_walenty2 ()
30   -
31   -(*let _ = StringMap.iter walenty (fun pos map ->
32   - StringMap.iter map (fun lexeme frames ->
33   - Printf.printf "%s %s %d\n%!" pos lexeme (Xlist.size frames)))*)
34   -
35   -(*let all_frames =
36   - ["subst",noun_frames;
37   - "adj",adj_frames;
38   - "adv",adv_frames;
39   - "ger",verb_frames;
40   - "pact",verb_frames;
41   - "ppas",verb_frames;
42   - "fin",verb_frames;
43   - "praet",verb_frames;
44   - "impt",verb_frames;
45   - "imps",verb_frames;
46   - "inf",verb_frames;
47   - "pcon",verb_frames]*)
48   -
(* get_role_and_sense morf: maps a phrase description to a triple of two
   strings and a list; presumably (role, role_attr, senses) - TODO confirm.
   Only the reflexive marker, Xp/Advp abbreviations with a known mode, Distrp,
   Possp and LexPhraseMode with a known mode get a specific role; everything
   else falls back to the generic Arg role. The list component is empty in
   every case. Declared rec although the body contains no recursive call.
   NOTE(review): the role string returned for Possp is spelled Possesive -
   confirm which spelling the consumers expect. *)
49   -let rec get_role_and_sense = function
50   - Phrase(Lex "się") -> "Theme","", []
51   - | PhraseAbbr(Xp "abl",_) -> "Location","Source", []
52   - | PhraseAbbr(Xp "adl",_) -> "Location","Goal", []
53   - | PhraseAbbr(Xp "caus",_) -> "Condition","", []
54   - | PhraseAbbr(Xp "dest",_) -> "Purpose","", []
55   - | PhraseAbbr(Xp "dur",_) -> "Duration","", []
56   - | PhraseAbbr(Xp "instr",_) -> "Instrument","", []
57   - | PhraseAbbr(Xp "locat",_) -> "Location","", []
58   - | PhraseAbbr(Xp "mod",_) -> "Manner","", []
59   - | PhraseAbbr(Xp "perl",_) -> "Path","", []
60   - | PhraseAbbr(Xp "temp",_) -> "Time","", []
61   - | PhraseAbbr(Advp "abl",_) -> "Location","Source", []
62   - | PhraseAbbr(Advp "adl",_) -> "Location","Goal", []
63   - | PhraseAbbr(Advp "dur",_) -> "Duration","", []
64   - | PhraseAbbr(Advp "locat",_) -> "Location","", []
65   - | PhraseAbbr(Advp "mod",_) -> "Manner","", []
66   - | PhraseAbbr(Advp "perl",_) -> "Path","", []
67   - | PhraseAbbr(Advp "temp",_) -> "Time","", []
68   -(* | PhraseAbbr(Advp "pron",_) -> "Arg","", []
69   - | PhraseAbbr(Advp "misc",_) -> "Arg","", []*)
70   - | PhraseAbbr(Distrp,_) -> "Distributive","", [] (* FIXME: this is a quantifier *)
71   - | PhraseAbbr(Possp,_) -> "Possesive","", []
72   - | LexPhraseMode("abl",_,_) -> "Location","Source", []
73   - | LexPhraseMode("adl",_,_) -> "Location","Goal", []
74   - | LexPhraseMode("caus",_,_) -> "Condition","", []
75   - | LexPhraseMode("dest",_,_) -> "Purpose","", []
76   - | LexPhraseMode("dur",_,_) -> "Duration","", []
77   - | LexPhraseMode("instr",_,_) -> "Instrument","", []
78   - | LexPhraseMode("locat",_,_) -> "Location","", []
79   - | LexPhraseMode("mod",_,_) -> "Manner","", []
80   - | LexPhraseMode("perl",_,_) -> "Path","", []
81   - | LexPhraseMode("temp",_,_) -> "Time","", []
82   - | _ -> "Arg","", []
83   -
84   -
85   -(*let rec get_gf_role = function
86   - [],Phrase(NP case) -> "C", "", ["T"]
87   - | [],Phrase(AdjP case) -> "R", "", ["T"]
88   - | [],Phrase(NumP(case,_)) -> "C", "", ["T"]
89   - | [],Phrase(PrepNP _) -> "C", "", ["T"]
90   - | [],Phrase(PrepAdjP _) -> "C", "", ["T"]
91   - | [],Phrase(PrepNumP _) -> "C", "", ["T"]
92   - | [],Phrase(ComprepNP _) -> "C", "", ["T"]
93   - | [],Phrase(ComparP _) -> "C", "", ["T"]
94   - | [],Phrase(CP _) -> "C", "", ["T"]
95   - | [],Phrase(NCP(case,_,_)) -> "C", "", ["T"]
96   - | [],Phrase(PrepNCP _) -> "C", "", ["T"]
97   - | [],Phrase(InfP _) -> "C", "", ["T"]
98   - | [],Phrase(FixedP _) -> "C", "", ["T"]
99   - | [],Phrase Or -> "C", "", ["T"] (* FIXME: zbadać w walentym faktyczne użycia or, bo to nie tylko zdania, ale też np(nom) w cudzysłowach *)
100   - | [],Phrase(Lex "się") -> "C", "Ptnt", ["T"]
101   - | [],PhraseAbbr(Xp mode,_) -> "C", mode, ["T"]
102   - | [],PhraseAbbr(Advp "pron",_) -> "R", "", ["T"]
103   - | [],PhraseAbbr(Advp "misc",_) -> "R", "", ["T"]
104   - | [],PhraseAbbr(Advp mode,_) -> "C", mode, ["T"]
105   - | [],PhraseAbbr(Nonch,_) -> "C", "", ["T"]
106   - | [],PhraseAbbr(Distrp,_) -> "C", "Distr", ["T"]
107   - | [],PhraseAbbr(Possp,_) -> "C", "Poss", ["T"]
108   - | [],LexPhraseMode(mode,_,_) -> "C", mode, ["T"]
109   - | [],LexPhrase((SUBST(_,case),_) :: _,_) -> "C", "", ["T"]
110   - | [],LexPhrase((PREP _,_) :: _,_) -> "C", "", ["T"]
111   - | [],LexPhrase((NUM(case,_,_),_) :: _,_) -> "C", "", ["T"]
112   - | [],LexPhrase((ADJ(_,case,_,_),_) :: _,_) -> "C", "", ["T"]
113   - | [],LexPhrase((ADV _,_) :: _,_) -> "C", "", ["T"]
114   - | [],LexPhrase((GER(_,case,_,_,_,_),_) :: _,_) -> "C", "", ["T"]
115   - | [],LexPhrase((PACT(_,case,_,_,_,_),_) :: _,_) -> "C", "", ["T"]
116   - | [],LexPhrase((PPAS(_,case,_,_,_),_) :: _,_) -> "C", "", ["T"]
117   - | [],LexPhrase((INF _,_) :: _,_) -> "C", "", ["T"]
118   - | [],LexPhrase((QUB,_) :: _,_) -> "C", "", ["T"]
119   - | [],LexPhrase((COMPAR,_) :: _,_) -> "C", "", ["T"]
120   - | [],LexPhrase((COMP _,_) :: _,_) -> "C", "", ["T"]
121   - | [],morf -> print_endline(*failwith*) ("get_gf: []," ^ ENIAMwalStringOf.morf morf);"","",[]
122   - | _,Phrase(InfP _) -> "X", "", ["T"]
123   - | _,Phrase(CP _) -> "X", "", ["T"] (* zwykle możliwa koordynacja z infp *)
124   - | _,Phrase _ -> "X", "", ["T"]
125   - | _,PhraseAbbr _ -> "X", "", ["T"]
126   - | _,LexPhraseMode _ -> "X", "", ["T"]
127   - | _,LexPhrase((INF _,_) :: _,_) -> "X", "", ["T"]
128   - | _,LexPhrase _ -> "X", "", ["T"]
129   - | _,morf -> failwith ("get_gf: _," ^ ENIAMwalStringOf.morf morf)*)
130   -
131   -(*let gf_rank = Xlist.fold [
132   - "",1;
133   - ] StringMap.empty (fun gf_rank (gf,v) -> StringMap.add gf_rank gf v)*)
134   -
135   -(*let agregate_gfs s gfs_roles =
136   -(* fst (Xlist.fold gfs ("",0) (fun (best_gf,best_rank) gf ->
137   - let rank = try StringMap.find gf_rank gf with Not_found -> failwith ("agregate_gfs: " ^ gf) in
138   - if rank > best_rank then gf, rank else best_gf, best_rank))*)
139   -(* let gfs,roles = List.split gfs_roles in
140   - let gfs = StringSet.to_list (Xlist.fold gfs StringSet.empty StringSet.add) in
141   - if Xlist.size gfs > 1 then print_endline ("agregate_gfs: " ^ String.concat " " gfs);
142   - if Xlist.size roles > 1 then print_endline ("agregate_gfs: " ^ String.concat " " roles);*)
143   - let gf,role,prefs = List.hd gfs_roles in
144   - {s with gf=gf; role=role; prefs=prefs}
145   -
146   -let rec make_gfs schema =
147   - let schema = Xlist.map schema (function
148   - {gf="subj"} as s -> {s with gf="SUBJ"; role="Agnt"; prefs=["T"]; morfs=make_gfs_morfs s.morfs}
149   - | {gf="obj"} as s -> {s with gf="OBJ"; role="Ptnt"; prefs=["T"]; morfs=make_gfs_morfs s.morfs}
150   - | {gf=""} as s -> agregate_gfs {s with morfs=make_gfs_morfs s.morfs} (Xlist.map s.morfs (fun morf -> get_gf_role (s.ce,morf)))
151   - | {gf=t} -> failwith ("make_gfs: " ^ t)) in
152   -(* let schema = List.rev (fst (Xlist.fold schema ([],StringMap.empty) (fun (schema,map) s ->
153   - try
154   - let n = StringMap.find map s.gf in
155   - {s with gf=s.gf ^ string_of_int (n+1)} :: schema,
156   - StringMap.add map s.gf (n+1)
157   - with Not_found ->
158   - s :: schema, StringMap.add map s.gf 1))) in*)
159   - schema
160   -
161   -and make_gfs_morfs morfs =
162   - List.flatten (Xlist.map morfs (function
163   - Phrase _ as morf -> [morf]
164   - | PhraseAbbr(Advp _,[]) -> [Phrase AdvP]
165   - | PhraseAbbr(_,[]) -> failwith "make_gfs_morfs"
166   - | PhraseAbbr(_,morfs) -> make_gfs_morfs morfs
167   - | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,make_gfs schema))]
168   - | LexPhraseMode(_,pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,make_gfs schema))]
169   - | _ -> failwith "make_gfs_morfs"))*)
170   -
(* mark_nosem_morfs morfs: replaces the first component of PrepNP, PrepAdjP,
   PrepNumP and PrepNCP phrases with NoSem, keeping their other components.
   All other morfs are returned unchanged; the ComprepNP and Compar cases are
   commented out. *)
171   -let mark_nosem_morfs morfs =
172   - Xlist.map morfs (function
173   - Phrase(PrepNP(_,prep,c)) -> Phrase(PrepNP(NoSem,prep,c))
174   - | Phrase(PrepAdjP(_,prep,c)) -> Phrase(PrepAdjP(NoSem,prep,c))
175   - | Phrase(PrepNumP(_,prep,c)) -> Phrase(PrepNumP(NoSem,prep,c))
176   -(* | Phrase(ComprepNP(_,prep)) -> Phrase(ComprepNP(NoSem,prep)) *) (* FIXME: for now ComprepNP are always semantic *)
177   -(* | Phrase(ComparNP(_,prep,c)) -> Phrase(ComparNP(NoSem,prep,c)) (* FIXME: the non-semantic compar is skipped *)
178   - | Phrase(ComparPP(_,prep)) -> Phrase(ComparPP(NoSem,prep))*)
179   - | Phrase(PrepNCP(_,prep,c,ct,co)) -> Phrase(PrepNCP(NoSem,prep,c,ct,co))
180   - | t -> t)
181   -
182   -
(* agregate_role_and_sense s l: picks one role for the schema field s from the
   list l of (role, role_attr, sense) triples.
   Each role and role_attr pair is joined with a single space and collected in
   a string set. When the set does not contain exactly one element, the
   generic entry for Arg with an empty attribute is removed. The minimal
   remaining element is split on spaces back into role and role_attr.
   Raises Failure when the split yields neither one nor two parts.
   The senses are collected as well but are not stored in the result, because
   the sel_prefs update is commented out.
   NOTE(review): presumably min_elt fails when the set ends up empty, for
   example for an empty l - TODO confirm. *)
183   -let agregate_role_and_sense s l =
184   - let roles,senses = Xlist.fold l (StringSet.empty,StringSet.empty) (fun (roles,senses) (role,role_attr,sense) ->
185   - StringSet.add roles (role ^ " " ^ role_attr),
186   - Xlist.fold sense senses StringSet.add) in
187   - let roles = if StringSet.size roles = 1 then roles else StringSet.remove roles "Arg " in
188   - let role,role_attr =
189   - match Str.split (Str.regexp " ") (StringSet.min_elt roles) with
190   - [r;a] -> r,a
191   - | [r] -> r,""
192   - | _ -> failwith "agregate_role_and_sense" in
193   - {s with role=role; role_attr=role_attr(*; sel_prefs=StringSet.to_list senses*)}
194   -
(* assign_role_and_sense schema: fills in the semantic role of every schema
   field and processes its morfs recursively.
   A field with an empty role gets a default: Initiator for SUBJ, Theme for
   OBJ, and for ARG the role aggregated from get_role_and_sense over its
   morfs; sel_prefs is set to the single entry ALL in these cases.
   A field that already has a role keeps it, and its morfs are passed through
   mark_nosem_morfs before the recursive processing.
   Any other gf value raises Failure. *)
195   -let rec assign_role_and_sense schema =
196   - Xlist.map schema (function
197   - {gf=SUBJ} as s ->
198   - if s.role = "" then {s with role="Initiator"; sel_prefs=["ALL"]; morfs=assign_role_and_sense_morfs s.morfs}
199   - else {s with morfs=assign_role_and_sense_morfs (mark_nosem_morfs s.morfs)}
200   - | {gf=OBJ} as s ->
201   - if s.role = "" then {s with role="Theme"; sel_prefs=["ALL"]; morfs=assign_role_and_sense_morfs s.morfs}
202   - else {s with morfs=assign_role_and_sense_morfs (mark_nosem_morfs s.morfs)}
203   - | {gf=ARG} as s ->
204   - if s.role = "" then agregate_role_and_sense {s with sel_prefs=["ALL"]; morfs=assign_role_and_sense_morfs s.morfs}
205   - (Xlist.map s.morfs (fun morf -> get_role_and_sense morf))
206   - else {s with morfs=assign_role_and_sense_morfs (mark_nosem_morfs s.morfs)}
207   - | _ -> failwith "assign_role_and_sense")
208   -
(* assign_role_and_sense_morfs morfs: flattens and normalises a morf list.
   Phrase and E are kept; an Advp abbreviation without expansions becomes
   Phrase AdvP; other abbreviations are replaced by their recursively
   processed expansions and raise Failure when they have none.
   LexPhrase and LexPhraseMode both become LexPhrase with a recursively
   processed schema, so the mode of LexPhraseMode is dropped.
   Any other morf raises Failure. *)
209   -and assign_role_and_sense_morfs morfs =
210   - List.flatten (Xlist.map morfs (function
211   - Phrase _ as morf -> [morf]
212   - | E _ as morf -> [morf]
213   - | PhraseAbbr(Advp _,[]) -> [Phrase AdvP]
214   - | PhraseAbbr(_,[]) -> failwith "assign_role_and_sense_morfs"
215   - | PhraseAbbr(_,morfs) -> assign_role_and_sense_morfs morfs
216   - | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,assign_role_and_sense schema))]
217   - | LexPhraseMode(_,pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,assign_role_and_sense schema))]
218   - | _ -> failwith "assign_role_and_sense_morfs"))
219   -
220   -(*let _ =
221   - Xlist.iter walenty_filenames (fun filename ->
222   - print_endline filename;
223   - let frames = load_frames (walenty_path ^ filename) in
224   - StringMap.iter frames (fun _ l ->
225   - Xlist.iter l (fun (refl,opinion,negation,pred,aspect,schema) ->
226   - ignore (process_opinion opinion);
227   - ignore (process_negation [Text negation]);
228   - ignore (process_pred [Text pred]);
229   - ignore (process_aspect [Text aspect]);
230   - ignore (assign_pro_args (make_gfs (process_schema expands subtypes equivs schema))))))*)
231   -
(* remove_trivial_args schema: schema is a list of 4-tuples whose last
   component is a morf list. From every morf list the listed phrase kinds are
   dropped: AdjP, genitive NP and NCP, PrepNP, FixedP and the single-word or
   preposition-plus-word lexicalisations matched below. Positions whose list
   becomes empty are dropped entirely.
   The result is a list of the remaining morf lists; both folds prepend to
   their accumulator, so presumably positions and morfs come out in reversed
   order - assumes Xlist.fold runs left to right, TODO confirm. *)
232   -let remove_trivial_args schema =
233   - Xlist.fold schema [] (fun l (_,_,_,morfs) ->
234   - let morfs = Xlist.fold morfs [] (fun morfs -> function
235   - Phrase(AdjP _) -> morfs
236   - | Phrase(NP(Case "gen")) -> morfs
237   - | Phrase(NCP(Case "gen",_,_)) -> morfs
238   - | Phrase(PrepNP _) -> morfs
239   - | Phrase(FixedP _) -> morfs
240   - | LexPhrase([ADJ _,_],_) -> morfs
241   - | LexPhrase([PPAS _,_],_) -> morfs
242   - | LexPhrase([PACT _,_],_) -> morfs
243   - | LexPhrase([SUBST(_,Case "gen"),_],_) -> morfs
244   - | LexPhrase([PREP _,_;_],_) -> morfs
245   - | morf -> morf :: morfs) in
246   - if morfs = [] then l else morfs :: l)
247   -
248   -(* leksykalizacje do zmiany struktury
249   -lex([PREP(gen),'z';SUBST(sg,gen),'nazwa'],atr1[OBL{lex([QUB,'tylko'],natr[])}])
250   -lex([PREP(loc),'na';SUBST(sg,loc),'papier'],atr1[OBL{lex([QUB,'tylko'],natr[])}])
251   -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
252   -lex([PREP(gen),'z';SUBST(sg,gen),'most'],ratr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
253   -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
254   -lex([PREP(gen),'z';SUBST(sg,gen),'most'],ratr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
255   -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
256   -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
257   -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
258   -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
259   -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}])
260   -*)
261   -
(* num_arg_schema_field morfs: schema field with gf CORE, role QUANT-ARG,
   direction Forward, no control links and the given morfs. *)
262   -let num_arg_schema_field morfs =
263   - {gf=CORE; role="QUANT-ARG"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Forward; morfs=morfs}
264   -
(* std_arg_schema_field dir morfs: schema field with gf ARG, the generic role
   Arg, the given direction and morfs, and no control links. *)
265   -let std_arg_schema_field dir morfs =
266   - {gf=ARG; role="Arg"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs}
267   -
(* simple_arg_schema_field morfs: schema field with gf ARG, an empty role,
   direction Both, no control links and the given morfs. *)
268   -let simple_arg_schema_field morfs =
269   - {gf=ARG; role=""; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=morfs}
270   -
(* nosem_refl_schema_field: constant schema field with gf NOSEM, an empty
   role, direction Both and the reflexive marker as its only morf. *)
271   -let nosem_refl_schema_field =
272   - {gf=NOSEM; role=""; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[Phrase(Lex "się")]}
273   -
274   -
(* expand_lexicalizations frame: applies expand_lexicalizations_schema to the
   schema of a Frame and keeps its attributes. Every other frame kind raises
   Failure; the ComprepFrame case is commented out.
   NOTE(review): expand_lexicalizations_schema is not defined in the visible
   part of this file. *)
275   -let expand_lexicalizations = function
276   - Frame(atrs,schema) -> Frame(atrs,expand_lexicalizations_schema schema)
277   -(* ComprepFrame(s,morfs) -> ComprepFrame(atrs,expand_lexicalizations_morfs morfs) *)
278   - | _ -> failwith "expand_lexicalizations"
279   -
280   -
(* prepare_schema_comprep expands subtypes equivs schema: runs an already
   parsed schema through the pipeline expand_schema, expand_subtypes,
   expand_equivs_schema, assign_role_and_sense and finally assign_pro_args.
   NOTE(review): assign_pro_args is not defined in the visible part of this
   file. *)
281   -let prepare_schema_comprep expands subtypes equivs schema =
282   - assign_pro_args (assign_role_and_sense (ENIAMwalParser.expand_equivs_schema equivs (ENIAMwalParser.expand_subtypes subtypes (ENIAMwalParser.expand_schema expands schema))))
283   -
(* prepare_schema expands subtypes equivs schema: parses schema with
   ENIAMwalParser.parse_schema and hands the result to
   prepare_schema_comprep. *)
284   -let prepare_schema expands subtypes equivs schema =
285   - prepare_schema_comprep expands subtypes equivs (ENIAMwalParser.parse_schema schema)
286   -
(* prepare_schema_sem expands subtypes equivs schema: plain alias of
   prepare_schema_comprep, applied to a schema that needs no parsing. *)
287   -let prepare_schema_sem expands subtypes equivs schema =
288   - prepare_schema_comprep expands subtypes equivs schema
289   -
290   -
(* convert_frame expands subtypes equivs lexemes valence lexeme pos frame:
   converts one unparsed frame, given as the tuple
   (refl, opinion, negation, pred, aspect, schema), and adds the resulting
   frames to valence, a map from lexeme to a map from pos to a frame list.
   Steps: a reflexive frame is rejected when lexemes lacks the reflexive
   marker; the frame is taken from default_frames when refl names one,
   otherwise it is built from the parsed attributes and the prepared schema;
   when lexemes is non-empty the frame is restricted by reduce_schema_frame;
   lexicalisations are expanded; every extracted Frame or LexFrame is split
   into its schema variants and each variant is prepended to the list stored
   under its lexeme and pos.
   ImpossibleSchema raised along the way is caught and the input valence is
   returned unchanged. An extracted frame of another kind raises Failure.
   NOTE(review): default_frames, reduce_schema_frame, extract_lex_frames,
   simplify_lex, split_xor, split_or_coord and ImpossibleSchema are not
   defined in the visible part of this file. *)
291   -let convert_frame expands subtypes equivs lexemes valence lexeme pos (refl,opinion,negation,pred,aspect,schema) =
292   -(* Printf.printf "convert_frame %s %s\n" lexeme pos; *)
293   - try
294   - if refl = "się" && not (StringMap.mem lexemes "się") then raise ImpossibleSchema else
295   - let frame =
296   - try StringMap.find default_frames refl (* refl carries the information about the type of the default frame *)
297   - with Not_found ->
298   - Frame(DefaultAtrs([],ENIAMwalParser.parse_refl [Text refl],
299   - ENIAMwalParser.parse_opinion opinion,
300   - ENIAMwalParser.parse_negation [Text negation],
301   - ENIAMwalParser.parse_pred [Text pred],
302   - ENIAMwalParser.parse_aspect [Text aspect]),
303   - prepare_schema expands subtypes equivs schema) in
304   - let frame = if StringMap.is_empty lexemes then frame else reduce_schema_frame lexemes frame in
305   - let frame = expand_lexicalizations frame in
306   - Xlist.fold (extract_lex_frames lexeme pos [] frame) valence (fun valence -> function
307   - lexeme,pos,Frame(atrs,schema) ->
308   - let schemas = simplify_lex (split_xor (split_or_coord schema)) in
309   - Xlist.fold schemas valence (fun valence schema ->
310   - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
311   - let poss = StringMap.add_inc poss pos [Frame(atrs,schema)] (fun l -> Frame(atrs,schema) :: l) in
312   - StringMap.add valence lexeme poss)
313   - | lexeme,pos,LexFrame(id,pos2,restr,schema) ->
314   - let schemas = simplify_lex (split_xor (split_or_coord schema)) in
315   - Xlist.fold schemas valence (fun valence schema ->
316   - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
317   - let poss = StringMap.add_inc poss pos [LexFrame(id,pos2,restr,schema)] (fun l -> LexFrame(id,pos2,restr,schema) :: l) in
318   - StringMap.add valence lexeme poss)
319   - | _ -> failwith "convert_frame")
320   - with ImpossibleSchema -> valence
321   -
(* convert_frame_sem expands subtypes equivs lexemes valence lexeme pos frame:
   variant of the frame conversion for a frame that is already a Frame with
   DefaultAtrs and a list of positions.
   A frame with ReflSie is rejected when lexemes lacks the reflexive marker.
   Otherwise the positions are prepared with prepare_schema_sem, the frame is
   restricted by reduce_schema_frame when lexemes is non-empty,
   lexicalisations are expanded, and every extracted Frame or LexFrame is
   split into schema variants that are prepended to the list stored in
   valence under its lexeme and pos.
   ImpossibleSchema is caught and the input valence is returned unchanged.
   Any other frame shape, on input or among the extracted frames, raises
   Failure.
   NOTE(review): reduce_schema_frame, extract_lex_frames, simplify_lex,
   split_xor, split_or_coord and ImpossibleSchema are not defined in the
   visible part of this file. *)
322   -let convert_frame_sem expands subtypes equivs lexemes valence lexeme pos = function
323   - Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect),positions) ->
324   -(* Printf.printf "convert_frame_sem %s\n" (ENIAMwalStringOf.frame lexeme (Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect),positions))); *)
325   - (try
326   - if refl = ReflSie && not (StringMap.mem lexemes "się") then raise ImpossibleSchema else
327   - let frame =
328   - Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect),
329   - prepare_schema_sem expands subtypes equivs positions) in
330   - let frame = if StringMap.is_empty lexemes then frame else reduce_schema_frame lexemes frame in
331   - let frame = expand_lexicalizations frame in
332   - Xlist.fold (extract_lex_frames lexeme pos [] frame) valence (fun valence -> function
333   - lexeme,pos,Frame(atrs,schema) ->
334   - let schemas = simplify_lex (split_xor (split_or_coord schema)) in
335   - Xlist.fold schemas valence (fun valence schema ->
336   - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
337   - let poss = StringMap.add_inc poss pos [Frame(atrs,schema)] (fun l -> Frame(atrs,schema) :: l) in
338   - StringMap.add valence lexeme poss)
339   - | lexeme,pos,LexFrame(id,pos2,restr,schema) ->
340   - let schemas = simplify_lex (split_xor (split_or_coord schema)) in
341   - Xlist.fold schemas valence (fun valence schema ->
342   - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
343   - let poss = StringMap.add_inc poss pos [LexFrame(id,pos2,restr,schema)] (fun l -> LexFrame(id,pos2,restr,schema) :: l) in
344   - StringMap.add valence lexeme poss)
345   - | _ -> failwith "convert_frame_sem")
346   - with ImpossibleSchema -> valence)
347   - | _ -> failwith "convert_frame_sem"
348   -
(* make_comprep_frames_of_schema s schema: accepts only a schema made of one
   field without control links whose single morf is a LexPhrase with exactly
   one (pos, Lexeme lex) pair. Returns the triple of lex, the unique part of
   speech given by get_pos lex pos, and ComprepFrame(s,pos,restr,schema).
   Raises Failure when get_pos does not return exactly one value or when the
   schema has any other shape.
   NOTE(review): get_pos is not defined in the visible part of this file. *)
349   -let make_comprep_frames_of_schema s = function
350   - [{cr=[];ce=[]; morfs=[LexPhrase([pos,Lexeme lex],(restr,schema))]}] ->
351   - lex,
352   - (match get_pos lex pos with [pos] -> pos | _ -> failwith "make_comprep_frame_of_schema 2"),
353   - ComprepFrame(s,pos,restr,schema)
354   - | schema -> failwith ("make_comprep_frame_of_schema: " ^ ENIAMwalStringOf.schema schema)
355   -
(* convert_comprep_frame expands subtypes equivs lexemes valence lexeme pos (s,morf):
   converts one complex-preposition entry and adds the resulting frames to
   valence.
   The morf is wrapped in a simple ARG field and prepared with
   prepare_schema_comprep; when lexemes is non-empty the schema is restricted
   by reduce_schema; lexicalisations are expanded; the lexeme, pos and frame
   computed by make_comprep_frames_of_schema shadow the lexeme and pos
   arguments. Every extracted ComprepFrame or LexFrame is split into schema
   variants that are prepended to the list stored under its lexeme and pos.
   ImpossibleSchema is caught and the input valence is returned unchanged.
   An extracted frame of another kind raises Failure.
   NOTE(review): reduce_schema, expand_lexicalizations_schema,
   extract_lex_frames, simplify_lex, split_xor, split_or_coord and
   ImpossibleSchema are not defined in the visible part of this file. *)
356   -let convert_comprep_frame expands subtypes equivs lexemes valence lexeme pos (s,morf) =
357   - try
358   - let schema = prepare_schema_comprep expands subtypes equivs [simple_arg_schema_field [morf]] in
359   - let schema = if StringMap.is_empty lexemes then schema else reduce_schema lexemes schema in
360   - let schema = expand_lexicalizations_schema schema in
361   - let lexeme,pos,frame = make_comprep_frames_of_schema s schema in
362   - Xlist.fold (extract_lex_frames lexeme pos [] frame) valence (fun valence -> function
363   - lexeme,pos,ComprepFrame(s,pos2,restr,schema) ->
364   - let schemas = simplify_lex (split_xor (split_or_coord schema)) in
365   - Xlist.fold schemas valence (fun valence schema ->
366   - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
367   - let poss = StringMap.add_inc poss pos [ComprepFrame(s,pos2,restr,schema)] (fun l -> ComprepFrame(s,pos2,restr,schema) :: l) in
368   - StringMap.add valence lexeme poss)
369   - | lexeme,pos,LexFrame(id,pos2,restr,schema) ->
370   - let schemas = simplify_lex (split_xor (split_or_coord schema)) in
371   - Xlist.fold schemas valence (fun valence schema ->
372   - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in
373   - let poss = StringMap.add_inc poss pos [LexFrame(id,pos2,restr,schema)] (fun l -> LexFrame(id,pos2,restr,schema) :: l) in
374   - StringMap.add valence lexeme poss)
375   - | _ -> failwith "convert_comprep_frame")
376   - with ImpossibleSchema -> valence
377   -
(* remove_pro_args schema: strips a leading Phrase Pro or Phrase Null from the
   morfs of every schema field. A field whose only morf is Pro or Null is
   dropped; a field that starts with one of them keeps its remaining morfs;
   other fields are kept as they are. Pro or Null at a later place in the list
   is not removed. The fold prepends and the result is reversed afterwards,
   presumably to keep the original field order - assumes Xlist.fold runs left
   to right, TODO confirm. *)
378   -let remove_pro_args schema = (* FIXME: check whether Pro and Null are always at the beginning *)
379   - List.rev (Xlist.fold schema [] (fun schema -> function
380   - {morfs=[Phrase Pro]} -> schema
381   - | {morfs=(Phrase Pro) :: morfs} as s -> {s with morfs=morfs} :: schema
382   - | {morfs=[Phrase Null]} -> schema
383   - | {morfs=(Phrase Null) :: morfs} as s -> {s with morfs=morfs} :: schema
384   - | s -> s :: schema))
385   -
386   -
387   -
388   -(*let _ =
389   - let valence = Xlist.fold all_frames StringMap.empty (fun valence (pos,frame_map) ->
390   - print_endline pos;
391   - StringMap.fold frame_map valence (fun valence lexeme frames ->
392   - Xlist.fold frames valence (fun valence frame ->
393   -(* print_endline (ENIAMwalStringOf.unparsed_frame lexeme frame); *)
394   - convert_frame expands subtypes equivs StringMap.empty valence lexeme pos frame))) in
395   - print_endline "comprepnp";
396   - let valence = StringMap.fold compreps valence (fun valence lexeme frames ->
397   - Xlist.fold frames valence (fun valence (pos,frame) ->
398   - convert_comprep_frame expands subtypes equivs StringMap.empty valence lexeme pos frame)) in
399   - print_endline "expand_restr";
400   - let valence = StringMap.mapi valence (fun lexeme poss ->
401   - StringMap.mapi poss (fun pos frames ->
402   - List.flatten (Xlist.map frames (expand_restr valence lexeme pos)))) in
403   - print_endline "transform_frame";
404   - let _ = StringMap.mapi valence (fun lexeme poss ->
405   - StringMap.mapi poss (fun pos frames ->
406   -(* print_endline lexeme; *)
407   - List.flatten (Xlist.map frames (transform_frame lexeme pos)))) in
408   - print_endline "done";
409   - ()*)
410   -(* StringMap.iter valence (fun lexeme poss ->
411   - StringMap.iter poss (fun pos frames ->
412   - Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame lexeme frame))))*)
lexSemantics/ENIAMwalRenderer.ml
... ... @@ -220,6 +220,7 @@ let render_lex_entry = function
(* adjunct morfs: position copied from empty_position with gf ADJUNCT,
   is_necessary Opt and every morf wrapped in LCG. *)
220 220 let adjunct morfs = {empty_position with gf=ADJUNCT; is_necessary=Opt; morfs=Xlist.map morfs (fun morf -> LCG morf)}
(* adjunct_multi dir morfs: like adjunct, but with is_necessary Multi and an
   explicit direction dir. *)
221 221 let adjunct_multi dir morfs = {empty_position with gf=ADJUNCT; is_necessary=Multi; dir=dir; morfs=Xlist.map morfs (fun morf -> LCG morf)}
(* adjunct_dir dir morfs: like adjunct, with is_necessary Opt and an explicit
   direction dir. *)
222 222 let adjunct_dir dir morfs = {empty_position with gf=ADJUNCT; is_necessary=Opt; dir=dir; morfs=Xlist.map morfs (fun morf -> LCG morf)}
  (* adjunct_ce ce morfs: like adjunct, with is_necessary Opt and the ce list
     set to the single element ce. Presumably ce is a controllee label that is
     matched against a cr label on another position - TODO confirm. *)
  223 +let adjunct_ce ce morfs = {empty_position with gf=ADJUNCT; ce=[ce]; is_necessary=Opt; morfs=Xlist.map morfs (fun morf -> LCG morf)}
223 224  
(* render_comprep prep: pair of the direction Both and the term
   Plus[One; Tensor[Atom comprepnp; Atom prep]]. *)
224 225 let render_comprep prep = Both,Plus[One;Tensor[Atom "comprepnp"; Atom prep]]
225 226  
... ... @@ -282,7 +283,7 @@ let verb_connected_adjuncts_simp = [
282 283 adjunct_dir Forward_ [Tensor[Atom "cp";Top; Top]];
283 284 adjunct [Tensor[Atom "or"]];
284 285 adjunct [Tensor[Atom "lex";Atom "się";Atom "qub"]];
285   - adjunct [Tensor[Atom "padvp"]];
  286 + adjunct_ce "3" [Tensor[Atom "padvp"]];
286 287 ]
287 288  
288 289 let proper_noun_adjuncts_simp = [
... ...