Commit 1e3a756e036af57885dd04fe6c14f9c644be2d40
1 parent
05112178
Redukcja drzew walencji semantycznej
Showing
7 changed files
with
85 additions
and
415 deletions
LCGparser/ENIAM_LCGgraphOf.ml
... | ... | @@ -95,6 +95,7 @@ let rec print_simplified_dependency_tree_rec2 file edge upper = function |
95 | 95 | (* fprintf file " %s [shape=box,label=\"SetAttr(%s,%s)\"]\n" id a (ENIAM_LCGstringOf.linear_term 0 s); |
96 | 96 | print_edge file edge upper id; *) |
97 | 97 | (* print_simplified_dependency_tree_rec2 file "" id t *) |
98 | + | Node t -> () | |
98 | 99 | | t -> failwith ("print_simplified_dependency_tree_rec2: " ^ ENIAM_LCGstringOf.linear_term 0 t) |
99 | 100 | |
100 | 101 | let rec print_simplified_dependency_tree_rec file edge upper id = function |
... | ... |
exec/ENIAMsemValence.ml
... | ... | @@ -123,7 +123,14 @@ let rec match_args_positions_rec positions = function |
123 | 123 | Printf.printf "match_args_positions_rec: args=[] positions=[%s]\n%!" (String.concat "; " (Xlist.map positions string_of_position)); |
124 | 124 | let b = Xlist.fold positions false (fun b p -> p.is_necessary || b) in |
125 | 125 | if b then print_endline "match_args_positions: not matched"; |
126 | - if b then [] else [[]] (* FIXME: miejsce na wstawianie pro? *) | |
126 | + if b then [] else | |
127 | + [Xlist.fold positions [] (fun found p -> | |
128 | + if not p.is_pro then found else | |
129 | + let attrs = ["role",p.role; "role_attr",p.role_attr; "selprefs",p.selprefs; "gf",Val (ENIAMwalStringOf.gf p.gf)] in | |
130 | + let attrs = if p.is_prong then attrs else attrs in (* FIXME: dodać number, gender *) | |
131 | + let attrs = Xlist.fold p.cr attrs (fun attrs cr -> ("controller",Val cr) :: attrs) in | |
132 | + let attrs = Xlist.fold p.ce attrs (fun attrs ce -> ("controllee",Val ce) :: attrs) in | |
133 | + Node{ENIAM_LCGrenderer.empty_node with lemma="pro"; pos="pro"; attrs=attrs} :: found)] | |
127 | 134 | |
128 | 135 | (* FIXME: opcjonalność podrzędników argumentów zleksykalizowanych *) |
129 | 136 | |
... | ... | @@ -275,3 +282,69 @@ let assign tokens lex_sems text = |
275 | 282 | if result.status <> Parsed then ENIAMSentence result else |
276 | 283 | ENIAMSentence {result with dependency_tree6=assign_frames tokens lex_sems result.dependency_tree6} |
277 | 284 | | t -> t) text |
285 | + | |
286 | +let rec cut_nodes result_tree = function | |
287 | + | Node t -> | |
288 | + let i = ExtArray.add result_tree (Node t) in | |
289 | + Ref i | |
290 | + | Variant(e,l) -> | |
291 | + let l = Xlist.rev_map l (fun (i,t) -> i, cut_nodes result_tree t) in | |
292 | + Variant(e,List.rev l) | |
293 | + | Tuple l -> | |
294 | + let l = Xlist.rev_map l (cut_nodes result_tree) in | |
295 | + Tuple(List.rev l) | |
296 | + | Dot -> Dot | |
297 | + | t -> failwith ("cut_nodes: " ^ ENIAM_LCGstringOf.linear_term 0 t) | |
298 | + | |
299 | +let rec reduce_set_attr attr v = function | |
300 | + Node t -> Node{t with attrs=(attr,v) :: t.attrs} | |
301 | + | Variant(e,l) -> | |
302 | + Variant(e,List.rev (Xlist.rev_map l (fun (i,t) -> | |
303 | + i, reduce_set_attr attr v t))) | |
304 | + | t -> failwith ("reduce_set_attr: " ^ ENIAM_LCGstringOf.linear_term 0 t) | |
305 | + | |
306 | +let rec reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree = function | |
307 | + Ref i -> | |
308 | + if mid_tree.(i) <> Dot then mid_tree.(i) else | |
309 | + let t = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree orig_tree.(i) in | |
310 | + mid_tree.(i) <- t; | |
311 | + t | |
312 | + | Node t -> | |
313 | + let args = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t.args in | |
314 | + print_endline ("reduce_tree_rec 1: " ^ ENIAM_LCGstringOf.linear_term 0 args); | |
315 | + let args = cut_nodes result_tree args in | |
316 | + print_endline ("reduce_tree_rec 2: " ^ ENIAM_LCGstringOf.linear_term 0 args); | |
317 | + let id = | |
318 | + if t.id = 0 then | |
319 | + let id = ExtArray.add tokens {ENIAMtokenizerTypes.empty_token_env with ENIAMtokenizerTypes.token=ENIAMtokenizerTypes.Lemma("pro","pro",[[]])} in | |
320 | + let _ = ExtArray.add lex_sems empty_lex_sem in | |
321 | + id | |
322 | + else t.id in | |
323 | + Node{t with args=args; id=id} | |
324 | + | Variant(e,l) -> | |
325 | + let l = Xlist.rev_map l (fun (i,t) -> i, reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t) in | |
326 | + Variant(e,List.rev l) | |
327 | + | Tuple l -> | |
328 | + let l = Xlist.rev_map l (reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree) in | |
329 | + Tuple(List.rev l) | |
330 | + | Dot -> Dot | |
331 | + | SetAttr(attr,v,t) -> | |
332 | + let t = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t in | |
333 | + reduce_set_attr attr v t | |
334 | + | t -> failwith ("reduce_tree_rec: " ^ ENIAM_LCGstringOf.linear_term 0 t) | |
335 | + | |
336 | +let reduce_tree tokens lex_sems orig_tree = | |
337 | + print_endline "reduce_tree"; | |
338 | + let mid_tree = Array.make (Array.length orig_tree) Dot in | |
339 | + let result_tree = ExtArray.make (Array.length orig_tree) Dot in | |
340 | + let _ = ExtArray.add result_tree Dot in | |
341 | + let t = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree orig_tree.(0) in | |
342 | + ExtArray.set result_tree 0 t; | |
343 | + ExtArray.to_array result_tree | |
344 | + | |
345 | +let reduce tokens lex_sems text = | |
346 | + map_text Struct (fun mode -> function | |
347 | + ENIAMSentence result -> | |
348 | + if result.status <> Parsed then ENIAMSentence result else | |
349 | + ENIAMSentence {result with dependency_tree6=reduce_tree tokens lex_sems result.dependency_tree6} | |
350 | + | t -> t) text | |
... | ... |
exec/TODO
exec/parser.ml
... | ... | @@ -97,6 +97,7 @@ let rec main_loop sub_in sub_out = |
97 | 97 | let text = if !select_sentence_modes_flag then ENIAMselectSent.select_sentence_modes_text text else text in |
98 | 98 | let text = if !select_sentences_flag then ENIAMselectSent.select_sentences_text ENIAMexecTypes.Struct text else text in |
99 | 99 | let text = if !assign_semantic_valence_flag then ENIAMsemValence.assign tokens lex_sems text else text in |
100 | + let text = if !assign_semantic_valence_flag then ENIAMsemValence.reduce tokens lex_sems text else text in | |
100 | 101 | ENIAMvisualization.print_html_text !output_dir "parsed_text" text !img !verbosity tokens); |
101 | 102 | prerr_endline "Done!"; |
102 | 103 | main_loop sub_in sub_out) |
... | ... |
lexSemantics/ENIAMadjuncts.ml
... | ... | @@ -270,13 +270,17 @@ let add_adjuncts preps compreps compars pos2 (selectors,schema) = |
270 | 270 | |
271 | 271 | open ENIAMlexSemanticsTypes |
272 | 272 | |
273 | +let add_subj_cr cr positions = | |
274 | + Xlist.map positions (fun p -> | |
275 | + if p.gf = SUBJ then {p with cr=cr :: p.cr} else p) | |
276 | + | |
273 | 277 | let add_connected_adjuncts preps compreps compars pos2 frame = |
274 | 278 | let compreps = Xlist.rev_map compreps ENIAMwalRenderer.render_connected_comprep in |
275 | 279 | let prepnps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_connected_prepnp prep cases) in |
276 | 280 | let prepadjps = Xlist.rev_map preps (fun (prep,cases) -> ENIAMwalRenderer.render_connected_prepadjp prep cases) in |
277 | 281 | let compars = Xlist.rev_map compars ENIAMwalRenderer.render_connected_compar in |
278 | 282 | match pos2 with |
279 | - "verb" -> [{frame with positions=frame.positions @ ENIAMwalRenderer.verb_connected_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars}] | |
283 | + "verb" -> [{frame with positions=(add_subj_cr "3" frame.positions) @ ENIAMwalRenderer.verb_connected_adjuncts_simp @ prepnps @ prepadjps @ compreps @ compars}] | |
280 | 284 | | "noun" -> [ |
281 | 285 | {frame with selectors=[Nsyn,Eq,["proper"]] @ frame.selectors; positions=ENIAMwalRenderer.proper_noun_connected_adjuncts_simp @ prepnps @ compreps @ compars}; |
282 | 286 | {frame with selectors=[Nsyn,Eq,["common"];Nsem,Eq,["measure"]] @ frame.selectors; positions=ENIAMwalRenderer.measure_noun_connected_adjuncts_simp @ prepnps @ compreps @ compars}; |
... | ... |
lexSemantics/ENIAMwalFrames.ml deleted
1 | -(* | |
2 | - * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | - * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | - * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | - * | |
6 | - * This library is free software: you can redistribute it and/or modify | |
7 | - * it under the terms of the GNU Lesser General Public License as published by | |
8 | - * the Free Software Foundation, either version 3 of the License, or | |
9 | - * (at your option) any later version. | |
10 | - * | |
11 | - * This library is distributed in the hope that it will be useful, | |
12 | - * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | - * GNU Lesser General Public License for more details. | |
15 | - * | |
16 | - * You should have received a copy of the GNU Lesser General Public License | |
17 | - * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | - *) | |
19 | - | |
20 | -open ENIAMwalTypes | |
21 | -open Xstd | |
22 | - | |
23 | -let expands,compreps,comprep_reqs,subtypes,equivs = ENIAMwalParser.load_realizations () | |
24 | -(*let verb_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.verb_filename) | |
25 | -let noun_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.noun_filename) | |
26 | -let adj_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.adj_filename) | |
27 | -let adv_frames = ENIAMwalParser.load_frames (Paths.walenty_path ^ Paths.adv_filename) *) | |
28 | - | |
29 | -let walenty = (*StringMap.empty*)ENIAMwalTEI.load_walenty2 () | |
30 | - | |
31 | -(*let _ = StringMap.iter walenty (fun pos map -> | |
32 | - StringMap.iter map (fun lexeme frames -> | |
33 | - Printf.printf "%s %s %d\n%!" pos lexeme (Xlist.size frames)))*) | |
34 | - | |
35 | -(*let all_frames = | |
36 | - ["subst",noun_frames; | |
37 | - "adj",adj_frames; | |
38 | - "adv",adv_frames; | |
39 | - "ger",verb_frames; | |
40 | - "pact",verb_frames; | |
41 | - "ppas",verb_frames; | |
42 | - "fin",verb_frames; | |
43 | - "praet",verb_frames; | |
44 | - "impt",verb_frames; | |
45 | - "imps",verb_frames; | |
46 | - "inf",verb_frames; | |
47 | - "pcon",verb_frames]*) | |
48 | - | |
49 | -let rec get_role_and_sense = function | |
50 | - Phrase(Lex "się") -> "Theme","", [] | |
51 | - | PhraseAbbr(Xp "abl",_) -> "Location","Source", [] | |
52 | - | PhraseAbbr(Xp "adl",_) -> "Location","Goal", [] | |
53 | - | PhraseAbbr(Xp "caus",_) -> "Condition","", [] | |
54 | - | PhraseAbbr(Xp "dest",_) -> "Purpose","", [] | |
55 | - | PhraseAbbr(Xp "dur",_) -> "Duration","", [] | |
56 | - | PhraseAbbr(Xp "instr",_) -> "Instrument","", [] | |
57 | - | PhraseAbbr(Xp "locat",_) -> "Location","", [] | |
58 | - | PhraseAbbr(Xp "mod",_) -> "Manner","", [] | |
59 | - | PhraseAbbr(Xp "perl",_) -> "Path","", [] | |
60 | - | PhraseAbbr(Xp "temp",_) -> "Time","", [] | |
61 | - | PhraseAbbr(Advp "abl",_) -> "Location","Source", [] | |
62 | - | PhraseAbbr(Advp "adl",_) -> "Location","Goal", [] | |
63 | - | PhraseAbbr(Advp "dur",_) -> "Duration","", [] | |
64 | - | PhraseAbbr(Advp "locat",_) -> "Location","", [] | |
65 | - | PhraseAbbr(Advp "mod",_) -> "Manner","", [] | |
66 | - | PhraseAbbr(Advp "perl",_) -> "Path","", [] | |
67 | - | PhraseAbbr(Advp "temp",_) -> "Time","", [] | |
68 | -(* | PhraseAbbr(Advp "pron",_) -> "Arg","", [] | |
69 | - | PhraseAbbr(Advp "misc",_) -> "Arg","", []*) | |
70 | - | PhraseAbbr(Distrp,_) -> "Distributive","", [] (* FIXME: to jest kwantyfikator *) | |
71 | - | PhraseAbbr(Possp,_) -> "Possesive","", [] | |
72 | - | LexPhraseMode("abl",_,_) -> "Location","Source", [] | |
73 | - | LexPhraseMode("adl",_,_) -> "Location","Goal", [] | |
74 | - | LexPhraseMode("caus",_,_) -> "Condition","", [] | |
75 | - | LexPhraseMode("dest",_,_) -> "Purpose","", [] | |
76 | - | LexPhraseMode("dur",_,_) -> "Duration","", [] | |
77 | - | LexPhraseMode("instr",_,_) -> "Instrument","", [] | |
78 | - | LexPhraseMode("locat",_,_) -> "Location","", [] | |
79 | - | LexPhraseMode("mod",_,_) -> "Manner","", [] | |
80 | - | LexPhraseMode("perl",_,_) -> "Path","", [] | |
81 | - | LexPhraseMode("temp",_,_) -> "Time","", [] | |
82 | - | _ -> "Arg","", [] | |
83 | - | |
84 | - | |
85 | -(*let rec get_gf_role = function | |
86 | - [],Phrase(NP case) -> "C", "", ["T"] | |
87 | - | [],Phrase(AdjP case) -> "R", "", ["T"] | |
88 | - | [],Phrase(NumP(case,_)) -> "C", "", ["T"] | |
89 | - | [],Phrase(PrepNP _) -> "C", "", ["T"] | |
90 | - | [],Phrase(PrepAdjP _) -> "C", "", ["T"] | |
91 | - | [],Phrase(PrepNumP _) -> "C", "", ["T"] | |
92 | - | [],Phrase(ComprepNP _) -> "C", "", ["T"] | |
93 | - | [],Phrase(ComparP _) -> "C", "", ["T"] | |
94 | - | [],Phrase(CP _) -> "C", "", ["T"] | |
95 | - | [],Phrase(NCP(case,_,_)) -> "C", "", ["T"] | |
96 | - | [],Phrase(PrepNCP _) -> "C", "", ["T"] | |
97 | - | [],Phrase(InfP _) -> "C", "", ["T"] | |
98 | - | [],Phrase(FixedP _) -> "C", "", ["T"] | |
99 | - | [],Phrase Or -> "C", "", ["T"] (* FIXME: zbadać w walentym faktyczne użycia or, bo to nie tylko zdania, ale też np(nom) w cudzysłowach *) | |
100 | - | [],Phrase(Lex "się") -> "C", "Ptnt", ["T"] | |
101 | - | [],PhraseAbbr(Xp mode,_) -> "C", mode, ["T"] | |
102 | - | [],PhraseAbbr(Advp "pron",_) -> "R", "", ["T"] | |
103 | - | [],PhraseAbbr(Advp "misc",_) -> "R", "", ["T"] | |
104 | - | [],PhraseAbbr(Advp mode,_) -> "C", mode, ["T"] | |
105 | - | [],PhraseAbbr(Nonch,_) -> "C", "", ["T"] | |
106 | - | [],PhraseAbbr(Distrp,_) -> "C", "Distr", ["T"] | |
107 | - | [],PhraseAbbr(Possp,_) -> "C", "Poss", ["T"] | |
108 | - | [],LexPhraseMode(mode,_,_) -> "C", mode, ["T"] | |
109 | - | [],LexPhrase((SUBST(_,case),_) :: _,_) -> "C", "", ["T"] | |
110 | - | [],LexPhrase((PREP _,_) :: _,_) -> "C", "", ["T"] | |
111 | - | [],LexPhrase((NUM(case,_,_),_) :: _,_) -> "C", "", ["T"] | |
112 | - | [],LexPhrase((ADJ(_,case,_,_),_) :: _,_) -> "C", "", ["T"] | |
113 | - | [],LexPhrase((ADV _,_) :: _,_) -> "C", "", ["T"] | |
114 | - | [],LexPhrase((GER(_,case,_,_,_,_),_) :: _,_) -> "C", "", ["T"] | |
115 | - | [],LexPhrase((PACT(_,case,_,_,_,_),_) :: _,_) -> "C", "", ["T"] | |
116 | - | [],LexPhrase((PPAS(_,case,_,_,_),_) :: _,_) -> "C", "", ["T"] | |
117 | - | [],LexPhrase((INF _,_) :: _,_) -> "C", "", ["T"] | |
118 | - | [],LexPhrase((QUB,_) :: _,_) -> "C", "", ["T"] | |
119 | - | [],LexPhrase((COMPAR,_) :: _,_) -> "C", "", ["T"] | |
120 | - | [],LexPhrase((COMP _,_) :: _,_) -> "C", "", ["T"] | |
121 | - | [],morf -> print_endline(*failwith*) ("get_gf: []," ^ ENIAMwalStringOf.morf morf);"","",[] | |
122 | - | _,Phrase(InfP _) -> "X", "", ["T"] | |
123 | - | _,Phrase(CP _) -> "X", "", ["T"] (* zwykle możliwa koordynacja z infp *) | |
124 | - | _,Phrase _ -> "X", "", ["T"] | |
125 | - | _,PhraseAbbr _ -> "X", "", ["T"] | |
126 | - | _,LexPhraseMode _ -> "X", "", ["T"] | |
127 | - | _,LexPhrase((INF _,_) :: _,_) -> "X", "", ["T"] | |
128 | - | _,LexPhrase _ -> "X", "", ["T"] | |
129 | - | _,morf -> failwith ("get_gf: _," ^ ENIAMwalStringOf.morf morf)*) | |
130 | - | |
131 | -(*let gf_rank = Xlist.fold [ | |
132 | - "",1; | |
133 | - ] StringMap.empty (fun gf_rank (gf,v) -> StringMap.add gf_rank gf v)*) | |
134 | - | |
135 | -(*let agregate_gfs s gfs_roles = | |
136 | -(* fst (Xlist.fold gfs ("",0) (fun (best_gf,best_rank) gf -> | |
137 | - let rank = try StringMap.find gf_rank gf with Not_found -> failwith ("agregate_gfs: " ^ gf) in | |
138 | - if rank > best_rank then gf, rank else best_gf, best_rank))*) | |
139 | -(* let gfs,roles = List.split gfs_roles in | |
140 | - let gfs = StringSet.to_list (Xlist.fold gfs StringSet.empty StringSet.add) in | |
141 | - if Xlist.size gfs > 1 then print_endline ("agregate_gfs: " ^ String.concat " " gfs); | |
142 | - if Xlist.size roles > 1 then print_endline ("agregate_gfs: " ^ String.concat " " roles);*) | |
143 | - let gf,role,prefs = List.hd gfs_roles in | |
144 | - {s with gf=gf; role=role; prefs=prefs} | |
145 | - | |
146 | -let rec make_gfs schema = | |
147 | - let schema = Xlist.map schema (function | |
148 | - {gf="subj"} as s -> {s with gf="SUBJ"; role="Agnt"; prefs=["T"]; morfs=make_gfs_morfs s.morfs} | |
149 | - | {gf="obj"} as s -> {s with gf="OBJ"; role="Ptnt"; prefs=["T"]; morfs=make_gfs_morfs s.morfs} | |
150 | - | {gf=""} as s -> agregate_gfs {s with morfs=make_gfs_morfs s.morfs} (Xlist.map s.morfs (fun morf -> get_gf_role (s.ce,morf))) | |
151 | - | {gf=t} -> failwith ("make_gfs: " ^ t)) in | |
152 | -(* let schema = List.rev (fst (Xlist.fold schema ([],StringMap.empty) (fun (schema,map) s -> | |
153 | - try | |
154 | - let n = StringMap.find map s.gf in | |
155 | - {s with gf=s.gf ^ string_of_int (n+1)} :: schema, | |
156 | - StringMap.add map s.gf (n+1) | |
157 | - with Not_found -> | |
158 | - s :: schema, StringMap.add map s.gf 1))) in*) | |
159 | - schema | |
160 | - | |
161 | -and make_gfs_morfs morfs = | |
162 | - List.flatten (Xlist.map morfs (function | |
163 | - Phrase _ as morf -> [morf] | |
164 | - | PhraseAbbr(Advp _,[]) -> [Phrase AdvP] | |
165 | - | PhraseAbbr(_,[]) -> failwith "make_gfs_morfs" | |
166 | - | PhraseAbbr(_,morfs) -> make_gfs_morfs morfs | |
167 | - | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,make_gfs schema))] | |
168 | - | LexPhraseMode(_,pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,make_gfs schema))] | |
169 | - | _ -> failwith "make_gfs_morfs"))*) | |
170 | - | |
171 | -let mark_nosem_morfs morfs = | |
172 | - Xlist.map morfs (function | |
173 | - Phrase(PrepNP(_,prep,c)) -> Phrase(PrepNP(NoSem,prep,c)) | |
174 | - | Phrase(PrepAdjP(_,prep,c)) -> Phrase(PrepAdjP(NoSem,prep,c)) | |
175 | - | Phrase(PrepNumP(_,prep,c)) -> Phrase(PrepNumP(NoSem,prep,c)) | |
176 | -(* | Phrase(ComprepNP(_,prep)) -> Phrase(ComprepNP(NoSem,prep)) *) (* FIXME: na razie ComprepNP są zawsze semantyczne *) | |
177 | -(* | Phrase(ComparNP(_,prep,c)) -> Phrase(ComparNP(NoSem,prep,c)) (* FIXME: pomijam niesemantyczny compar *) | |
178 | - | Phrase(ComparPP(_,prep)) -> Phrase(ComparPP(NoSem,prep))*) | |
179 | - | Phrase(PrepNCP(_,prep,c,ct,co)) -> Phrase(PrepNCP(NoSem,prep,c,ct,co)) | |
180 | - | t -> t) | |
181 | - | |
182 | - | |
183 | -let agregate_role_and_sense s l = | |
184 | - let roles,senses = Xlist.fold l (StringSet.empty,StringSet.empty) (fun (roles,senses) (role,role_attr,sense) -> | |
185 | - StringSet.add roles (role ^ " " ^ role_attr), | |
186 | - Xlist.fold sense senses StringSet.add) in | |
187 | - let roles = if StringSet.size roles = 1 then roles else StringSet.remove roles "Arg " in | |
188 | - let role,role_attr = | |
189 | - match Str.split (Str.regexp " ") (StringSet.min_elt roles) with | |
190 | - [r;a] -> r,a | |
191 | - | [r] -> r,"" | |
192 | - | _ -> failwith "agregate_role_and_sense" in | |
193 | - {s with role=role; role_attr=role_attr(*; sel_prefs=StringSet.to_list senses*)} | |
194 | - | |
195 | -let rec assign_role_and_sense schema = | |
196 | - Xlist.map schema (function | |
197 | - {gf=SUBJ} as s -> | |
198 | - if s.role = "" then {s with role="Initiator"; sel_prefs=["ALL"]; morfs=assign_role_and_sense_morfs s.morfs} | |
199 | - else {s with morfs=assign_role_and_sense_morfs (mark_nosem_morfs s.morfs)} | |
200 | - | {gf=OBJ} as s -> | |
201 | - if s.role = "" then {s with role="Theme"; sel_prefs=["ALL"]; morfs=assign_role_and_sense_morfs s.morfs} | |
202 | - else {s with morfs=assign_role_and_sense_morfs (mark_nosem_morfs s.morfs)} | |
203 | - | {gf=ARG} as s -> | |
204 | - if s.role = "" then agregate_role_and_sense {s with sel_prefs=["ALL"]; morfs=assign_role_and_sense_morfs s.morfs} | |
205 | - (Xlist.map s.morfs (fun morf -> get_role_and_sense morf)) | |
206 | - else {s with morfs=assign_role_and_sense_morfs (mark_nosem_morfs s.morfs)} | |
207 | - | _ -> failwith "assign_role_and_sense") | |
208 | - | |
209 | -and assign_role_and_sense_morfs morfs = | |
210 | - List.flatten (Xlist.map morfs (function | |
211 | - Phrase _ as morf -> [morf] | |
212 | - | E _ as morf -> [morf] | |
213 | - | PhraseAbbr(Advp _,[]) -> [Phrase AdvP] | |
214 | - | PhraseAbbr(_,[]) -> failwith "assign_role_and_sense_morfs" | |
215 | - | PhraseAbbr(_,morfs) -> assign_role_and_sense_morfs morfs | |
216 | - | LexPhrase(pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,assign_role_and_sense schema))] | |
217 | - | LexPhraseMode(_,pos_lex,(restr,schema)) -> [LexPhrase(pos_lex,(restr,assign_role_and_sense schema))] | |
218 | - | _ -> failwith "assign_role_and_sense_morfs")) | |
219 | - | |
220 | -(*let _ = | |
221 | - Xlist.iter walenty_filenames (fun filename -> | |
222 | - print_endline filename; | |
223 | - let frames = load_frames (walenty_path ^ filename) in | |
224 | - StringMap.iter frames (fun _ l -> | |
225 | - Xlist.iter l (fun (refl,opinion,negation,pred,aspect,schema) -> | |
226 | - ignore (process_opinion opinion); | |
227 | - ignore (process_negation [Text negation]); | |
228 | - ignore (process_pred [Text pred]); | |
229 | - ignore (process_aspect [Text aspect]); | |
230 | - ignore (assign_pro_args (make_gfs (process_schema expands subtypes equivs schema))))))*) | |
231 | - | |
232 | -let remove_trivial_args schema = | |
233 | - Xlist.fold schema [] (fun l (_,_,_,morfs) -> | |
234 | - let morfs = Xlist.fold morfs [] (fun morfs -> function | |
235 | - Phrase(AdjP _) -> morfs | |
236 | - | Phrase(NP(Case "gen")) -> morfs | |
237 | - | Phrase(NCP(Case "gen",_,_)) -> morfs | |
238 | - | Phrase(PrepNP _) -> morfs | |
239 | - | Phrase(FixedP _) -> morfs | |
240 | - | LexPhrase([ADJ _,_],_) -> morfs | |
241 | - | LexPhrase([PPAS _,_],_) -> morfs | |
242 | - | LexPhrase([PACT _,_],_) -> morfs | |
243 | - | LexPhrase([SUBST(_,Case "gen"),_],_) -> morfs | |
244 | - | LexPhrase([PREP _,_;_],_) -> morfs | |
245 | - | morf -> morf :: morfs) in | |
246 | - if morfs = [] then l else morfs :: l) | |
247 | - | |
248 | -(* leksykalizacje do zmiany struktury | |
249 | -lex([PREP(gen),'z';SUBST(sg,gen),'nazwa'],atr1[OBL{lex([QUB,'tylko'],natr[])}]) | |
250 | -lex([PREP(loc),'na';SUBST(sg,loc),'papier'],atr1[OBL{lex([QUB,'tylko'],natr[])}]) | |
251 | -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}]) | |
252 | -lex([PREP(gen),'z';SUBST(sg,gen),'most'],ratr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}]) | |
253 | -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}]) | |
254 | -lex([PREP(gen),'z';SUBST(sg,gen),'most'],ratr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}]) | |
255 | -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}]) | |
256 | -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}]) | |
257 | -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}]) | |
258 | -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}]) | |
259 | -lex([PREP(acc),'w';SUBST(pl,acc),'oko'],atr1[OBL-MOD{lex([ADV(pos),'prosto'],natr[])}]) | |
260 | -*) | |
261 | - | |
262 | -let num_arg_schema_field morfs = | |
263 | - {gf=CORE; role="QUANT-ARG"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Forward; morfs=morfs} | |
264 | - | |
265 | -let std_arg_schema_field dir morfs = | |
266 | - {gf=ARG; role="Arg"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs} | |
267 | - | |
268 | -let simple_arg_schema_field morfs = | |
269 | - {gf=ARG; role=""; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=morfs} | |
270 | - | |
271 | -let nosem_refl_schema_field = | |
272 | - {gf=NOSEM; role=""; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[Phrase(Lex "się")]} | |
273 | - | |
274 | - | |
275 | -let expand_lexicalizations = function | |
276 | - Frame(atrs,schema) -> Frame(atrs,expand_lexicalizations_schema schema) | |
277 | -(* ComprepFrame(s,morfs) -> ComprepFrame(atrs,expand_lexicalizations_morfs morfs) *) | |
278 | - | _ -> failwith "expand_lexicalizations" | |
279 | - | |
280 | - | |
281 | -let prepare_schema_comprep expands subtypes equivs schema = | |
282 | - assign_pro_args (assign_role_and_sense (ENIAMwalParser.expand_equivs_schema equivs (ENIAMwalParser.expand_subtypes subtypes (ENIAMwalParser.expand_schema expands schema)))) | |
283 | - | |
284 | -let prepare_schema expands subtypes equivs schema = | |
285 | - prepare_schema_comprep expands subtypes equivs (ENIAMwalParser.parse_schema schema) | |
286 | - | |
287 | -let prepare_schema_sem expands subtypes equivs schema = | |
288 | - prepare_schema_comprep expands subtypes equivs schema | |
289 | - | |
290 | - | |
291 | -let convert_frame expands subtypes equivs lexemes valence lexeme pos (refl,opinion,negation,pred,aspect,schema) = | |
292 | -(* Printf.printf "convert_frame %s %s\n" lexeme pos; *) | |
293 | - try | |
294 | - if refl = "się" && not (StringMap.mem lexemes "się") then raise ImpossibleSchema else | |
295 | - let frame = | |
296 | - try StringMap.find default_frames refl (* w refl jest przekazywana informacja o typie domyślnej ramki *) | |
297 | - with Not_found -> | |
298 | - Frame(DefaultAtrs([],ENIAMwalParser.parse_refl [Text refl], | |
299 | - ENIAMwalParser.parse_opinion opinion, | |
300 | - ENIAMwalParser.parse_negation [Text negation], | |
301 | - ENIAMwalParser.parse_pred [Text pred], | |
302 | - ENIAMwalParser.parse_aspect [Text aspect]), | |
303 | - prepare_schema expands subtypes equivs schema) in | |
304 | - let frame = if StringMap.is_empty lexemes then frame else reduce_schema_frame lexemes frame in | |
305 | - let frame = expand_lexicalizations frame in | |
306 | - Xlist.fold (extract_lex_frames lexeme pos [] frame) valence (fun valence -> function | |
307 | - lexeme,pos,Frame(atrs,schema) -> | |
308 | - let schemas = simplify_lex (split_xor (split_or_coord schema)) in | |
309 | - Xlist.fold schemas valence (fun valence schema -> | |
310 | - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in | |
311 | - let poss = StringMap.add_inc poss pos [Frame(atrs,schema)] (fun l -> Frame(atrs,schema) :: l) in | |
312 | - StringMap.add valence lexeme poss) | |
313 | - | lexeme,pos,LexFrame(id,pos2,restr,schema) -> | |
314 | - let schemas = simplify_lex (split_xor (split_or_coord schema)) in | |
315 | - Xlist.fold schemas valence (fun valence schema -> | |
316 | - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in | |
317 | - let poss = StringMap.add_inc poss pos [LexFrame(id,pos2,restr,schema)] (fun l -> LexFrame(id,pos2,restr,schema) :: l) in | |
318 | - StringMap.add valence lexeme poss) | |
319 | - | _ -> failwith "convert_frame") | |
320 | - with ImpossibleSchema -> valence | |
321 | - | |
322 | -let convert_frame_sem expands subtypes equivs lexemes valence lexeme pos = function | |
323 | - Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect),positions) -> | |
324 | -(* Printf.printf "convert_frame_sem %s\n" (ENIAMwalStringOf.frame lexeme (Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect),positions))); *) | |
325 | - (try | |
326 | - if refl = ReflSie && not (StringMap.mem lexemes "się") then raise ImpossibleSchema else | |
327 | - let frame = | |
328 | - Frame(DefaultAtrs(meanings,refl,opinion,negation,pred,aspect), | |
329 | - prepare_schema_sem expands subtypes equivs positions) in | |
330 | - let frame = if StringMap.is_empty lexemes then frame else reduce_schema_frame lexemes frame in | |
331 | - let frame = expand_lexicalizations frame in | |
332 | - Xlist.fold (extract_lex_frames lexeme pos [] frame) valence (fun valence -> function | |
333 | - lexeme,pos,Frame(atrs,schema) -> | |
334 | - let schemas = simplify_lex (split_xor (split_or_coord schema)) in | |
335 | - Xlist.fold schemas valence (fun valence schema -> | |
336 | - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in | |
337 | - let poss = StringMap.add_inc poss pos [Frame(atrs,schema)] (fun l -> Frame(atrs,schema) :: l) in | |
338 | - StringMap.add valence lexeme poss) | |
339 | - | lexeme,pos,LexFrame(id,pos2,restr,schema) -> | |
340 | - let schemas = simplify_lex (split_xor (split_or_coord schema)) in | |
341 | - Xlist.fold schemas valence (fun valence schema -> | |
342 | - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in | |
343 | - let poss = StringMap.add_inc poss pos [LexFrame(id,pos2,restr,schema)] (fun l -> LexFrame(id,pos2,restr,schema) :: l) in | |
344 | - StringMap.add valence lexeme poss) | |
345 | - | _ -> failwith "convert_frame_sem") | |
346 | - with ImpossibleSchema -> valence) | |
347 | - | _ -> failwith "convert_frame_sem" | |
348 | - | |
349 | -let make_comprep_frames_of_schema s = function | |
350 | - [{cr=[];ce=[]; morfs=[LexPhrase([pos,Lexeme lex],(restr,schema))]}] -> | |
351 | - lex, | |
352 | - (match get_pos lex pos with [pos] -> pos | _ -> failwith "make_comprep_frame_of_schema 2"), | |
353 | - ComprepFrame(s,pos,restr,schema) | |
354 | - | schema -> failwith ("make_comprep_frame_of_schema: " ^ ENIAMwalStringOf.schema schema) | |
355 | - | |
356 | -let convert_comprep_frame expands subtypes equivs lexemes valence lexeme pos (s,morf) = | |
357 | - try | |
358 | - let schema = prepare_schema_comprep expands subtypes equivs [simple_arg_schema_field [morf]] in | |
359 | - let schema = if StringMap.is_empty lexemes then schema else reduce_schema lexemes schema in | |
360 | - let schema = expand_lexicalizations_schema schema in | |
361 | - let lexeme,pos,frame = make_comprep_frames_of_schema s schema in | |
362 | - Xlist.fold (extract_lex_frames lexeme pos [] frame) valence (fun valence -> function | |
363 | - lexeme,pos,ComprepFrame(s,pos2,restr,schema) -> | |
364 | - let schemas = simplify_lex (split_xor (split_or_coord schema)) in | |
365 | - Xlist.fold schemas valence (fun valence schema -> | |
366 | - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in | |
367 | - let poss = StringMap.add_inc poss pos [ComprepFrame(s,pos2,restr,schema)] (fun l -> ComprepFrame(s,pos2,restr,schema) :: l) in | |
368 | - StringMap.add valence lexeme poss) | |
369 | - | lexeme,pos,LexFrame(id,pos2,restr,schema) -> | |
370 | - let schemas = simplify_lex (split_xor (split_or_coord schema)) in | |
371 | - Xlist.fold schemas valence (fun valence schema -> | |
372 | - let poss = try StringMap.find valence lexeme with Not_found -> StringMap.empty in | |
373 | - let poss = StringMap.add_inc poss pos [LexFrame(id,pos2,restr,schema)] (fun l -> LexFrame(id,pos2,restr,schema) :: l) in | |
374 | - StringMap.add valence lexeme poss) | |
375 | - | _ -> failwith "convert_comprep_frame") | |
376 | - with ImpossibleSchema -> valence | |
377 | - | |
378 | -let remove_pro_args schema = (* FIXME: sprawdzić czy Pro i Null są zawsze na początku *) | |
379 | - List.rev (Xlist.fold schema [] (fun schema -> function | |
380 | - {morfs=[Phrase Pro]} -> schema | |
381 | - | {morfs=(Phrase Pro) :: morfs} as s -> {s with morfs=morfs} :: schema | |
382 | - | {morfs=[Phrase Null]} -> schema | |
383 | - | {morfs=(Phrase Null) :: morfs} as s -> {s with morfs=morfs} :: schema | |
384 | - | s -> s :: schema)) | |
385 | - | |
386 | - | |
387 | - | |
388 | -(*let _ = | |
389 | - let valence = Xlist.fold all_frames StringMap.empty (fun valence (pos,frame_map) -> | |
390 | - print_endline pos; | |
391 | - StringMap.fold frame_map valence (fun valence lexeme frames -> | |
392 | - Xlist.fold frames valence (fun valence frame -> | |
393 | -(* print_endline (ENIAMwalStringOf.unparsed_frame lexeme frame); *) | |
394 | - convert_frame expands subtypes equivs StringMap.empty valence lexeme pos frame))) in | |
395 | - print_endline "comprepnp"; | |
396 | - let valence = StringMap.fold compreps valence (fun valence lexeme frames -> | |
397 | - Xlist.fold frames valence (fun valence (pos,frame) -> | |
398 | - convert_comprep_frame expands subtypes equivs StringMap.empty valence lexeme pos frame)) in | |
399 | - print_endline "expand_restr"; | |
400 | - let valence = StringMap.mapi valence (fun lexeme poss -> | |
401 | - StringMap.mapi poss (fun pos frames -> | |
402 | - List.flatten (Xlist.map frames (expand_restr valence lexeme pos)))) in | |
403 | - print_endline "transform_frame"; | |
404 | - let _ = StringMap.mapi valence (fun lexeme poss -> | |
405 | - StringMap.mapi poss (fun pos frames -> | |
406 | -(* print_endline lexeme; *) | |
407 | - List.flatten (Xlist.map frames (transform_frame lexeme pos)))) in | |
408 | - print_endline "done"; | |
409 | - ()*) | |
410 | -(* StringMap.iter valence (fun lexeme poss -> | |
411 | - StringMap.iter poss (fun pos frames -> | |
412 | - Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame lexeme frame))))*) |
lexSemantics/ENIAMwalRenderer.ml
... | ... | @@ -220,6 +220,7 @@ let render_lex_entry = function |
220 | 220 | let adjunct morfs = {empty_position with gf=ADJUNCT; is_necessary=Opt; morfs=Xlist.map morfs (fun morf -> LCG morf)} |
221 | 221 | let adjunct_multi dir morfs = {empty_position with gf=ADJUNCT; is_necessary=Multi; dir=dir; morfs=Xlist.map morfs (fun morf -> LCG morf)} |
222 | 222 | let adjunct_dir dir morfs = {empty_position with gf=ADJUNCT; is_necessary=Opt; dir=dir; morfs=Xlist.map morfs (fun morf -> LCG morf)} |
223 | +let adjunct_ce ce morfs = {empty_position with gf=ADJUNCT; ce=[ce]; is_necessary=Opt; morfs=Xlist.map morfs (fun morf -> LCG morf)} | |
223 | 224 | |
224 | 225 | let render_comprep prep = Both,Plus[One;Tensor[Atom "comprepnp"; Atom prep]] |
225 | 226 | |
... | ... | @@ -282,7 +283,7 @@ let verb_connected_adjuncts_simp = [ |
282 | 283 | adjunct_dir Forward_ [Tensor[Atom "cp";Top; Top]]; |
283 | 284 | adjunct [Tensor[Atom "or"]]; |
284 | 285 | adjunct [Tensor[Atom "lex";Atom "się";Atom "qub"]]; |
285 | - adjunct [Tensor[Atom "padvp"]]; | |
286 | + adjunct_ce "3" [Tensor[Atom "padvp"]]; | |
286 | 287 | ] |
287 | 288 | |
288 | 289 | let proper_noun_adjuncts_simp = [ |
... | ... |