Commit b8d9e115cb4ee2506c5457b6497a0b4ebb50baf3
1 parent
27138426
upraszczanie schematów walencyjnych
Showing
9 changed files
with
512 additions
and
307 deletions
lexSemantics/ENIAMadjuncts.ml
0 → 100644
1 | +(* | |
2 | + * ENIAMlexSemantics is a library that assigns tokens with lexicosemantic information. | |
3 | + * Copyright (C) 2016-2017 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl> | |
4 | + * Copyright (C) 2016-2017 Institute of Computer Science Polish Academy of Sciences | |
5 | + * | |
6 | + * This library is free software: you can redistribute it and/or modify | |
7 | + * it under the terms of the GNU Lesser General Public License as published by | |
8 | + * the Free Software Foundation, either version 3 of the License, or | |
9 | + * (at your option) any later version. | |
10 | + * | |
11 | + * This library is distributed in the hope that it will be useful, | |
12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | + * GNU Lesser General Public License for more details. | |
15 | + * | |
16 | + * You should have received a copy of the GNU Lesser General Public License | |
17 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. | |
18 | + *) | |
19 | + | |
20 | +open ENIAMwalTypes | |
21 | +open Xstd | |
22 | + | |
23 | +let simplify_position_verb mode l = function (* FIXME: dodać czyszczenie E Pro *) | |
24 | + NP(Case "dat") -> l | |
25 | + | NP(Case "inst") -> l | |
26 | + | NCP(Case "dat",_,_) -> l | |
27 | + | NCP(Case "inst",_,_) -> l | |
28 | + | CP _ -> l | |
29 | + | PrepNP _ -> l | |
30 | + | PrepAdjP _ -> l | |
31 | + | ComprepNP _ -> l | |
32 | + | ComparP _ -> l | |
33 | + | PrepNCP _ -> l | |
34 | + | AdvP _ -> l | |
35 | + | Or -> l | |
36 | + | SimpleLexArg("się",QUB) -> l | |
37 | + | E Or -> l | |
38 | + | E (CP(CompTypeUndef,CompUndef)) -> l | |
39 | + | E (PrepNP(prep,Case case)) -> l | |
40 | + | E (PrepNCP(prep,Case case,CompTypeUndef,CompUndef)) -> l | |
41 | + | NP(Case "gen") as t -> if mode = "temp" then l else t :: l | |
42 | + | NP(Case "acc") as t -> if mode = "dur" then l else t :: l | |
43 | + | t -> t :: l | |
44 | + | |
45 | +let simplify_position_noun mode l = function | |
46 | + NP(Case "gen") -> l | |
47 | + | NP(Case "nom") -> l | |
48 | + | NCP(Case "gen",_,_) -> l | |
49 | + | NP(CaseAgr) -> l | |
50 | + | AdjP AllAgr -> l | |
51 | + | PrepNP _ -> l | |
52 | + | ComprepNP _ -> l | |
53 | + | ComparP _ -> l | |
54 | + | PrepNCP _ -> l | |
55 | + | t -> t :: l | |
56 | + | |
57 | +let simplify_position_adj mode l = function | |
58 | + AdvP _ -> l | |
59 | + | ComparP _ -> l | |
60 | + | t -> t :: l | |
61 | + | |
62 | +let simplify_position_adv mode l = function | |
63 | + AdvP _ -> l (* FIXME: czy na pewno zostawić swobodę modyfikowania przysłówka? *) | |
64 | + | t -> t :: l | |
65 | + | |
66 | +(* | |
67 | +let simplify_position pos l s = | |
68 | + let morfs = match pos with | |
69 | + "verb" -> List.rev (Xlist.fold s.morfs [] simplify_position_verb) | |
70 | + | "noun" -> List.rev (Xlist.fold s.morfs [] simplify_position_noun) | |
71 | + | "adj" -> List.rev (Xlist.fold s.morfs [] simplify_position_adj) | |
72 | + | "adv" -> List.rev (Xlist.fold s.morfs [] simplify_position_adv) | |
73 | + | _ -> s.morfs in | |
74 | + match morfs with | |
75 | + [] -> l | |
76 | + | [Phrase Null] -> l | |
77 | + | _ -> {s with morfs=morfs} :: l | |
78 | + | |
79 | +let simplify_schemata pos schemata = | |
80 | + let schemata = Xlist.fold schemata StringMap.empty (fun schemata (schema,frame) -> | |
81 | + let schema = List.sort compare (Xlist.fold schema [] (fun l s -> | |
82 | + let s = {s with role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; morfs=List.sort compare s.morfs} in | |
83 | + if s.gf <> ARG && s.gf <> ADJUNCT then s :: l else | |
84 | + (* if s.cr <> [] || s.ce <> [] then s :: l else *) | |
85 | + simplify_position pos l s)) in | |
86 | + StringMap.add_inc schemata (ENIAMwalStringOf.schema schema) (schema,[frame]) (fun (_,frames) -> schema, frame :: frames)) in | |
87 | + StringMap.fold schemata [] (fun l _ s -> s :: l) | |
88 | + | |
89 | +let simplify_schemata2 pos schemata = | |
90 | + let simplify_position_fun = match pos with | |
91 | + "verb" -> simplify_position_verb2 | |
92 | + | "noun" -> simplify_position_noun | |
93 | + | "adj" -> simplify_position_adj | |
94 | + | "adv" -> simplify_position_adv | |
95 | + | _ -> (fun l x -> x :: l) in | |
96 | + let morfs = Xlist.fold schemata [] (fun morfs schema -> | |
97 | + Xlist.fold schema morfs (fun morfs s -> | |
98 | + Xlist.fold s.morfs morfs simplify_position_fun)) in | |
99 | + let morfs = Xlist.fold morfs StringMap.empty (fun map s -> | |
100 | + StringMap.add map (ENIAMwalStringOf.morf s) s) in | |
101 | + let schema = StringMap.fold morfs [] (fun schema _ morf -> | |
102 | + {gf=ARG; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; | |
103 | + dir=Both; morfs=[Phrase Null;morf]} :: schema) in | |
104 | + schema*) | |
105 | + | |
106 | +(*let rec classify_phrase = function | |
107 | + NP _ as phrase -> ENIAMwalStringOf.phrase phrase | |
108 | + | PrepNP _ as phrase -> ENIAMwalStringOf.phrase phrase | |
109 | + | AdjP _ as phrase -> ENIAMwalStringOf.phrase phrase | |
110 | + | PrepAdjP _ as phrase -> ENIAMwalStringOf.phrase phrase | |
111 | + | ComprepNP _ as phrase -> ENIAMwalStringOf.phrase phrase | |
112 | + | ComparP _ as phrase -> ENIAMwalStringOf.phrase phrase | |
113 | + | CP _ -> "cp" | |
114 | + | NCP(case,_,_) -> ENIAMwalStringOf.phrase (NP case) | |
115 | + | PrepNCP(prep,case,_,_) -> ENIAMwalStringOf.phrase (PrepNP(prep,case)) | |
116 | + | InfP _ -> "infp" | |
117 | + | AdvP _ -> "advp" | |
118 | + | FixedP _ as phrase -> ENIAMwalStringOf.phrase phrase | |
119 | + | Or -> "or" | |
120 | + | E Or -> "or" | |
121 | + | E phrase -> classify_phrase phrase | |
122 | + | SimpleLexArg("się",QUB) -> "się" | |
123 | + (* | SimpleLexArg _ -> "lex" *) | |
124 | +(* | LexArg _ -> "lex" *) | |
125 | + | SimpleLexArg _ as phrase -> ENIAMwalStringOf.phrase phrase | |
126 | + | LexArg _ as phrase -> ENIAMwalStringOf.phrase phrase | |
127 | + | phrase -> print_endline ("classify_phrase: " ^ ENIAMwalStringOf.phrase phrase); "other" | |
128 | + | |
129 | +let classify_position pos p = | |
130 | + let l = (*StringSet.to_list*) (Xlist.fold p.morfs StringSet.empty (fun set morf -> | |
131 | + StringSet.add set ((*classify_phrase*)ENIAMwalStringOf.phrase morf))) in | |
132 | + (* match l with | |
133 | + [] -> "empty" | |
134 | + | [c] -> c | |
135 | + (* | ["np(gen)"; "np(acc)"] -> "np(str)" | |
136 | + | ["np(gen)"; "infp"] -> "np(gen)-infp" | |
137 | + | ["np(acc)"; "infp"] -> "np(acc)-infp" *) | |
138 | + | _ -> let c = String.concat " " l in if pos="adv" then print_endline c; c *) | |
139 | + l*) | |
140 | + | |
141 | +module OrderedPhrase = struct | |
142 | + type t = phrase | |
143 | + let compare = compare | |
144 | +end | |
145 | + | |
146 | +module PhraseSet = Xset.Make(OrderedPhrase) | |
147 | + | |
148 | + | |
149 | +let remove_adjuncts_schema pos lemma schema = | |
150 | + let simplify_position_fun = match pos with | |
151 | + "verb" -> simplify_position_verb | |
152 | + | "noun" -> simplify_position_noun | |
153 | + | "adj" -> simplify_position_adj | |
154 | + | "adv" -> simplify_position_adv | |
155 | + | _ -> (fun _ l x -> x :: l) in | |
156 | + List.flatten (Xlist.map schema (fun p -> | |
157 | + let morfs = Xlist.fold p.morfs [] (simplify_position_fun (String.concat " " p.mode)) in | |
158 | + if morfs = [] then [] else [PhraseSet.of_list morfs])) | |
159 | + (* let schema2 = List.flatten (Xlist.map schema1 (fun p -> | |
160 | + let p = {p with morfs = Xlist.fold p.morfs [] (simplify_position_fun (String.concat " " p.mode))} in | |
161 | + let c = classify_position pos p in | |
162 | + if StringSet.is_empty c (*"empty"*) then [] else [c,[p]])) in | |
163 | + (* let sum = Xlist.fold schema2 StringSet.empty (fun set (c,p) -> StringSet.union set c) in | |
164 | + let n = Xlist.fold schema2 0 (fun n (c,p) -> n + StringSet.size c) in | |
165 | + if StringSet.size sum <> n (*&& pos = "noun"*) then (*Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema1);*) | |
166 | + Printf.printf "%s %s %s\n" pos lemma (String.concat "+" (Xlist.map schema2 (fun (c,p) -> String.concat "," p.mode ^ "{" ^ String.concat ";" (StringSet.to_list c) ^ "}"))); *) | |
167 | +(* let set = Xlist.fold schema2 StringSet.empty (fun set (c,_) -> | |
168 | + StringSet.add set c) in | |
169 | + (* if StringSet.mem set "np(acc)" && StringSet.mem set "infp" then | |
170 | + Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema1); *) | |
171 | + let schema2 = if StringSet.mem set "np(gen)" && StringSet.mem set "np(acc)" then schema2 else | |
172 | + Xlist.map schema2 (function | |
173 | + "np(gen)",p -> "np(str)",p | |
174 | + | "np(acc)",p -> "np(str)",p | |
175 | + | c,p -> c,p) in*) | |
176 | + schema2 *) | |
177 | + | |
178 | + | |
179 | +let is_disjunctive schema = | |
180 | + let sum = Xlist.fold schema PhraseSet.empty (fun set morfs -> PhraseSet.union set morfs) in | |
181 | + let n = Xlist.fold schema 0 (fun n morfs -> n + PhraseSet.size morfs) in | |
182 | + PhraseSet.size sum = n | |
183 | + | |
184 | +let rec find_overlapping morfs rev = function | |
185 | + morfs2 :: schema -> | |
186 | + if PhraseSet.is_empty (PhraseSet.intersection morfs morfs2) then find_overlapping morfs (morfs2 :: rev) schema | |
187 | + else morfs2, List.rev rev @ schema | |
188 | + | [] -> raise Not_found | |
189 | + | |
190 | +let rec merge_schemata_rec cont = function | |
191 | + [] -> cont | |
192 | + | [schema] -> schema :: cont | |
193 | + | schema1 :: schema2 :: schemata -> | |
194 | + let sum_schema,diff_schema = Xlist.fold schema1 ([],schema2) (fun (sum_schema,diff_schema) morfs -> | |
195 | + try | |
196 | + let morfs2,diff_schema = find_overlapping morfs [] diff_schema in | |
197 | + (PhraseSet.union morfs morfs2) :: sum_schema,diff_schema | |
198 | + with Not_found -> morfs :: sum_schema,diff_schema) in | |
199 | + let schema = sum_schema @ diff_schema in | |
200 | + if is_disjunctive schema then ((*print_endline "A";*) merge_schemata_rec cont (schema :: schemata)) | |
201 | + else ((*print_endline "B";*) merge_schemata_rec (schema :: cont) schemata) | |
202 | + | |
203 | +let rec merge_schemata schemata = | |
204 | + let cont,schemata = Xlist.fold schemata ([],[]) (fun (cont,schemata) schema -> | |
205 | + if is_disjunctive schema then cont, schema :: schemata else ((*print_endline "C";*) schema :: cont, schemata)) in | |
206 | + merge_schemata_rec cont schemata | |
207 | + | |
208 | + | |
209 | + | |
210 | +module OrderedSelector = struct | |
211 | + type t = (ENIAM_LCGlexiconTypes.selector * | |
212 | + ENIAM_LCGlexiconTypes.selector_relation * string list) | |
213 | + list | |
214 | + let compare = compare | |
215 | +end | |
216 | + | |
217 | +module SelectorMap = Xmap.Make(OrderedSelector) | |
218 | + | |
219 | +let latex_of_selectors selectors = | |
220 | + String.concat ", " (Xlist.map selectors (fun (cat,rel,l) -> | |
221 | + let rel = if rel = ENIAM_LCGlexiconTypes.Eq then "=" else "!=" in | |
222 | + ENIAMcategoriesPL.string_of_selector cat ^ rel ^ (String.concat "|" l))) | |
223 | + | |
224 | +let simplify_valence pos pos2 lemma schemata = | |
225 | + (* Xlist.iter schemata (fun (selectors,schema) -> if pos2="verb" then Printf.printf "A %s %s [%s] %s\n" pos lemma (latex_of_selectors selectors) (ENIAMwalStringOf.schema schema)); *) | |
226 | + let map = Xlist.fold schemata SelectorMap.empty (fun map (selectors,schema) -> | |
227 | + SelectorMap.add_inc map selectors [schema] (fun l -> schema :: l)) in | |
228 | + let schemata = SelectorMap.fold map [] (fun new_schemata selectors schemata -> | |
229 | + (selectors,Xlist.map schemata (remove_adjuncts_schema pos2 lemma)) :: new_schemata) in | |
230 | + (* Xlist.iter schemata (fun (_,schemata) -> | |
231 | + Xlist.iter schemata (fun schema -> if pos2="verb" then Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema))); *) | |
232 | + (* Xlist.iter schemata (fun (selectors,schemata) -> Xlist.iter schemata (fun schema -> | |
233 | + if pos2="verb" then Printf.printf "B %s %s [%s] %s\n" pos lemma (latex_of_selectors selectors) (String.concat "+" (Xlist.map schema (fun morfs -> | |
234 | + "{" ^ String.concat ";" (PhraseSet.fold morfs [] (fun l m -> ENIAMwalStringOf.phrase m :: l)) ^ "}"))))); *) | |
235 | + let schemata = List.flatten (Xlist.map schemata (fun (selectors,schemata) -> | |
236 | + Xlist.map (merge_schemata schemata) (fun schema -> selectors,Xlist.map schema PhraseSet.to_list))) in | |
237 | + (* Xlist.iter schemata (fun (selectors,schema) -> | |
238 | + if pos2="verb" then Printf.printf "C %s %s [%s] %s\n" pos lemma (latex_of_selectors selectors) (String.concat "+" (Xlist.map schema (fun morfs -> | |
239 | + "{" ^ String.concat ";" (PhraseSet.fold morfs [] (fun l m -> ENIAMwalStringOf.phrase m :: l)) ^ "}")))); *) | |
240 | + schemata | |
241 | + | |
242 | +let _ = | |
243 | + let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries in | |
244 | + let _ = Entries.map2 schemata (fun pos lemma schemata -> simplify_valence pos (ENIAMvalence.simplify_pos pos) lemma schemata) in | |
245 | + () | |
246 | + | |
247 | + | |
248 | +(* | |
249 | +let default_frames = Xlist.fold [ (* FIXME: poprawić domyślne ramki po ustaleniu adjunctów *) | |
250 | + "verb",(ReflEmpty,Domyslny,NegationUndef,PredNA,AspectUndef,"subj{np(str)}+obj{np(str)}"); (* FIXME: dodać ramkę z refl *) | |
251 | + "noun",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{possp}+{adjp(agr)}"); | |
252 | + "adj",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); | |
253 | + "adv",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); | |
254 | + "empty",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); | |
255 | + "date",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',natr)}"); | |
256 | + "date2",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',atr1({adjp(agr)}))}"); (* FIXME: wskazać możliwe podrzędniki *) | |
257 | + "day",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"" | |
258 | + (*"{lex(np(gen),sg,XOR('styczeń','luty','marzec','kwiecień','maj','czerwiec','lipiec','sierpień','wrzesień','październik','litopad','grudzień'),atr1({np(gen)}))}"*)); (* FIXME: wskazać możliwe podrzędniki *) | |
259 | + "hour",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(advp(temp),pos,'rano',natr)}"); | |
260 | + ] StringMap.empty (fun map (k,(refl,opinion,negation,pred,aspect,schema)) -> | |
261 | + StringMap.add map k (Frame(DefaultAtrs([],refl,opinion,negation,pred,aspect),prepare_schema expands subtypes equivs schema))) | |
262 | + | |
263 | +let adjunct_schema_field role dir morfs = | |
264 | + {gf=ADJUNCT; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs} | |
265 | + | |
266 | +let verb_prep_adjunct_schema_field lemma case = | |
267 | + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ | |
268 | + Phrase Null; | |
269 | + Phrase(PrepNP(Sem,lemma,Case case)); | |
270 | + Phrase(PrepAdjP(Sem,lemma,Case case)); | |
271 | + Phrase(PrepNumP(Sem,lemma,Case case))]} | |
272 | + | |
273 | +let verb_comprep_adjunct_schema_field lemma = | |
274 | + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ | |
275 | + Phrase Null; | |
276 | + Phrase(ComprepNP(Sem,lemma))]} | |
277 | + | |
278 | +let verb_compar_adjunct_schema_field lemma = | |
279 | + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ | |
280 | + Phrase Null; | |
281 | + Phrase(ComparPP(Sem,lemma))] @ | |
282 | + Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case)))} | |
283 | + | |
284 | +let noun_prep_adjunct_schema_field preps compreps = | |
285 | + {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= | |
286 | + let l = Xlist.fold preps [Phrase Null] (fun l (lemma,case) -> | |
287 | + [Phrase(PrepNP(Sem,lemma,Case case)); | |
288 | + Phrase(PrepAdjP(Sem,lemma,Case case)); | |
289 | + Phrase(PrepNumP(Sem,lemma,Case case))] @ l) in | |
290 | + Xlist.fold compreps l (fun l lemma -> | |
291 | + Phrase(ComprepNP(Sem,lemma)) :: l)} | |
292 | + | |
293 | +let noun_compar_adjunct_schema_field compars = | |
294 | + {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= | |
295 | + Xlist.fold compars [Phrase Null] (fun l lemma -> | |
296 | + [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)} | |
297 | + | |
298 | +let adj_compar_adjunct_schema_field compars = | |
299 | + {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= | |
300 | + Xlist.fold compars [Phrase Null] (fun l lemma -> | |
301 | + [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)} | |
302 | + | |
303 | +(*let nogf_schema_field dir morfs = | |
304 | + {gf=NOGF; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=dir; morfs=morfs} *) | |
305 | + | |
306 | +let schema_field gf role dir morfs = | |
307 | + {gf=gf; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs} | |
308 | + | |
309 | +(*let verb_adjuncts = [ | |
310 | + adjunct_schema_field "R" "" Both [Phrase AdvP]; | |
311 | + adjunct_schema_field "R" "" Both [Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) | |
312 | + ] | |
313 | + | |
314 | + let noun_adjuncts = [ | |
315 | + adjunct_schema_field "C" "poss" Both [Phrase(NP(Case "gen"))]; | |
316 | + adjunct_schema_field "C" "=" Both [Phrase(NP(Case "nom"))]; | |
317 | + adjunct_schema_field "C" "=" Both [Phrase(NP(CaseAgr))]; | |
318 | + adjunct_schema_field "R" "" Backward [Multi[AdjP AllAgr]]; | |
319 | + adjunct_schema_field "R" "" Forward [Multi[AdjP AllAgr]]; | |
320 | + adjunct_schema_field "R" "" Both [Phrase PrepP]; | |
321 | + ] | |
322 | + | |
323 | + let adj_adjuncts = [ | |
324 | + adjunct_schema_field "R" "" Both [Phrase PrepP]; | |
325 | + ]*) | |
326 | + | |
327 | +let verb_adjuncts = [ | |
328 | + (* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP]; | |
329 | + adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) | |
330 | + adjunct_schema_field "Topic" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *)*) | |
331 | +] | |
332 | + | |
333 | +(* FIXME: pozycje dublują się z domyślną ramką "noun" *) | |
334 | +let noun_adjuncts = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) | |
335 | + (* adjunct_schema_field "poss" Both [Phrase Null;Phrase(NP(Case "gen"))]; | |
336 | + adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(Case "nom"))]; | |
337 | + adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(CaseAgr))]; | |
338 | + adjunct_schema_field "" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) | |
339 | + adjunct_schema_field "" Forward [Phrase Null;Phrase(AdjP AllAgr)]; | |
340 | + adjunct_schema_field "" Both [Phrase Null;Phrase PrepP];*) | |
341 | +] | |
342 | + | |
343 | +let adj_adjuncts = [ | |
344 | + (* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP]; *) | |
345 | +] | |
346 | + | |
347 | + | |
348 | +let verb_adjuncts_simp = [ | |
349 | + adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; | |
350 | + adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))]; | |
351 | + adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))]; | |
352 | + adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")]; | |
353 | + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *) | |
354 | + adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *) | |
355 | + adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or]; | |
356 | +] | |
357 | + | |
358 | +let verb_adjuncts_simp2 = [ | |
359 | + adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; | |
360 | + adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))]; | |
361 | + adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))]; | |
362 | + adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")]; | |
363 | + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *) | |
364 | + adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *) | |
365 | + adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or]; | |
366 | + adjunct_schema_field "Theme" Both [Phrase Null;Phrase(Lex "się")]; | |
367 | +] | |
368 | + | |
369 | +let noun_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) | |
370 | + adjunct_schema_field "Possesive" Both [Phrase Null;Phrase(NP(Case "gen"));Phrase(NumP(Case "gen"))]; | |
371 | + adjunct_schema_field "Aposition" Forward [Phrase Null;Phrase(NP(Case "nom"));Phrase(NumP(Case "nom"));Phrase Null;Phrase(NP(CaseAgr));Phrase(NumP(CaseAgr))]; | |
372 | + adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) | |
373 | + adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)]; | |
374 | + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *) | |
375 | +] | |
376 | + | |
377 | +let noun_measure_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) | |
378 | + adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) | |
379 | + adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)]; | |
380 | + (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *) | |
381 | +] | |
382 | +*) | |
383 | +(* let noun_adjuncts = [NP(Case "gen");NP(Case "nom");Multi[AdjP AllAgr] | |
384 | + | |
385 | +] | |
386 | + | |
387 | +let verb_adjuncts = [ | |
388 | + | |
389 | +] | |
390 | + | |
391 | +let adj_adjuncts = [AdvP "misc"] | |
392 | + | |
393 | +let adv_adjuncts = [AdvP "misc"] *) | |
... | ... |
lexSemantics/ENIAMlexSemantics.ml
... | ... | @@ -22,7 +22,7 @@ open ENIAMsubsyntaxTypes |
22 | 22 | open ENIAMlexSemanticsTypes |
23 | 23 | open ENIAMwalTypes |
24 | 24 | open Xstd |
25 | - | |
25 | +(* | |
26 | 26 | let string_of_lex_sems tokens lex_sems = |
27 | 27 | String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size lex_sems - 1) [] (fun l id -> |
28 | 28 | let t = ExtArray.get lex_sems id in |
... | ... | @@ -43,7 +43,7 @@ let find_senses t = (* FIXME: sensy zawierające 'się' *) |
43 | 43 | Lemma(lemma,pos,_) -> ENIAMplWordnet.find_senses lemma pos |
44 | 44 | | Proper(_,_,_,senses) -> ENIAMplWordnet.find_proper_senses senses |
45 | 45 | | _ -> [] |
46 | - | |
46 | +*) | |
47 | 47 | let rec find a l i = |
48 | 48 | if a.(i) = max_int then ( |
49 | 49 | a.(i) <- i; |
... | ... | @@ -74,14 +74,14 @@ let rec split_tokens_into_groups_sentence a = function |
74 | 74 | union a m (find a [] id))) |
75 | 75 | | QuotedSentences sentences -> |
76 | 76 | Xlist.iter sentences (fun p -> |
77 | - split_tokens_into_groups_sentence a p.psentence) | |
77 | + split_tokens_into_groups_sentence a p.sentence) | |
78 | 78 | | AltSentence l -> Xlist.iter l (fun (mode,sentence) -> |
79 | 79 | split_tokens_into_groups_sentence a sentence) |
80 | 80 | |
81 | 81 | let rec split_tokens_into_groups_paragraph a = function |
82 | 82 | RawParagraph s -> () |
83 | 83 | | StructParagraph sentences -> |
84 | - Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.psentence) | |
84 | + Xlist.iter sentences (fun p -> split_tokens_into_groups_sentence a p.sentence) | |
85 | 85 | | AltParagraph l -> Xlist.iter l (fun (mode,paragraph) -> |
86 | 86 | split_tokens_into_groups_paragraph a paragraph) |
87 | 87 | |
... | ... | @@ -103,6 +103,59 @@ let split_tokens_into_groups size text = |
103 | 103 | IntMap.fold map [] (fun l _ v -> v :: l) |
104 | 104 | |
105 | 105 | let assign_valence tokens lex_sems group = |
106 | + let lexemes = Xlist.fold group StringSet.empty (fun lexemes id -> | |
107 | + let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in | |
108 | + StringSet.add lexemes lemma) in | |
109 | + let entries,schemata,connected = ENIAMwalReduce.select_entries lexemes in | |
110 | + Xlist.iter group (fun id -> | |
111 | + let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in | |
112 | + let pos = ENIAMtokens.get_pos (ExtArray.get tokens id).token in | |
113 | + let pos2 = ENIAMvalence.simplify_pos pos in | |
114 | + let schemata = try Entries.find schemata pos2 lemma with Not_found -> ENIAMvalence.get_default_valence pos2 in | |
115 | + let entries = try Entries.find entries pos lemma with Not_found -> [] in | |
116 | + let schemata = List.flatten (Xlist.map schemata (fun (opinion,neg,pred,aspect,schema) -> | |
117 | + ENIAMvalence.transform_entry pos lemma neg pred aspect schema)) in (* FIXME: gubię opinię *) | |
118 | + let schemata = ENIAMadjuncts.simplify_valence pos pos2 lemma schemata in | |
119 | + let schemata = Xlist.map schemata (fun (selectors,schema) -> | |
120 | + selectors,ENIAMvalence2.render_simple_schema schema) in | |
121 | + let entries = List.flatten (Xlist.map entries (ENIAMvalence.transform_lex_entry pos lemma)) in | |
122 | + let entries = Xlist.map entries (fun (selectors,entry) -> | |
123 | + selectors,ENIAMvalence2.render_lex_entry entry) in | |
124 | + ExtArray.set lex_sems id {(ExtArray.get lex_sems id) with | |
125 | + schemata = schemata; lex_entries=entries}) | |
126 | + | |
127 | +(* TODO: | |
128 | + slashe | |
129 | + test | |
130 | + zgranie z LCGlexicon | |
131 | +*) | |
132 | + | |
133 | + (* let lexemes = Xlist.fold group Entries.empty (fun lexemes id -> | |
134 | + let lemma = ENIAMtokens.get_lemma (ExtArray.get tokens id).token in | |
135 | + | |
136 | + match (ExtArray.get tokens id).token with | |
137 | + Lemma(lemma,pos,_) -> | |
138 | + Entries.add_inc lexemes (ENIAMvalence.simplify_pos pos) lemma pos (*(StringSet.singleton pos) (fun set -> StringSet.add set pos)*) | |
139 | + | Proper(lemma,pos,_,_) -> | |
140 | + (* let pos = match pos with | |
141 | + "subst" -> "psubst" | |
142 | + | "depr" -> "pdepr" | |
143 | + | _ -> pos (*failwith ("assign_valence: Proper " ^ pos ^ " " ^ lemma)*) in *) | |
144 | + Entries.add_inc lexemes (ENIAMvalence.simplify_pos pos) lemma pos (*StringSet.singleton pos) (fun set -> StringSet.add set pos*) | |
145 | + (* StringMap.add_inc lexemes lemma (StringSet.singleton pos) (fun set -> StringSet.add set pos) (* nazwy własne mają przypisywaną domyślną walencję rzeczowników *) *) | |
146 | + | _ -> lexemes) in | |
147 | + let lexemes = Entries.map2 (fun l -> StringSet.to_list (StringSet.of_list l)) in | |
148 | + let lexemes_set = Entries.fold lexemes StringSet.empty (fun lexemes_set _ lemma _ -> StringSet.add lexemes_set lemma) in | |
149 | + let schemata,entries = ENIAMvalence.prepare_selected_valence schemata entries lexemes in | |
150 | + let schemata = ENIAMadjuncts.simplify_valence schemata in | |
151 | + let schemata = Entries.map schemata (fun pos lemma (selectors,schema) -> | |
152 | + selectors,ENIAMvalence2.render_schema schema) in | |
153 | + let entries = Entries.map entries (fun pos lemma (selectors,entry) -> | |
154 | + selectors,ENIAMvalence2.render_lex_entry entry) in *) | |
155 | + | |
156 | + | |
157 | +(* | |
158 | +let assign_valence tokens lex_sems group = | |
106 | 159 | let lexemes = Xlist.fold group StringMap.empty (fun lexemes id -> |
107 | 160 | match (ExtArray.get tokens id).token with |
108 | 161 | Lemma(lemma,pos,_) -> |
... | ... | @@ -159,129 +212,7 @@ let disambiguate_senses lex_sems group = |
159 | 212 | senses = Xlist.map t.senses (fun (s,l,w) -> |
160 | 213 | s, List.rev (Xlist.fold l [] (fun l s -> if StringSet.mem senses s then s :: l else l)),w)}) |
161 | 214 | |
162 | -let simplify_position_verb l = function (* FIXME: dodać czyszczenie E Pro *) | |
163 | - Phrase(NP(Case "dat")) -> l | |
164 | - | Phrase(NP(Case "inst")) -> l | |
165 | - | Phrase(PrepNP _) -> l | |
166 | - | Phrase(PrepAdjP _) -> l | |
167 | - | Phrase(NumP (Case "dat")) -> l | |
168 | - | Phrase(NumP (Case "inst")) -> l | |
169 | - | Phrase(PrepNumP _) -> l | |
170 | - | Phrase(ComprepNP _) -> l | |
171 | - | Phrase(ComparNP _) -> l | |
172 | - | Phrase(ComparPP _) -> l | |
173 | - | Phrase(IP) -> l | |
174 | - | Phrase(CP _) -> l | |
175 | - | Phrase(NCP(Case "dat",_,_)) -> l | |
176 | - | Phrase(NCP(Case "inst",_,_)) -> l | |
177 | - | Phrase(PrepNCP _) -> l | |
178 | -(* | Phrase(PadvP) -> l *) | |
179 | - | Phrase(AdvP) -> l | |
180 | - | Phrase(PrepP) -> l | |
181 | - | Phrase(Or) -> l | |
182 | - | Phrase(Qub) -> l | |
183 | - | Phrase(Adja) -> l | |
184 | - | Phrase(Inclusion) -> l | |
185 | - | Phrase Pro -> Phrase Null :: l | |
186 | - | t -> t :: l | |
187 | - | |
188 | -let simplify_position_verb2 l = function (* FIXME: dodać czyszczenie E Pro *) | |
189 | - Phrase(NP(Case "dat")) -> l | |
190 | - | Phrase(NP(Case "inst")) -> l | |
191 | - | Phrase(PrepNP _) -> l | |
192 | - | Phrase(PrepAdjP _) -> l | |
193 | - | Phrase(NumP (Case "dat")) -> l | |
194 | - | Phrase(NumP (Case "inst")) -> l | |
195 | - | Phrase(PrepNumP _) -> l | |
196 | - | Phrase(ComprepNP _) -> l | |
197 | - | Phrase(ComparNP _) -> l | |
198 | - | Phrase(ComparPP _) -> l | |
199 | - | Phrase(IP) -> l | |
200 | - | Phrase(CP _) -> l | |
201 | - | Phrase(NCP(Case "dat",_,_)) -> l | |
202 | - | Phrase(NCP(Case "inst",_,_)) -> l | |
203 | - | Phrase(PrepNCP _) -> l | |
204 | - (* | Phrase(PadvP) -> l *) | |
205 | - | Phrase(AdvP) -> l | |
206 | - | Phrase(PrepP) -> l | |
207 | - | Phrase(Or) -> l | |
208 | - | Phrase(Qub) -> l | |
209 | - | Phrase(Adja) -> l | |
210 | - | Phrase(Inclusion) -> l | |
211 | - | Phrase(Lex "się") -> l | |
212 | - | Phrase Pro -> Phrase Null :: l | |
213 | - | t -> t :: l | |
214 | - | |
215 | -let simplify_position_noun l = function | |
216 | - Phrase(NP(Case "gen")) -> l | |
217 | - | Phrase(NP(Case "nom")) -> l | |
218 | - | Phrase(NP(CaseAgr)) -> l | |
219 | - | Phrase(PrepNP _) -> l | |
220 | - | Phrase(AdjP AllAgr) -> l | |
221 | - | Phrase(NumP (Case "gen")) -> l | |
222 | - | Phrase(NumP (Case "nom")) -> l | |
223 | - | Phrase(NumP (CaseAgr)) -> l | |
224 | - | Phrase(PrepNumP _) -> l | |
225 | - | Phrase(ComprepNP _) -> l | |
226 | - | Phrase(ComparNP _) -> l | |
227 | - | Phrase(ComparPP _) -> l | |
228 | - | Phrase(IP) -> l | |
229 | - | Phrase(NCP(Case "gen",_,_)) -> l | |
230 | - | Phrase(PrepNCP _) -> l | |
231 | - | Phrase(PrepP) -> l | |
232 | - | Phrase(Qub) -> l | |
233 | - | Phrase(Adja) -> l | |
234 | - | Phrase(Inclusion) -> l | |
235 | - | Phrase Pro -> Phrase Null :: l | |
236 | - | t -> t :: l | |
237 | - | |
238 | -let simplify_position_adj l = function | |
239 | - Phrase(AdvP) -> l | |
240 | - | t -> t :: l | |
241 | - | |
242 | -let simplify_position_adv l = function | |
243 | - Phrase(AdvP) -> l | |
244 | - | t -> t :: l | |
245 | - | |
246 | - | |
247 | -let simplify_position pos l s = | |
248 | - let morfs = match pos with | |
249 | - "verb" -> List.rev (Xlist.fold s.morfs [] simplify_position_verb) | |
250 | - | "noun" -> List.rev (Xlist.fold s.morfs [] simplify_position_noun) | |
251 | - | "adj" -> List.rev (Xlist.fold s.morfs [] simplify_position_adj) | |
252 | - | "adv" -> List.rev (Xlist.fold s.morfs [] simplify_position_adv) | |
253 | - | _ -> s.morfs in | |
254 | - match morfs with | |
255 | - [] -> l | |
256 | - | [Phrase Null] -> l | |
257 | - | _ -> {s with morfs=morfs} :: l | |
258 | - | |
259 | -let simplify_schemata pos schemata = | |
260 | - let schemata = Xlist.fold schemata StringMap.empty (fun schemata (schema,frame) -> | |
261 | - let schema = List.sort compare (Xlist.fold schema [] (fun l s -> | |
262 | - let s = {s with role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; morfs=List.sort compare s.morfs} in | |
263 | - if s.gf <> ARG && s.gf <> ADJUNCT then s :: l else | |
264 | - (* if s.cr <> [] || s.ce <> [] then s :: l else *) | |
265 | - simplify_position pos l s)) in | |
266 | - StringMap.add_inc schemata (ENIAMwalStringOf.schema schema) (schema,[frame]) (fun (_,frames) -> schema, frame :: frames)) in | |
267 | - StringMap.fold schemata [] (fun l _ s -> s :: l) | |
268 | - | |
269 | -let simplify_schemata2 pos schemata = | |
270 | - let simplify_position_fun = match pos with | |
271 | - "verb" -> simplify_position_verb2 | |
272 | - | "noun" -> simplify_position_noun | |
273 | - | "adj" -> simplify_position_adj | |
274 | - | "adv" -> simplify_position_adv | |
275 | - | _ -> (fun l x -> x :: l) in | |
276 | - let morfs = Xlist.fold schemata [] (fun morfs schema -> | |
277 | - Xlist.fold schema morfs (fun morfs s -> | |
278 | - Xlist.fold s.morfs morfs simplify_position_fun)) in | |
279 | - let morfs = Xlist.fold morfs StringMap.empty (fun map s -> | |
280 | - StringMap.add map (ENIAMwalStringOf.morf s) s) in | |
281 | - let schema = StringMap.fold morfs [] (fun schema _ morf -> | |
282 | - {gf=ARG; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; | |
283 | - dir=Both; morfs=[Phrase Null;morf]} :: schema) in | |
284 | - schema | |
215 | + | |
285 | 216 | |
286 | 217 | (* FIXME: problem ComprepNP i PrepNCP *) |
287 | 218 | (* FIXME: problem gdy ten sam token występuje w kilku ścieżkach *) |
... | ... | @@ -471,24 +402,24 @@ let assign_very_simplified_valence tokens lex_sems group = |
471 | 402 | (selectors,schema) :: frames) in |
472 | 403 | (* Printf.printf "|frames|=%d\n" (Xlist.size frames); *) |
473 | 404 | ExtArray.set lex_sems id {t with very_simple_valence=frames}) |
474 | - | |
405 | + *) | |
475 | 406 | |
476 | 407 | let assign tokens text = |
477 | 408 | let lex_sems = ExtArray.make (ExtArray.size tokens) empty_lex_sem in |
478 | 409 | let _ = ExtArray.add lex_sems empty_lex_sem in |
479 | 410 | Int.iter 1 (ExtArray.size tokens - 1) (fun i -> |
480 | - let token = ExtArray.get tokens i in | |
481 | - ExtArray.set tokens i token; | |
482 | - let senses = find_senses token in | |
483 | - let cats = ENIAMcategories.assign token in | |
484 | - let lex_sem = {empty_lex_sem with senses=senses; cats=cats} in | |
411 | + (* let token = ExtArray.get tokens i in | |
412 | + (* ExtArray.set tokens i token; *) | |
413 | + let senses = find_senses token in *) | |
414 | + let lex_sem = {empty_lex_sem with senses=[](*senses*)} in | |
485 | 415 | let _ = ExtArray.add lex_sems lex_sem in |
486 | 416 | ()); |
487 | 417 | let groups = split_tokens_into_groups (ExtArray.size tokens) text in |
488 | 418 | (* Xlist.iter groups (fun group -> print_endline (String.concat " " (Xlist.map group string_of_int))); *) |
489 | 419 | Xlist.iter groups (fun group -> assign_valence tokens lex_sems group); |
420 | + (* Xlist.iter groups (fun group -> assign_valence tokens lex_sems group); | |
490 | 421 | Xlist.iter groups (fun group -> disambiguate_senses lex_sems group); |
491 | 422 | Xlist.iter groups (fun group -> assign_simplified_valence tokens lex_sems group); |
492 | 423 | Xlist.iter groups (fun group -> assign_very_simplified_valence tokens lex_sems group); |
493 | - Xlist.iter groups (fun group -> ENIAMlexSemanticsData.assign_semantics tokens lex_sems group); | |
424 | + Xlist.iter groups (fun group -> ENIAMlexSemanticsData.assign_semantics tokens lex_sems group); *) | |
494 | 425 | lex_sems |
... | ... |
lexSemantics/ENIAMlexSemanticsTypes.ml
... | ... | @@ -36,24 +36,18 @@ type semantics = |
36 | 36 | | SpecialMod of string * (type_arg list * type_term)*) |
37 | 37 | | PrepSemantics of (string * string * string * StringSet.t * string list) list (* case,role,role_attr,hipero,sel_prefs *) |
38 | 38 | |
39 | -(* FIXME: usunąć *) | |
40 | -type categories = {lemma: string; pos: string; pos2: string; | |
41 | - numbers: string list; cases: string list; genders: string list; persons: string list; | |
42 | - grads: string list; praeps: string list; acms: string list; | |
43 | - aspects: string list; negations: string list; moods: string list; tenses: string list; | |
44 | - nsyn: string list; nsem: string list; | |
45 | - } | |
46 | - | |
47 | - | |
48 | 39 | type lex_sem = { |
40 | + schemata: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * | |
41 | + (ENIAM_LCGtypes.direction * ENIAM_LCGtypes.grammar_symbol) list) list; | |
42 | + lex_entries: ((ENIAM_LCGlexiconTypes.selector * ENIAM_LCGlexiconTypes.selector_relation * string list) list * | |
43 | + ENIAM_LCGtypes.grammar_symbol list) list; | |
49 | 44 | e: labels; |
50 | - valence: (int * ENIAMwalTypes.frame) list; | |
45 | + (* valence: (int * ENIAMwalTypes.frame) list; | |
51 | 46 | simple_valence: (int * ENIAMwalTypes.frame) list; |
52 | - very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; | |
47 | + very_simple_valence: ((ENIAM_LCGgrammarPLtypes.cat * ENIAM_LCGgrammarPLtypes.selector_relation * string list) list * ENIAM_LCGtypes.grammar_symbol) list; *) | |
53 | 48 | senses: (string * string list * float) list; |
54 | 49 | lroles: string * string; |
55 | 50 | semantics: semantics; |
56 | - cats: categories list; | |
57 | 51 | } |
58 | 52 | |
59 | 53 | let empty_labels = { |
... | ... | @@ -65,8 +59,9 @@ let empty_labels = { |
65 | 59 | } |
66 | 60 | |
67 | 61 | let empty_lex_sem = { |
68 | - e=empty_labels; valence=[]; simple_valence=[]; very_simple_valence=[]; senses=[]; | |
69 | - lroles="",""; semantics=Normal; cats=[];} | |
62 | + schemata=[]; lex_entries=[]; | |
63 | + e=empty_labels; (*valence=[]; simple_valence=[]; very_simple_valence=[];*) senses=[]; | |
64 | + lroles="",""; semantics=Normal} | |
70 | 65 | |
71 | 66 | (* FIXME: poprawić katalog *) |
72 | 67 | (*let subst_uncountable_lexemes_filename = resource_path ^ "/lexSemantics/subst_uncountable.dat" |
... | ... | @@ -74,8 +69,8 @@ let subst_uncountable_lexemes_filename2 = resource_path ^ "/lexSemantics/subst_u |
74 | 69 | let subst_container_lexemes_filename = resource_path ^ "/lexSemantics/subst_container.dat" |
75 | 70 | let subst_numeral_lexemes_filename = resource_path ^ "/lexSemantics/subst_numeral.dat" |
76 | 71 | let subst_time_lexemes_filename = resource_path ^ "/lexSemantics/subst_time.dat"*) |
77 | -let subst_uncountable_lexemes_filename = resource_path ^ "/Walenty/subst_uncountable.dat" | |
72 | +(* let subst_uncountable_lexemes_filename = resource_path ^ "/Walenty/subst_uncountable.dat" | |
78 | 73 | let subst_uncountable_lexemes_filename2 = resource_path ^ "/Walenty/subst_uncountable_stare.dat" |
79 | 74 | let subst_container_lexemes_filename = resource_path ^ "/Walenty/subst_container.dat" |
80 | 75 | let subst_numeral_lexemes_filename = resource_path ^ "/Walenty/subst_numeral.dat" |
81 | -let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat" | |
76 | +let subst_time_lexemes_filename = resource_path ^ "/Walenty/subst_time.dat" *) | |
... | ... |
lexSemantics/ENIAMvalence.ml
... | ... | @@ -23,8 +23,8 @@ open Xstd |
23 | 23 | let simplify_pos = function |
24 | 24 | "subst" -> "noun" |
25 | 25 | | "depr" -> "noun" |
26 | - | "psubst" -> "noun" | |
27 | - | "pdepr" -> "noun" | |
26 | + (* | "psubst" -> "noun" | |
27 | + | "pdepr" -> "noun" *) | |
28 | 28 | | "adj" -> "adj" |
29 | 29 | | "adjc" -> "adj" |
30 | 30 | | "adjp" -> "adj" |
... | ... | @@ -629,3 +629,17 @@ let prepare_all_valence phrases schemata entries = |
629 | 629 | (Entries.flatten_map entries transform_lex_entry) |
630 | 630 | |
631 | 631 | (* let _ = prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries *) |
632 | + | |
633 | +(* let prepare_selected_valence schemata entries lexemes = | |
634 | + Entries.fold schemata Entries.empty (fun schemata pos2 lemma (opinion,neg,pred,aspect,schema) -> | |
635 | + Xlist.fold (Entries.find pos2 lemma) schemata (function pos -> | |
636 | + Entries.add_inc_list schemata pos lemma (transform_entry pos lemma neg pred aspect schema))), | |
637 | + Entries.flatten_map entries transform_lex_entry *) | |
638 | + | |
639 | +let get_default_valence = function | |
640 | + "verb" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[{empty_position with gf=SUBJ; morfs=[NP(Str);NCP(Str,CompTypeUndef,CompUndef)]}; | |
641 | + {empty_position with gf=OBJ; morfs=[NP(Str);NCP(Str,CompTypeUndef,CompUndef)]}]] | |
642 | + | "noun" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]] | |
643 | + | "adj" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]] | |
644 | + | "adv" -> [Nieokreslony,NegationUndef,PredFalse,AspectUndef,[]] | |
645 | + | _ -> [] | |
... | ... |
lexSemantics/ENIAMvalence2.ml
... | ... | @@ -142,7 +142,8 @@ let render_phrase = function |
142 | 142 | | InfP(Aspect aspect) -> Tensor[Atom "infp"; Atom aspect] |
143 | 143 | | InfP AspectUndef -> Tensor[Atom "infp"; Top] |
144 | 144 | (* | PadvP -> Tensor[Atom "padvp"] *) |
145 | - | AdvP -> Tensor[Atom "advp"] | |
145 | + | AdvP "misc" -> Tensor[Atom "advp"; Top] (* FIXME: a może Atom "mod" zamiast Top *) | |
146 | + | AdvP mode -> Tensor[Atom "advp"; Atom mode] | |
146 | 147 | | FixedP lex -> Tensor[Atom "fixed"; Atom lex] |
147 | 148 | (* | PrepP -> Tensor[Atom "prepp";Top] |
148 | 149 | | Prep("",CaseAgr) -> Tensor[Atom "prep"; Top; AVar "case"] |
... | ... | @@ -181,6 +182,10 @@ let render_schema schema = |
181 | 182 | Xlist.map schema (fun p -> |
182 | 183 | Both,Plus(Xlist.map p.morfs render_morf)) |
183 | 184 | |
185 | +let render_simple_schema schema = | |
186 | + Xlist.map schema (fun morfs -> | |
187 | + Both,Plus(Xlist.map morfs render_morf)) | |
188 | + | |
184 | 189 | (* FIXME: tu trzeba by dodać zwykłe reguły dla czasowników dotyczące ich negacji, aglutynatu itp. *) |
185 | 190 | let render_lex_entry = function |
186 | 191 | SimpleLexEntry(lemma,pos) -> [Tensor([Atom "lex";Atom lemma] @ render_pos_entry pos)] |
... | ... | @@ -195,12 +200,12 @@ let render_lex_entry = function |
195 | 200 | [ImpSet(Tensor([Atom "lex";Atom (string_of_int id);Atom lemma] @ render_pos_entry pos),[Both,Tensor[AVar "schema"]])] |
196 | 201 | | entry -> print_endline ("render_entry:" ^ ENIAMwalStringOf.lex_entry entry); [(*[],entry*)] |
197 | 202 | |
198 | -let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries | |
203 | +(* let schemata,entries = ENIAMvalence.prepare_all_valence ENIAMwalParser.phrases ENIAMwalParser.schemata ENIAMwalParser.entries *) | |
199 | 204 | |
200 | -let _ = | |
205 | +(* let _ = | |
201 | 206 | (* Entries.map schemata (fun pos lemma (selectors,schema) -> |
202 | 207 | (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *) |
203 | 208 | render_schema schema) *) |
204 | 209 | Entries.map entries (fun pos lemma (selectors,entry) -> |
205 | 210 | (* Printf.printf "%s %s %s\n" pos lemma (ENIAMwalStringOf.schema schema); *) |
206 | - selectors,render_lex_entry entry) | |
211 | + selectors,render_lex_entry entry) *) | |
... | ... |
lexSemantics/ENIAMwalFrames.ml
... | ... | @@ -287,147 +287,6 @@ let prepare_schema expands subtypes equivs schema = |
287 | 287 | let prepare_schema_sem expands subtypes equivs schema = |
288 | 288 | prepare_schema_comprep expands subtypes equivs schema |
289 | 289 | |
290 | -let default_frames = Xlist.fold [ (* FIXME: poprawić domyślne ramki po ustaleniu adjunctów *) | |
291 | - "verb",(ReflEmpty,Domyslny,NegationUndef,PredNA,AspectUndef,"subj{np(str)}+obj{np(str)}"); (* FIXME: dodać ramkę z refl *) | |
292 | - "noun",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{possp}+{adjp(agr)}"); | |
293 | - "adj",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); | |
294 | - "adv",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); | |
295 | - "empty",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,""); | |
296 | - "date",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',natr)}"); | |
297 | - "date2",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(np(gen),sg,'rok',atr1({adjp(agr)}))}"); (* FIXME: wskazać możliwe podrzędniki *) | |
298 | - "day",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"" | |
299 | - (*"{lex(np(gen),sg,XOR('styczeń','luty','marzec','kwiecień','maj','czerwiec','lipiec','sierpień','wrzesień','październik','litopad','grudzień'),atr1({np(gen)}))}"*)); (* FIXME: wskazać możliwe podrzędniki *) | |
300 | - "hour",(ReflEmpty,Domyslny,NegationNA,PredNA,AspectNA,"{null;lex(advp(temp),pos,'rano',natr)}"); | |
301 | - ] StringMap.empty (fun map (k,(refl,opinion,negation,pred,aspect,schema)) -> | |
302 | - StringMap.add map k (Frame(DefaultAtrs([],refl,opinion,negation,pred,aspect),prepare_schema expands subtypes equivs schema))) | |
303 | - | |
304 | -let adjunct_schema_field role dir morfs = | |
305 | - {gf=ADJUNCT; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs} | |
306 | - | |
307 | -let verb_prep_adjunct_schema_field lemma case = | |
308 | - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ | |
309 | - Phrase Null; | |
310 | - Phrase(PrepNP(Sem,lemma,Case case)); | |
311 | - Phrase(PrepAdjP(Sem,lemma,Case case)); | |
312 | - Phrase(PrepNumP(Sem,lemma,Case case))]} | |
313 | - | |
314 | -let verb_comprep_adjunct_schema_field lemma = | |
315 | - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ | |
316 | - Phrase Null; | |
317 | - Phrase(ComprepNP(Sem,lemma))]} | |
318 | - | |
319 | -let verb_compar_adjunct_schema_field lemma = | |
320 | - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs=[ | |
321 | - Phrase Null; | |
322 | - Phrase(ComparPP(Sem,lemma))] @ | |
323 | - Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case)))} | |
324 | - | |
325 | -let noun_prep_adjunct_schema_field preps compreps = | |
326 | - {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= | |
327 | - let l = Xlist.fold preps [Phrase Null] (fun l (lemma,case) -> | |
328 | - [Phrase(PrepNP(Sem,lemma,Case case)); | |
329 | - Phrase(PrepAdjP(Sem,lemma,Case case)); | |
330 | - Phrase(PrepNumP(Sem,lemma,Case case))] @ l) in | |
331 | - Xlist.fold compreps l (fun l lemma -> | |
332 | - Phrase(ComprepNP(Sem,lemma)) :: l)} | |
333 | - | |
334 | -let noun_compar_adjunct_schema_field compars = | |
335 | - {gf=ADJUNCT; role="Attribute"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= | |
336 | - Xlist.fold compars [Phrase Null] (fun l lemma -> | |
337 | - [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom";"gen";"dat";"acc";"inst"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)} | |
338 | - | |
339 | -let adj_compar_adjunct_schema_field compars = | |
340 | - {gf=ADJUNCT; role="Manner"; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=Both; morfs= | |
341 | - Xlist.fold compars [Phrase Null] (fun l lemma -> | |
342 | - [Phrase(ComparPP(Sem,lemma))] @ Xlist.map ["nom"] (fun case -> Phrase(ComparNP(Sem,lemma,Case case))) @ l)} | |
343 | - | |
344 | -(*let nogf_schema_field dir morfs = | |
345 | - {gf=NOGF; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=dir; morfs=morfs} *) | |
346 | - | |
347 | -let schema_field gf role dir morfs = | |
348 | - {gf=gf; role=role; role_attr=""; sel_prefs=["ALL"]; cr=[]; ce=[]; dir=dir; morfs=morfs} | |
349 | - | |
350 | -(*let verb_adjuncts = [ | |
351 | - adjunct_schema_field "R" "" Both [Phrase AdvP]; | |
352 | - adjunct_schema_field "R" "" Both [Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) | |
353 | - ] | |
354 | - | |
355 | -let noun_adjuncts = [ | |
356 | - adjunct_schema_field "C" "poss" Both [Phrase(NP(Case "gen"))]; | |
357 | - adjunct_schema_field "C" "=" Both [Phrase(NP(Case "nom"))]; | |
358 | - adjunct_schema_field "C" "=" Both [Phrase(NP(CaseAgr))]; | |
359 | - adjunct_schema_field "R" "" Backward [Multi[AdjP AllAgr]]; | |
360 | - adjunct_schema_field "R" "" Forward [Multi[AdjP AllAgr]]; | |
361 | - adjunct_schema_field "R" "" Both [Phrase PrepP]; | |
362 | - ] | |
363 | - | |
364 | -let adj_adjuncts = [ | |
365 | - adjunct_schema_field "R" "" Both [Phrase PrepP]; | |
366 | - ]*) | |
367 | - | |
368 | -let verb_adjuncts = [ | |
369 | -(* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP]; | |
370 | - adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) | |
371 | - adjunct_schema_field "Topic" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *)*) | |
372 | - ] | |
373 | - | |
374 | -(* FIXME: pozycje dublują się z domyślną ramką "noun" *) | |
375 | -let noun_adjuncts = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) | |
376 | -(* adjunct_schema_field "poss" Both [Phrase Null;Phrase(NP(Case "gen"))]; | |
377 | - adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(Case "nom"))]; | |
378 | - adjunct_schema_field "=" Both [Phrase Null;Phrase(NP(CaseAgr))]; | |
379 | - adjunct_schema_field "" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) | |
380 | - adjunct_schema_field "" Forward [Phrase Null;Phrase(AdjP AllAgr)]; | |
381 | - adjunct_schema_field "" Both [Phrase Null;Phrase PrepP];*) | |
382 | - ] | |
383 | - | |
384 | -let adj_adjuncts = [ | |
385 | -(* adjunct_schema_field "" Both [Phrase Null;Phrase AdvP]; *) | |
386 | - ] | |
387 | - | |
388 | - | |
389 | -let verb_adjuncts_simp = [ | |
390 | - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; | |
391 | - adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))]; | |
392 | - adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))]; | |
393 | - adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")]; | |
394 | -(* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *) | |
395 | - adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *) | |
396 | - adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or]; | |
397 | - ] | |
398 | - | |
399 | -let verb_adjuncts_simp2 = [ | |
400 | - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; | |
401 | - adjunct_schema_field "Recipent" Both [Phrase Null;Phrase (NP(Case "dat"));Phrase (NumP(Case "dat"));Phrase (NCP(Case "dat",CompTypeUndef,CompUndef))]; | |
402 | - adjunct_schema_field "Instrument" Both [Phrase Null;Phrase (NP(Case "inst"));Phrase (NumP(Case "inst"));Phrase (NCP(Case "inst",CompTypeUndef,CompUndef))]; | |
403 | - adjunct_schema_field "Time" Both [Phrase Null;Phrase (Lex "date");Phrase (Lex "day-lex");Phrase (Lex "day-month");Phrase (Lex "day")]; | |
404 | - (* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; (* FIXME: Trzeba będzie uzgodnić PrepNP, PrepAdjP, PrepNumP z PrepP i XP *) *) | |
405 | - adjunct_schema_field "Condition" Forward [Phrase Null;Phrase (CP(CompTypeUndef,CompUndef))]; (* poprawić semantykę *) (* FIXME: to powinno być jako ostatnia lista argumentów *) | |
406 | - adjunct_schema_field "Theme" Both [Phrase Null;Phrase Or]; | |
407 | - adjunct_schema_field "Theme" Both [Phrase Null;Phrase(Lex "się")]; | |
408 | -] | |
409 | - | |
410 | -let noun_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) | |
411 | - adjunct_schema_field "Possesive" Both [Phrase Null;Phrase(NP(Case "gen"));Phrase(NumP(Case "gen"))]; | |
412 | - adjunct_schema_field "Aposition" Forward [Phrase Null;Phrase(NP(Case "nom"));Phrase(NumP(Case "nom"));Phrase Null;Phrase(NP(CaseAgr));Phrase(NumP(CaseAgr))]; | |
413 | - adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) | |
414 | - adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)]; | |
415 | -(* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *) | |
416 | - ] | |
417 | - | |
418 | -let noun_measure_adjuncts_simp = [ (* FIXME: usuniecie noun_adjuncts pozostawia poss dla 'Witoldzie' *) | |
419 | - adjunct_schema_field "Attribute" Backward [(*Phrase Null;Phrase(AdjP AllAgr)*)Multi[AdjP AllAgr]]; (* FIXME: za pomocą Multi można zrobić konkatenowane leksykalizacje *) | |
420 | - adjunct_schema_field "Base" Forward [Phrase Null;Phrase(AdjP AllAgr)]; | |
421 | -(* adjunct_schema_field "" Both [Phrase Null;Phrase PrepP]; *) | |
422 | - ] | |
423 | - | |
424 | -let adj_adjuncts_simp = [ | |
425 | - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; | |
426 | - ] | |
427 | - | |
428 | -let adv_adjuncts_simp = [ | |
429 | - adjunct_schema_field "Manner" Both [Phrase Null;Phrase AdvP]; | |
430 | - ] | |
431 | 290 | |
432 | 291 | let convert_frame expands subtypes equivs lexemes valence lexeme pos (refl,opinion,negation,pred,aspect,schema) = |
433 | 292 | (* Printf.printf "convert_frame %s %s\n" lexeme pos; *) |
... | ... |
lexSemantics/ENIAMwalReduce.ml
... | ... | @@ -172,7 +172,7 @@ let create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes = |
172 | 172 | StringSet.mem lexemes |
173 | 173 | |
174 | 174 | |
175 | -let select_entries phrases entries schemata connected meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes = | |
175 | +let select_entries_full phrases entries schemata connected meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes = | |
176 | 176 | let tests = create_tests comprep_reqs comprep_reqs2 lexarg_reqs lexemes in |
177 | 177 | let entries = reduce_entries lexemes entries in |
178 | 178 | let schemata = reduce_entries lexemes schemata in |
... | ... | @@ -194,16 +194,18 @@ let select_all_entries phrases entries schemata connected meanings = |
194 | 194 | let connected = merge_entries_conn phrases meanings connected in |
195 | 195 | entries,schemata,connected |
196 | 196 | |
197 | -let entries,schemata,connected = | |
197 | +let select_entries lexemes = | |
198 | + select_entries_full ENIAMwalParser.phrases ENIAMwalParser.entries ENIAMwalParser.schemata | |
199 | + ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes | |
200 | + | |
201 | +(* let entries,schemata,connected = | |
198 | 202 | (* let lexemes = StringSet.of_list ["Ala"; "ma"; "kot"] in *) |
199 | 203 | let lexemes = StringSet.of_list ["dorastać"; "dorobić"; "po"; "bok"; "na"] in |
200 | 204 | select_entries ENIAMwalParser.phrases ENIAMwalParser.entries ENIAMwalParser.schemata |
201 | - ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes | |
205 | + ENIAMwalParser.connected ENIAMwalParser.meanings comprep_reqs comprep_reqs2 lexarg_reqs lexemes *) | |
202 | 206 | |
203 | 207 | (* TODO |
204 | - - usunięcie adjunctów | |
205 | - - uwzględnienie cech morfoskładniowych | |
206 | - - scalenie schematów | |
208 | + - uwzględnienie cech morfoskładniowych - np usunięcie schematów wymagających negacji, gdy nie ma "nie" | |
207 | 209 | - dodanie adjunctów - pamiętać o padvp |
208 | 210 | *) |
209 | 211 | (* TODO |
... | ... |
lexSemantics/makefile
... | ... | @@ -3,10 +3,11 @@ OCAMLOPT=ocamlopt |
3 | 3 | OCAMLDEP=ocamldep |
4 | 4 | INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I +eniam |
5 | 5 | OCAMLFLAGS=$(INCLUDES) -g |
6 | -OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa | |
6 | +OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa eniam-tokenizer.cmxa eniam-morphology.cmxa eniam-subsyntax.cmxa eniam-lcg-parser.cmxa eniam-lcg-lexicon.cmxa #eniam-plWordnet.cmxa #eniam-lexSemantics.cmxa | |
7 | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | |
9 | -SOURCES= ENIAMlexSemanticsTypes.ml ENIAMcategories.ml ENIAMlexSemanticsData.ml ENIAMlexSemantics.ml | |
9 | +SOURCES= entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMadjuncts.ml ENIAMvalence2.ml \ | |
10 | + ENIAMlexSemanticsTypes.ml ENIAMlexSemantics.ml #ENIAMlexSemanticsData.ml | |
10 | 11 | |
11 | 12 | all: eniam-lexSemantics.cma eniam-lexSemantics.cmxa |
12 | 13 | |
... | ... | @@ -28,8 +29,8 @@ eniam-lexSemantics.cmxa: $(SOURCES) |
28 | 29 | |
29 | 30 | # test: test.ml |
30 | 31 | # $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) test.ml |
31 | -test: entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMvalence2.ml test.ml | |
32 | - $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalParser.ml ENIAMwalReduce.ml ENIAMvalence.ml ENIAMvalence2.ml test.ml | |
32 | +test: $(SOURCES) test.ml | |
33 | + $(OCAMLOPT) -o test $(OCAMLOPTFLAGS) $^ | |
33 | 34 | |
34 | 35 | |
35 | 36 | .SUFFIXES: .mll .mly .ml .mli .cmo .cmi .cmx |
... | ... |
subsyntax/interface.ml
... | ... | @@ -43,6 +43,10 @@ let spec_list = [ |
43 | 43 | let usage_msg = |
44 | 44 | "Usage: subsyntax <options>\nInput is a sequence of lines. Empty line ends the sequence and invoke parsing. Double empty line shutdown parser.\nOptions are:" |
45 | 45 | |
46 | +let message = "ENIAMsubsyntax: MWE, abbreviation and sentence detecion for Polish\n\ | |
47 | +Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>\n\ | |
48 | +Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences" | |
49 | + | |
46 | 50 | let anon_fun s = raise (Arg.Bad ("invalid argument: " ^ s)) |
47 | 51 | |
48 | 52 | let input_text channel = |
... | ... | @@ -80,6 +84,7 @@ let rec main_loop in_chan out_chan = |
80 | 84 | main_loop in_chan out_chan) |
81 | 85 | |
82 | 86 | let _ = |
87 | + print_endline message; | |
83 | 88 | Arg.parse spec_list anon_fun usage_msg; |
84 | 89 | Gc.compact (); |
85 | 90 | print_endline "Ready!"; |
... | ... |