Commit 4e551ed50f9e290b0330ebf2cefb0f7496ef1fc6
1 parent
5569a92a
dostosowanie parsera do nowego podziału na moduły
Showing
16 changed files
with
691 additions
and
664 deletions
integration/TODO
0 → 100644
1 | +- uporządkować położenie info_sentences i podzielić na część dotyczącą formatu i część dotyczącą korpusu | |
... | ... |
lexSemantics/TODO
0 → 100644
1 | +- sprawdzić czy disambiguate_senses nie wycina argumentów, które mogą być realizowane przez przyimki | |
2 | +- dodać do walencji preferencje selekcyjne nadrzędników symboli: dzień, godzina, rysunek itp. | |
3 | +- sprawdzić czy walencja nazw własnych jest dobrze zrobiona. | |
4 | +- trzeba zrobić słownik nazw własnych | |
5 | +- trzeba poprawić selekcję preferencji selekcyjnych: jeśli podrzędnikiem jest zaimek nie muszą jawnie występować wśród sensów. | |
... | ... |
parser/LCGchart.ml
... | ... | @@ -20,6 +20,8 @@ |
20 | 20 | open Xstd |
21 | 21 | open LCGtypes |
22 | 22 | open Printf |
23 | +open ENIAMtokenizerTypes | |
24 | +open ENIAMlexSemanticsTypes | |
23 | 25 | |
24 | 26 | let make size = Array.make_matrix (size+1) (size+1) ([],0) |
25 | 27 | |
... | ... | @@ -194,29 +196,31 @@ let is_dep_parsed = function |
194 | 196 | | [LCGtypes.Bracket(false,false,LCGtypes.Tensor[LCGtypes.Atom "<conll_root>"]),_] -> true |
195 | 197 | | _ -> failwith "is_dep_parsed" |
196 | 198 | |
197 | -let get_parsed_term tokens chart = | |
199 | +let get_parsed_term tokens lex_sems chart = | |
198 | 200 | let n = last_node chart in |
199 | 201 | let l = Xlist.fold (find chart 0 n) [] (fun l -> function |
200 | 202 | LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<root>"]), sem -> (LCGtypes.Cut(LCGtypes.Tuple[sem])) :: l |
201 | 203 | (* | LCGtypes.Bracket(true,true,LCGtypes.Tensor[LCGtypes.Atom "<ors-sentence>"]), sem -> (LCGtypes.Cut (LCGtypes.Tuple[sem])) :: l *) |
202 | 204 | | _ -> l) in |
203 | - let id = ExtArray.add tokens {PreTypes.empty_token with PreTypes.token=PreTypes.Lemma("<root>","interp",[])} in | |
205 | + let id = ExtArray.add tokens {empty_token with token=Lemma("<root>","interp",[])} in | |
206 | + let _ = ExtArray.add lex_sems empty_lex_sem in | |
204 | 207 | LCGtypes.Node{LCGrenderer.empty_node with |
205 | 208 | LCGtypes.pred="<root>"; |
206 | 209 | LCGtypes.cat="interp"; |
207 | 210 | LCGtypes.id=id; |
208 | - LCGtypes.agf=WalTypes.NOSEM; | |
211 | + LCGtypes.agf=ENIAMwalTypes.NOSEM; | |
209 | 212 | LCGtypes.args=LCGrules.make_variant l} |
210 | 213 | |
211 | -let get_dep_parsed_term tokens = function | |
214 | +let get_dep_parsed_term tokens lex_sems = function | |
212 | 215 | [LCGtypes.Bracket(false,false,LCGtypes.Tensor[LCGtypes.Atom "<conll_root>"]),sem] -> |
213 | - let id = ExtArray.add tokens {PreTypes.empty_token with PreTypes.token=PreTypes.Lemma("<root>","interp",[])} in | |
216 | + let id = ExtArray.add tokens {empty_token with token=Lemma("<root>","interp",[])} in | |
217 | + let _ = ExtArray.add lex_sems empty_lex_sem in | |
214 | 218 | let l = [LCGtypes.Cut (LCGtypes.Tuple[sem])] in |
215 | 219 | LCGtypes.Node{LCGrenderer.empty_node with |
216 | 220 | LCGtypes.pred="<root>"; |
217 | 221 | LCGtypes.cat="interp"; |
218 | 222 | LCGtypes.id=id; |
219 | - LCGtypes.agf=WalTypes.NOSEM; | |
223 | + LCGtypes.agf=ENIAMwalTypes.NOSEM; | |
220 | 224 | LCGtypes.args=LCGrules.make_variant l} |
221 | 225 | | _ -> failwith "get_dep_parsed_term" |
222 | 226 | |
... | ... |
parser/LCGlatexOf.ml
... | ... | @@ -73,8 +73,8 @@ let rec linear_term c = function |
73 | 73 | "{\\left[\\begin{array}{ll}" ^ |
74 | 74 | (String.concat "\\\\ " (Xlist.map (["PRED",Val t.pred;"CAT",Val t.cat;"ID",Val (string_of_int t.id);"WEIGHT",Val (string_of_float t.weight);"GS",t.gs;"ARGS",t.args] @ t.attrs) (fun (e,t) -> |
75 | 75 | "\\text{" ^ (LatexMain.escape_string e) ^ "} & " ^ (linear_term 0 t)))) ^ "\\end{array}\\right]}" |
76 | - | Morf m -> "\\text{" ^ LatexMain.escape_string (WalStringOf.morf m) ^ "}" | |
77 | - | Gf s -> "\\text{" ^ LatexMain.escape_string (WalStringOf.gf s) ^ "}" | |
76 | + | Morf m -> "\\text{" ^ LatexMain.escape_string (ENIAMwalStringOf.morf m) ^ "}" | |
77 | + | Gf s -> "\\text{" ^ LatexMain.escape_string (ENIAMwalStringOf.gf s) ^ "}" | |
78 | 78 | | Ref i -> "{\\bf ref}\\; " ^ string_of_int i |
79 | 79 | | Cut t -> "{\\bf cut}(" ^ linear_term 0 t ^ ")" |
80 | 80 | | Choice choices -> "{\\bf choice}(" ^ String.concat ";" (StringMap.fold choices [] (fun l ei t -> (sprintf "%s: %s" ei (linear_term 0 t)) :: l)) ^ ")" |
... | ... | @@ -127,8 +127,8 @@ let rec linear_term_simple c = function |
127 | 127 | | Apply t -> "{\\bf apply}(" ^ linear_term_simple 0 t ^ ")" |
128 | 128 | | Insert(s,t) -> "{\\bf insert}(" ^ linear_term_simple 0 s ^ "," ^ linear_term_simple 0 t ^ ")" |
129 | 129 | | Node _ -> "node" |
130 | - | Morf m -> "\\text{" ^ LatexMain.escape_string (WalStringOf.morf m) ^ "}" | |
131 | - | Gf s -> "\\text{" ^ LatexMain.escape_string (WalStringOf.gf s) ^ "}" | |
130 | + | Morf m -> "\\text{" ^ LatexMain.escape_string (ENIAMwalStringOf.morf m) ^ "}" | |
131 | + | Gf s -> "\\text{" ^ LatexMain.escape_string (ENIAMwalStringOf.gf s) ^ "}" | |
132 | 132 | | Ref i -> "{\\bf ref}\\; " ^ string_of_int i |
133 | 133 | | Cut t -> "{\\bf cut}(" ^ linear_term_simple 0 t ^ ")" |
134 | 134 | | Choice choices -> "{\\bf choice}(" ^ String.concat ";" (StringMap.fold choices [] (fun l ei t -> (sprintf "%s: %s" ei (linear_term_simple 0 t)) :: l)) ^ ")" |
... | ... |
parser/LCGlexicon.ml
... | ... | @@ -17,8 +17,9 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -open PreTypes | |
21 | -open WalTypes | |
20 | +open ENIAMtokenizerTypes | |
21 | +open ENIAMwalTypes | |
22 | +open ENIAMlexSemanticsTypes | |
22 | 23 | open LCGtypes |
23 | 24 | open Xstd |
24 | 25 | |
... | ... | @@ -56,7 +57,7 @@ let check_frame_case cases = function |
56 | 57 | | CaseUndef -> cases |
57 | 58 | | Case case -> if not (Xlist.mem cases case) then raise Not_found else [case] |
58 | 59 | | Str -> cases |
59 | - | case -> failwith ("check_frame_case: " ^ WalStringOf.case case) | |
60 | + | case -> failwith ("check_frame_case: " ^ ENIAMwalStringOf.case case) | |
60 | 61 | |
61 | 62 | let check_frame_number numbers = function |
62 | 63 | Number num -> if not (Xlist.mem numbers num) then raise Not_found else [num] |
... | ... | @@ -161,7 +162,7 @@ let objids = StringSet.of_list ["rysunek"] |
161 | 162 | let int_arg = [arg_schema_field Forward [Phrase(Null);Phrase(Lex "int")]] |
162 | 163 | |
163 | 164 | |
164 | -let create_entries tokens id (d:PreTypes.token_record) x_flag = | |
165 | +let create_entries tokens lex_sems id (c:ENIAMtokenizerTypes.token_record) (d:ENIAMlexSemanticsTypes.lex_sem) x_flag = | |
165 | 166 | |
166 | 167 | let make_node lemma cat weight fnum l = |
167 | 168 | let attrs,args = Xlist.fold l ([],[]) (fun (attrs,args) -> function |
... | ... | @@ -199,9 +200,11 @@ let make_node lemma cat weight fnum l = |
199 | 200 | | "pos" -> ("GRAD", Val "pos") :: attrs, args |
200 | 201 | | "com" -> ("GRAD", Val "com") :: attrs, |
201 | 202 | let id = ExtArray.add tokens {empty_token with token=Lemma("bardziej","adv",[])} in |
203 | + let _ = ExtArray.add lex_sems empty_lex_sem in | |
202 | 204 | (Cut(Node{LCGrenderer.empty_node with pred="bardziej"; id=id; cat="adv"; agf=ADJUNCT; arole="Manner"; attrs=[(*"MEANING", Val "bardziej";*)"GRAD", Val "com"(*;"GF",Val "adjunct"*)]})) :: args (* FIXME: MEANING powinno być dodawane później *) |
203 | 205 | | "sup" -> ("GRAD", Val "sup") :: attrs, |
204 | 206 | let id = ExtArray.add tokens {empty_token with token=Lemma("najbardziej","adv",[])} in |
207 | + let _ = ExtArray.add lex_sems empty_lex_sem in | |
205 | 208 | (Cut(Node{LCGrenderer.empty_node with pred="najbardziej"; id=id; cat="adv"; agf=ADJUNCT; arole="Manner"; attrs=[(*"MEANING", Val "najbardziej";*)"GRAD", Val "sup"(*;"GF",Val "adjunct"*)]})) :: args (* FIXME: MEANING powinno być dodawane później *) |
206 | 209 | | "aff" -> attrs, args |
207 | 210 | | "negation" -> ("NEG",Val "+") :: attrs, args |
... | ... | @@ -215,7 +218,7 @@ let make_node lemma cat weight fnum l = |
215 | 218 | args=if args = [] then Dot else Tuple(List.rev args)} in |
216 | 219 | |
217 | 220 | (* FIXME: "Można było" - brakuje uzgodnienia rodzaju przymiotnika w przypadku predykatywnym, i ogólnie kontroli składniowej *) |
218 | -let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja siebie i się *) | |
221 | +let make_np numbers cases genders persons (c:ENIAMtokenizerTypes.token_record) d lemma cat = (* FIXME: koreferencja siebie i się *) | |
219 | 222 | if d.simple_valence = [] then print_endline "empty simple_valence"; |
220 | 223 | let numbers = expand_numbers numbers in |
221 | 224 | let cases = expand_cases cases in |
... | ... | @@ -225,9 +228,9 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s |
225 | 228 | "sg" -> |
226 | 229 | let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
227 | 230 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
228 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
231 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
229 | 232 | let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "obj-id")]]] in |
230 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
233 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
231 | 234 | | "pl" -> l |
232 | 235 | | _ -> failwith "make_np") |
233 | 236 | else []) @ |
... | ... | @@ -236,15 +239,15 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s |
236 | 239 | "sg" -> |
237 | 240 | let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
238 | 241 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
239 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
242 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
240 | 243 | let schema_list = [[schema_field CORE "Aposition" Both [Phrase(Lex "year")]]] in |
241 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
244 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
242 | 245 | | "pl" -> |
243 | 246 | let quant = ["number",d.e.number,["pl"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
244 | 247 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
245 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
248 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
246 | 249 | let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "year-interval")]]] in |
247 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
250 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
248 | 251 | | _ -> failwith "make_np") |
249 | 252 | else []) @ |
250 | 253 | (if lemma = "wiek" then (* FIXME: "Aranżuje w XIX w." się nie parsuje, niewłaściwa reprezentacja sem dla XIX *) |
... | ... | @@ -252,15 +255,15 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s |
252 | 255 | "sg" -> |
253 | 256 | let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
254 | 257 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
255 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
258 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
256 | 259 | let schema_list = [[schema_field CORE "Aposition" Both [Phrase(Lex "roman")]]] in |
257 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
260 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
258 | 261 | | "pl" -> |
259 | 262 | let quant = ["number",d.e.number,["pl"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
260 | 263 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
261 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
264 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
262 | 265 | let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "roman-interval")]]] in |
263 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
266 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
264 | 267 | | _ -> failwith "make_np") |
265 | 268 | else []) @ |
266 | 269 | (if StringSet.mem months lemma then |
... | ... | @@ -270,15 +273,15 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s |
270 | 273 | "gen" -> |
271 | 274 | let quant = ["number",d.e.number,["sg"];"case",d.e.case,["gen"];"gender",d.e.gender,genders; "person", d.e.person,persons] in |
272 | 275 | let t = ["month-lex"] in |
273 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
276 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
274 | 277 | let schema_list = [[schema_field CORE "Possesive" Forward [Phrase Null; Phrase(Lex "year"); Phrase(NP(Case "gen"))]]] in |
275 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
278 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
276 | 279 | | _ -> l) in |
277 | 280 | let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
278 | 281 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
279 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
282 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
280 | 283 | let schema_list = [[schema_field CORE "Possesive" Forward [Phrase(Lex "year")]]] in |
281 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
284 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
282 | 285 | | "pl" -> l |
283 | 286 | | _ -> failwith "make_np") |
284 | 287 | else []) @ |
... | ... | @@ -289,29 +292,29 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s |
289 | 292 | "gen" -> |
290 | 293 | let quant = ["number",d.e.number,["sg"];"case",d.e.case,["gen"];"gender",d.e.gender,genders; "person", d.e.person,persons] in |
291 | 294 | let t = ["day-lex"] in |
292 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
295 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
293 | 296 | let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "date");Phrase(Lex "day");Phrase(Lex "day-month")]]] in |
294 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
297 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
295 | 298 | | _ -> l) in |
296 | 299 | let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
297 | 300 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
298 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
301 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
299 | 302 | let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "date");Phrase(Lex "day");Phrase(Lex "day-month")]]] in |
300 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
303 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
301 | 304 | | "pl" -> |
302 | 305 | (* let l = Xlist.fold cases l (fun l -> function |
303 | 306 | "gen" -> |
304 | 307 | let quant = ["number",d.e.number,["sg"];"case",d.e.case,["gen"];"gender",d.e.gender,genders; "person", d.e.person,persons] in |
305 | 308 | let t = ["day-lex"] in |
306 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
309 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
307 | 310 | let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "date-interval");Phrase(Lex "day-interval");Phrase(Lex "day-month-interval")]]] in |
308 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
311 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
309 | 312 | | _ -> l) in*) |
310 | 313 | let quant = ["number",d.e.number,["pl"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
311 | 314 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
312 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
315 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
313 | 316 | let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "date-interval");Phrase(Lex "day-interval");Phrase(Lex "day-month-interval")]]] in |
314 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
317 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
315 | 318 | | _ -> failwith "make_np") |
316 | 319 | else []) @ |
317 | 320 | (if lemma = "godzina" then |
... | ... | @@ -319,15 +322,15 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s |
319 | 322 | "sg" -> |
320 | 323 | let quant = ["number",d.e.number,["sg"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
321 | 324 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
322 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
325 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
323 | 326 | let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "hour");Phrase(Lex "hour-minute")]]] in |
324 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
327 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
325 | 328 | | "pl" -> |
326 | 329 | let quant = ["number",d.e.number,["pl"];"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
327 | 330 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
328 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) 0 ["number"; "case"; "gender"; "person"] in | |
331 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) 0 ["number"; "case"; "gender"; "person"] in | |
329 | 332 | let schema_list = [[schema_field CORE "Aposition" Forward [Phrase(Lex "hour-interval");Phrase(Lex "hour-minute-interval")]]] in |
330 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] @ l | |
333 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] @ l | |
331 | 334 | | _ -> failwith "make_np") |
332 | 335 | else []) @ |
333 | 336 | Xlist.fold d.simple_valence [] (fun l -> function |
... | ... | @@ -341,12 +344,12 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s |
341 | 344 | let quant3 = ["number",d.e.number,numbers;"case",d.e.case,all_cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
342 | 345 | let t = ["measure"; "number"; "case"; "gender"; "person"] in |
343 | 346 | let t3 = ["measure"; "sg"; "case"; "n2"; "person"] in (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *) |
344 | - let batrs = make_node lemma cat (d.weight +. measure_weight) fnum (nsyn :: (WalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in | |
345 | - let batrs3 = make_node lemma cat (d.weight +. measure_weight) fnum (nsyn :: (WalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in | |
347 | + let batrs = make_node lemma cat (c.weight +. measure_weight) fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in | |
348 | + let batrs3 = make_node lemma cat (c.weight +. measure_weight) fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in | |
346 | 349 | let schema_list = [qub_inclusion;schema;num_congr] in |
347 | 350 | let schema_list3 = [qub_inclusion;schema;num_rec] in |
348 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: | |
349 | - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant3 schema_list3 t3 d batrs3] else []) @ l | |
351 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: | |
352 | + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant3 schema_list3 t3 d batrs3] else []) @ l | |
350 | 353 | else |
351 | 354 | let persons = if cases = ["voc"] then ["sec"] else persons in |
352 | 355 | let quant = ["lemma",ge (),[];"number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
... | ... | @@ -358,22 +361,22 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s |
358 | 361 | let t3 = ["np"; "sg"; "case"; "n2"; "person"] in (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *) |
359 | 362 | let t5 = ["np"; "unumber"; "ucase"; "ugender"; "uperson"] in |
360 | 363 | let t6 = ["prepnp"; "lemma"; "ucase"] in |
361 | - let batrs = make_node lemma cat d.weight fnum (nsyn :: (WalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in | |
362 | - let batrs2 = make_node lemma cat d.weight fnum ("nosem" :: nsyn :: (WalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in | |
363 | - let batrs3 = make_node lemma cat d.weight fnum (nsyn :: (WalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in | |
364 | - let batrs4 = make_node lemma cat d.weight fnum ("nosem" :: nsyn :: (WalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in | |
364 | + let batrs = make_node lemma cat c.weight fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in | |
365 | + let batrs2 = make_node lemma cat c.weight fnum ("nosem" :: nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "case"; "gender"; "person"]) in | |
366 | + let batrs3 = make_node lemma cat c.weight fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in | |
367 | + let batrs4 = make_node lemma cat c.weight fnum ("nosem" :: nsyn :: (ENIAMwalStringOf.nsem nsem) :: ["number"; "gen"; "gender"; "person"]) in | |
365 | 368 | let schema_list = [qub_inclusion;schema;num_congr] in |
366 | 369 | let schema_list2 = [qub_inclusion;schema;num_congr;nosem_prep] in |
367 | 370 | let schema_list3 = [qub_inclusion;schema;num_rec] in |
368 | 371 | let schema_list4 = [qub_inclusion;schema;num_rec;nosem_prep] in |
369 | 372 | let schema_list5 = [qub_inclusion;schema;noun_measure] in |
370 | 373 | let schema_list6 = [qub_inclusion;schema;noun_measure;nosem_uprep] in |
371 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: | |
372 | - (LCGrenderer.make_frame x_flag tokens quant schema_list2 t2 d batrs2) :: | |
373 | - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant3 schema_list3 t3 d batrs3] else []) @ | |
374 | - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant3 schema_list4 t2 d batrs4] else []) @ | |
375 | - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant5 schema_list5 t5 d batrs3] else []) @ | |
376 | - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant5 schema_list6 t6 d batrs4] else []) @ l) | |
374 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: | |
375 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list2 t2 d batrs2) :: | |
376 | + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant3 schema_list3 t3 d batrs3] else []) @ | |
377 | + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant3 schema_list4 t2 d batrs4] else []) @ | |
378 | + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant5 schema_list5 t5 d batrs3] else []) @ | |
379 | + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant5 schema_list6 t6 d batrs4] else []) @ l) | |
377 | 380 | with Not_found -> l) |
378 | 381 | | fnum,Frame(AdjAtrs(_,case,_),schema) -> |
379 | 382 | (try |
... | ... | @@ -383,18 +386,18 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s |
383 | 386 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
384 | 387 | let t2 = ["prepnp"; "lemma"; "case"] in |
385 | 388 | let t3 = ["np"; "sg"; "case"; "n2"; "person"] in (* UWAGA: number "sg" i gender "n2", żeby uzgadniać z podmiotem czasownika *) |
386 | - let batrs = make_node lemma cat d.weight fnum ["number"; "case"; "gender"; "person"] in | |
387 | - let batrs2 = make_node lemma cat d.weight fnum ["nosem"; "number"; "case"; "gender"; "person"] in | |
388 | - let batrs3 = make_node lemma cat d.weight fnum ["number"; "gen"; "gender"; "person"] in | |
389 | - let batrs4 = make_node lemma cat d.weight fnum ["nosem"; "number"; "gen"; "gender"; "person"] in | |
389 | + let batrs = make_node lemma cat c.weight fnum ["number"; "case"; "gender"; "person"] in | |
390 | + let batrs2 = make_node lemma cat c.weight fnum ["nosem"; "number"; "case"; "gender"; "person"] in | |
391 | + let batrs3 = make_node lemma cat c.weight fnum ["number"; "gen"; "gender"; "person"] in | |
392 | + let batrs4 = make_node lemma cat c.weight fnum ["nosem"; "number"; "gen"; "gender"; "person"] in | |
390 | 393 | let schema_list = [qub_inclusion;schema;num_congr] in |
391 | 394 | let schema_list2 = [qub_inclusion;schema;num_congr;nosem_prep] in |
392 | 395 | let schema_list3 = [qub_inclusion;schema;num_rec] in |
393 | 396 | let schema_list4 = [qub_inclusion;schema;num_rec;nosem_prep] in |
394 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: | |
395 | - (LCGrenderer.make_frame x_flag tokens quant schema_list2 t2 d batrs2) :: | |
396 | - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant3 schema_list3 t3 d batrs3] else []) @ | |
397 | - (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens quant3 schema_list4 t2 d batrs4] else []) @ l | |
397 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: | |
398 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list2 t2 d batrs2) :: | |
399 | + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant3 schema_list3 t3 d batrs3] else []) @ | |
400 | + (if Xlist.mem cases "gen" then [LCGrenderer.make_frame x_flag tokens lex_sems quant3 schema_list4 t2 d batrs4] else []) @ l | |
398 | 401 | with Not_found -> l) |
399 | 402 | | fnum,LexFrame(lid,SUBST(number,case),NoRestr,schema) -> |
400 | 403 | (try |
... | ... | @@ -402,32 +405,32 @@ let make_np numbers cases genders persons d lemma cat = (* FIXME: koreferencja s |
402 | 405 | let numbers = check_frame_number numbers number in |
403 | 406 | let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons] in |
404 | 407 | let t = ["lex";lid;lemma;"subst"; "number"; "case"; "gender"; "person"] in |
405 | - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum ["lex";"number"; "case"; "gender"; "person"] in | |
408 | + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum ["lex";"number"; "case"; "gender"; "person"] in | |
406 | 409 | let schema_list = [[inclusion];schema] in |
407 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
410 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
408 | 411 | with Not_found -> l) |
409 | 412 | | fnum,LexFrame(_,ADJ _,_,_) -> l |
410 | - | fnum,frame -> failwith ("make_np: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
413 | + | fnum,frame -> failwith ("make_np: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
411 | 414 | |
412 | -let make_np_symbol d lemma cat = | |
415 | +let make_np_symbol (c:ENIAMtokenizerTypes.token_record) d lemma cat = | |
413 | 416 | Xlist.fold d.simple_valence [] (fun l -> function |
414 | 417 | fnum,Frame(NounAtrs(_,nsyn,nsem),schema) -> |
415 | 418 | let t = [cat] in |
416 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) fnum (nsyn :: (WalStringOf.nsem nsem) :: []) in | |
419 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: []) in | |
417 | 420 | let schema_list = [schema] in |
418 | - (LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs) :: l | |
419 | - | fnum,frame -> failwith ("make_np_symbol: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
421 | + (LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs) :: l | |
422 | + | fnum,frame -> failwith ("make_np_symbol: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
420 | 423 | |
421 | -let make_day d lemma cat = | |
424 | +let make_day (c:ENIAMtokenizerTypes.token_record) d lemma cat = | |
422 | 425 | Xlist.fold d.simple_valence [] (fun l -> function |
423 | 426 | fnum,Frame(NounAtrs(_,nsyn,nsem),[]) -> |
424 | 427 | let t = [cat] in |
425 | - let batrs = make_node lemma cat (symbol_weight +. d.weight) fnum (nsyn :: (WalStringOf.nsem nsem) :: []) in | |
428 | + let batrs = make_node lemma cat (symbol_weight +. c.weight) fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: []) in | |
426 | 429 | let schema_list = [[schema_field CORE "Possesive" Forward [Phrase(Lex "month-lex")]]] in |
427 | - (LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs) :: l | |
428 | - | fnum,frame -> failwith ("make_day: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
430 | + (LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs) :: l | |
431 | + | fnum,frame -> failwith ("make_day: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
429 | 432 | |
430 | -let make_nump numbers cases genders persons acm d lemma cat = (* FIXME: liczba po rzeczowniku *) (* FIXME: zbadać jak liczebniki współdziałąją z jako COMPAR *) | |
433 | +let make_nump numbers cases genders persons acm (c:ENIAMtokenizerTypes.token_record) d lemma cat = (* FIXME: liczba po rzeczowniku *) (* FIXME: zbadać jak liczebniki współdziałąją z jako COMPAR *) | |
431 | 434 | let numbers = expand_numbers numbers in |
432 | 435 | let cases = expand_cases cases in |
433 | 436 | let genders = expand_genders genders in |
... | ... | @@ -435,14 +438,14 @@ let make_nump numbers cases genders persons acm d lemma cat = (* FIXME: liczba p |
435 | 438 | Xlist.map acm (function |
436 | 439 | "rec" -> |
437 | 440 | let t = ["num"; "number"; "case"; "gender"; "person"; "rec"] in |
438 | - let batrs = make_node lemma cat d.weight 0 ["rec"; "number"; "case"; "gender"; "person"] in | |
441 | + let batrs = make_node lemma cat c.weight 0 ["rec"; "number"; "case"; "gender"; "person"] in | |
439 | 442 | let schema_list = [qub_inclusion] in (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) |
440 | - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs | |
443 | + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs | |
441 | 444 | | "congr" -> |
442 | 445 | let t = ["num"; "number"; "case"; "gender"; "person"; "congr"] in |
443 | - let batrs = make_node lemma cat d.weight 0 ["congr"; "number"; "case"; "gender"; "person"] in | |
446 | + let batrs = make_node lemma cat c.weight 0 ["congr"; "number"; "case"; "gender"; "person"] in | |
444 | 447 | let schema_list = [qub_inclusion] in (* FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? *) |
445 | - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs | |
448 | + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs | |
446 | 449 | | _ -> failwith "make_nump: num acm") @ |
447 | 450 | Xlist.fold d.simple_valence [] (fun l -> function |
448 | 451 | fnum,LexFrame(lid,NUM(case,gender,acm2),NoRestr,schema) -> |
... | ... | @@ -454,78 +457,78 @@ let make_nump numbers cases genders persons acm d lemma cat = (* FIXME: liczba p |
454 | 457 | match acm with |
455 | 458 | "rec" -> |
456 | 459 | let t = ["lex";lid;lemma;"num"; "number"; "case"; "gender"; "person"] in (* UWAGA: Number "sg" i Gender "n2", żeby uzgadniać z podmiotem czasownika *) |
457 | - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum ["rec";"lex"; "number"; "case"; "gender"; "person"] in | |
460 | + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum ["rec";"lex"; "number"; "case"; "gender"; "person"] in | |
458 | 461 | let schema_list = [[inclusion];schema] in |
459 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
462 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
460 | 463 | | "congr" -> |
461 | 464 | let t = ["lex";lid;lemma;"num"; "number"; "case"; "gender"; "person"] in |
462 | - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum ["congr";"lex"; "number"; "case"; "gender"; "person"] in | |
465 | + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum ["congr";"lex"; "number"; "case"; "gender"; "person"] in | |
463 | 466 | let schema_list = [[inclusion];schema] in |
464 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
467 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
465 | 468 | | _ -> failwith "make_nump: num acm" |
466 | 469 | with Not_found -> l) |
467 | - | fnum,frame -> failwith ("make_num: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
470 | + | fnum,frame -> failwith ("make_num: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
468 | 471 | |
469 | -let make_compar d lemma = | |
472 | +let make_compar (c:ENIAMtokenizerTypes.token_record) d lemma = | |
470 | 473 | let quant = ["case",d.e.case,["nom";"gen";"dat";"acc";"inst"]] in |
471 | 474 | let t = ["comparnp"; lemma; "case"] in |
472 | 475 | let t2 = ["comparpp"; lemma] in |
473 | - let batrs = make_node lemma "prep" d.weight 0 ["case"] in | |
474 | - let batrs2 = make_node lemma "prep" d.weight 0 [] in | |
476 | + let batrs = make_node lemma "prep" c.weight 0 ["case"] in | |
477 | + let batrs2 = make_node lemma "prep" c.weight 0 [] in | |
475 | 478 | let schema_list = [qub_inclusion;[adjunct_schema_field "Manner" Backward [Phrase Null;Phrase AdvP];prep_arg_schema_field [Phrase (NP(CaseAgr))]]] in |
476 | 479 | let schema_list2 = [qub_inclusion;[adjunct_schema_field "Manner" Backward [Phrase Null;Phrase AdvP];prep_arg_schema_field [Phrase (PrepNP(Sem,"",CaseUndef));Phrase (PrepNumP(Sem,"",CaseUndef));Phrase (PrepAdjP(Sem,"",CaseUndef));Phrase (ComprepNP(Sem,""))]]] in |
477 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs; | |
478 | - LCGrenderer.make_frame x_flag tokens [] schema_list2 t2 d batrs2] in | |
480 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs; | |
481 | + LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list2 t2 d batrs2] in | |
479 | 482 | |
480 | -let make_arg_prepp cases d lemma pcat phrase = | |
483 | +let make_arg_prepp cases (c:ENIAMtokenizerTypes.token_record) d lemma pcat phrase = | |
481 | 484 | let quant = ["case",d.e.case,expand_cases cases] in |
482 | 485 | let t = [pcat; lemma; "case"] in |
483 | - let batrs = make_node lemma "prep" d.weight 0 ["case"] in | |
486 | + let batrs = make_node lemma "prep" c.weight 0 ["case"] in | |
484 | 487 | let schema_list = [qub_inclusion;[adjunct_schema_field "Manner" Backward [Phrase Null;Phrase AdvP];prep_arg_schema_field [Phrase phrase]]] in |
485 | - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs in | |
488 | + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs in | |
486 | 489 | |
487 | -let make_prepnp cases d lemma = make_arg_prepp cases d lemma "prepnp" (NP(CaseAgr)) in | |
488 | -let make_prepnump cases d lemma = make_arg_prepp cases d lemma "prepnp" (NumP(CaseAgr)) in | |
489 | -let make_prepadjp cases d lemma = | |
490 | +let make_prepnp cases (c:ENIAMtokenizerTypes.token_record) d lemma = make_arg_prepp cases c d lemma "prepnp" (NP(CaseAgr)) in | |
491 | +let make_prepnump cases (c:ENIAMtokenizerTypes.token_record) d lemma = make_arg_prepp cases c d lemma "prepnp" (NumP(CaseAgr)) in | |
492 | +let make_prepadjp cases (c:ENIAMtokenizerTypes.token_record) d lemma = | |
490 | 493 | let cases = if lemma = "po" then "postp" :: cases else cases in |
491 | - make_arg_prepp cases d lemma "prepadjp" (AdjP(CaseAgr)) in | |
494 | + make_arg_prepp cases c d lemma "prepadjp" (AdjP(CaseAgr)) in | |
492 | 495 | |
493 | -(*let make_prepp cases d lemma = | |
496 | +(*let make_prepp cases c d lemma = | |
494 | 497 | let quant = ["case",d.e.case,expand_cases cases] in |
495 | 498 | let t = ["prepp"; "case"] in |
496 | - let batrs = make_node lemma "prep" d.weight 0 ["case"] in | |
499 | + let batrs = make_node lemma "prep" c.weight 0 ["case"] in | |
497 | 500 | let schema_list = [qub_inclusion;[adjunct_schema_field "Manner" Backward [Phrase Null;Phrase AdvP];prep_arg_schema_field2 [Phrase (NP(CaseAgr));Phrase (NumP(CaseAgr));Phrase (AdjP(CaseAgr))]]] in |
498 | - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs in*) | |
501 | + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs in*) | |
499 | 502 | |
500 | -let make_prep cases d lemma = | |
503 | +let make_prep cases (c:ENIAMtokenizerTypes.token_record) d lemma = | |
501 | 504 | let quant = ["case",d.e.case,expand_cases cases] in |
502 | 505 | let t = ["prep"; lemma; "case"] in |
503 | - let batrs = make_node lemma "prep" d.weight 0 ["case"] in | |
506 | + let batrs = make_node lemma "prep" c.weight 0 ["case"] in | |
504 | 507 | let schema_list = [qub_inclusion] in |
505 | - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs in | |
508 | + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs in | |
506 | 509 | |
507 | -let make_time_prep2 case d lemma l = | |
510 | +let make_time_prep2 case (c:ENIAMtokenizerTypes.token_record) d lemma l = | |
508 | 511 | let quant = ["case",d.e.case,[case]] in |
509 | 512 | let t = ["prepnp"; lemma; "case"] in |
510 | - let batrs = make_node lemma "prep" d.weight 0 ["case"] in | |
513 | + let batrs = make_node lemma "prep" c.weight 0 ["case"] in | |
511 | 514 | let schema_list = [qub_inclusion;[prep_arg_schema_field (Xlist.map l (fun s -> Phrase(Lex s)))]] in |
512 | - LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs in | |
515 | + LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs in | |
513 | 516 | |
514 | -let make_time_prep cases d lemma = | |
517 | +let make_time_prep cases (c:ENIAMtokenizerTypes.token_record) d lemma = | |
515 | 518 | Xlist.fold cases [] (fun l case -> |
516 | 519 | match lemma,case with |
517 | - "z","gen" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l | |
518 | - | "do","gen" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l | |
519 | - | "na","acc" -> [make_time_prep2 case d lemma ["day-month";"day";"hour";"hour-minute"]] @ l | |
520 | - | "o","loc" -> [make_time_prep2 case d lemma ["hour";"hour-minute"]] @ l | |
521 | - | "od","gen" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l | |
522 | - | "około","gen" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"hour";"hour-minute"]] @ l | |
523 | - | "po","loc" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l | |
524 | - | "przed","inst" -> [make_time_prep2 case d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l | |
525 | - | "w","loc" -> [make_time_prep2 case d lemma ["year"]] @ l | |
520 | + "z","gen" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l | |
521 | + | "do","gen" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l | |
522 | + | "na","acc" -> [make_time_prep2 case c d lemma ["day-month";"day";"hour";"hour-minute"]] @ l | |
523 | + | "o","loc" -> [make_time_prep2 case c d lemma ["hour";"hour-minute"]] @ l | |
524 | + | "od","gen" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l | |
525 | + | "około","gen" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"hour";"hour-minute"]] @ l | |
526 | + | "po","loc" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l | |
527 | + | "przed","inst" -> [make_time_prep2 case c d lemma ["day-month";"day";"year";"date";"hour";"hour-minute"]] @ l | |
528 | + | "w","loc" -> [make_time_prep2 case c d lemma ["year"]] @ l | |
526 | 529 | | _ -> l) in |
527 | 530 | |
528 | -let make_lex_prep cases d lemma = | |
531 | +let make_lex_prep cases (c:ENIAMtokenizerTypes.token_record) d lemma = | |
529 | 532 | let cases = expand_cases cases in |
530 | 533 | Xlist.fold d.simple_valence [] (fun l -> function |
531 | 534 | | fnum,LexFrame(lid,PREP case,NoRestr,schema) -> |
... | ... | @@ -533,26 +536,26 @@ let make_lex_prep cases d lemma = |
533 | 536 | let cases = check_frame_case cases case in |
534 | 537 | let quant = ["case",d.e.case,cases] in |
535 | 538 | let t = ["lex";lid;lemma;"prep"; "case"] in |
536 | - let batrs = make_node lemma "prep" (lex_weight +. d.weight) fnum ["lex";"case"] in | |
539 | + let batrs = make_node lemma "prep" (lex_weight +. c.weight) fnum ["lex";"case"] in | |
537 | 540 | let schema_list = [[inclusion];schema] in |
538 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
541 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
539 | 542 | with Not_found -> l) |
540 | 543 | | fnum,ComprepFrame(new_lemma,PREP case,NoRestr,schema) -> |
541 | 544 | (try |
542 | 545 | let cases = check_frame_case cases case in |
543 | 546 | let quant = ["case",d.e.case,cases] in |
544 | 547 | let t = ["comprepnp"; new_lemma] in |
545 | - let batrs = make_node new_lemma "prep" (lex_weight +. d.weight) fnum [] in | |
548 | + let batrs = make_node new_lemma "prep" (lex_weight +. c.weight) fnum [] in | |
546 | 549 | let schema_list = [[inclusion];schema] in |
547 | - let l = (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l in | |
550 | + let l = (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l in | |
548 | 551 | let t = ["prepp"; "case"] in |
549 | - let batrs = make_node new_lemma "prep" (lex_weight +. d.weight) fnum ["case"] in | |
552 | + let batrs = make_node new_lemma "prep" (lex_weight +. c.weight) fnum ["case"] in | |
550 | 553 | let schema_list = [[inclusion];schema] in |
551 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
554 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
552 | 555 | with Not_found -> l) |
553 | - | fnum,frame -> failwith ("make_lex_prep: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
556 | + | fnum,frame -> failwith ("make_lex_prep: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
554 | 557 | |
555 | -let make_adjp numbers cases genders grads d lemma cat = (* FIXME: usunąć niektóre opcje dla roman i ordnum *) | |
558 | +let make_adjp numbers cases genders grads (c:ENIAMtokenizerTypes.token_record) d lemma cat = (* FIXME: usunąć niektóre opcje dla roman i ordnum *) | |
556 | 559 | let numbers = expand_numbers numbers in |
557 | 560 | let cases = expand_cases cases in |
558 | 561 | let cases = if Xlist.mem cases "nom" then "pred" :: cases else cases in |
... | ... | @@ -565,21 +568,21 @@ let make_adjp numbers cases genders grads d lemma cat = (* FIXME: usunąć niekt |
565 | 568 | let quant = ["lemma",ge (),[];"number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders] in |
566 | 569 | let t = ["adjp"; "number"; "case"; "gender"] in |
567 | 570 | let t2 = ["prepadjp"; "lemma"; "case"] in |
568 | - let batrs = make_node lemma cat d.weight fnum (adjsyn :: grad :: ["number"; "case"; "gender"]) in | |
569 | - let batrs2 = make_node lemma cat d.weight fnum ("nosem" :: adjsyn :: grad :: ["number"; "case"; "gender"]) in | |
571 | + let batrs = make_node lemma cat c.weight fnum (adjsyn :: grad :: ["number"; "case"; "gender"]) in | |
572 | + let batrs2 = make_node lemma cat c.weight fnum ("nosem" :: adjsyn :: grad :: ["number"; "case"; "gender"]) in | |
570 | 573 | let schema_list = [if adjsyn = "pronoun" then [] else [adjunct_schema_field "Aposition" Backward [Phrase Null;Phrase Adja]];qub_inclusion;schema] in |
571 | 574 | let schema_list2 = [if adjsyn = "pronoun" then [] else [adjunct_schema_field "Aposition" Backward [Phrase Null;Phrase Adja]];qub_inclusion;schema;nosem_prep] in |
572 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: | |
573 | - (LCGrenderer.make_frame x_flag tokens quant schema_list2 t2 d batrs2) :: l | |
575 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: | |
576 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list2 t2 d batrs2) :: l | |
574 | 577 | with Not_found -> l) |
575 | 578 | | fnum,Frame(NounAtrs(_,nsyn,nsem),schema) -> |
576 | 579 | (try |
577 | 580 | let grad = match grads with [grad] -> grad | _ -> failwith "make_adjp: grad" in |
578 | 581 | let quant = ["lemma",ge (),[];"number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders] in |
579 | 582 | let t = ["adjp"; "number"; "case"; "gender"] in |
580 | - let batrs = make_node lemma cat d.weight fnum (nsyn :: (WalStringOf.nsem nsem) :: grad :: ["number"; "case"; "gender"]) in | |
583 | + let batrs = make_node lemma cat c.weight fnum (nsyn :: (ENIAMwalStringOf.nsem nsem) :: grad :: ["number"; "case"; "gender"]) in | |
581 | 584 | let schema_list = [if nsyn = "pronoun" then [] else [adjunct_schema_field "Aposition" Backward [Phrase Null;Phrase Adja]];qub_inclusion;schema] in |
582 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
585 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
583 | 586 | with Not_found -> l) |
584 | 587 | | fnum,LexFrame(lid,ADJ(number,case,gender,grad),NoRestr,schema) -> |
585 | 588 | (try |
... | ... | @@ -590,11 +593,11 @@ let make_adjp numbers cases genders grads d lemma cat = (* FIXME: usunąć niekt |
590 | 593 | let grad = match grads with [grad] -> grad | _ -> failwith "make_adjp: grad" in |
591 | 594 | let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders] in |
592 | 595 | let t = ["lex";lid;lemma;"adj"; "number"; "case"; "gender"] in |
593 | - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum [grad;"lex"; "number"; "case"; "gender"] in | |
596 | + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum [grad;"lex"; "number"; "case"; "gender"] in | |
594 | 597 | let schema_list = [[inclusion];schema] in |
595 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
598 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
596 | 599 | with Not_found -> l) |
597 | - | fnum,frame -> failwith ("make_adjp: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
600 | + | fnum,frame -> failwith ("make_adjp: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
598 | 601 | |
599 | 602 | let adv_relators = Xlist.fold [ |
600 | 603 | "jak","Attr",[Int;Rel]; |
... | ... | @@ -609,18 +612,18 @@ let adv_relators = Xlist.fold [ |
609 | 612 | "gdy","con",[Sub]; |
610 | 613 | ] StringMap.empty (fun map (k,v,l) -> StringMap.add map k (v,l)) in |
611 | 614 | |
612 | -let make_advp grads (d:PreTypes.token_record) lemma = | |
615 | +let make_advp grads (c:ENIAMtokenizerTypes.token_record) d lemma = | |
613 | 616 | (if StringMap.mem adv_relators lemma then |
614 | 617 | let role,ctypes = StringMap.find adv_relators lemma in |
615 | 618 | List.flatten (Xlist.map ctypes (fun ctype -> |
616 | - let ctype = WalStringOf.comp_type ctype in | |
619 | + let ctype = ENIAMwalStringOf.comp_type ctype in | |
617 | 620 | let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"ctype",ge (),[ctype]] in |
618 | 621 | let t = ["cp"; "ctype"; lemma] in |
619 | 622 | let sem_mods = ["CTYPE",SubstVar "ctype"] in |
620 | - let batrs = make_node lemma "adv" d.weight 0 [ctype] in | |
623 | + let batrs = make_node lemma "adv" c.weight 0 [ctype] in | |
621 | 624 | let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["advp"])] in |
622 | 625 | let schema_list = [if lemma = "jak" then [num_arg_schema_field [Phrase Null;(*Phrase ProNG;*) Phrase AdvP]] else [];[schema_field RAISED "" Forward raised_arg1]] in (* FIXME: dwa znaczenia jak: pytanie o cechę lub spójnik *) |
623 | - let frame_advp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
626 | + let frame_advp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
624 | 627 | [frame_advp])) else []) @ |
625 | 628 | Xlist.fold d.simple_valence [] (fun l -> function (* FIXME: sprawdzic czy adv_relators maja leksykalizacje i schematy *) |
626 | 629 | fnum,Frame(EmptyAtrs _,schema) -> |
... | ... | @@ -628,9 +631,9 @@ let make_advp grads (d:PreTypes.token_record) lemma = |
628 | 631 | let grad = match grads with [grad] -> grad | _ -> failwith "make_advp: grad" in |
629 | 632 | let quant = [] in |
630 | 633 | let t = ["advp"] in |
631 | - let batrs = make_node lemma "adv" d.weight fnum [grad] in | |
634 | + let batrs = make_node lemma "adv" c.weight fnum [grad] in | |
632 | 635 | let schema_list = [[adjunct_schema_field "Aposition" Backward [Phrase Null;Phrase Adja]];qub_inclusion;schema] in |
633 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
636 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
634 | 637 | with Not_found -> l) |
635 | 638 | | fnum,LexFrame(lid,ADV grad,NoRestr,schema) -> |
636 | 639 | (try |
... | ... | @@ -638,26 +641,26 @@ let make_advp grads (d:PreTypes.token_record) lemma = |
638 | 641 | let grad = match grads with [grad] -> grad | _ -> failwith "make_adjp: grad" in |
639 | 642 | let quant = [] in |
640 | 643 | let t = ["lex";lid;lemma;"adv"] in |
641 | - let batrs = make_node lemma "adv" (lex_weight +. d.weight) fnum [grad;"lex"] in | |
644 | + let batrs = make_node lemma "adv" (lex_weight +. c.weight) fnum [grad;"lex"] in | |
642 | 645 | let schema_list = [[inclusion];schema] in |
643 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
646 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
644 | 647 | with Not_found -> l) |
645 | 648 | | fnum,ComprepFrame(new_lemma,ADV grad,NoRestr,schema) -> |
646 | 649 | (try |
647 | 650 | let _ = check_frame_grad grads grad in |
648 | 651 | let quant = [] in |
649 | 652 | let t = ["comprepnp"; new_lemma] in |
650 | - let batrs = make_node new_lemma "adv" (lex_weight +. d.weight) fnum [] in | |
653 | + let batrs = make_node new_lemma "adv" (lex_weight +. c.weight) fnum [] in | |
651 | 654 | let schema_list = [[inclusion];schema] in |
652 | - let l = (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l in (* FIXME: nieprzetestowane *) | |
655 | + let l = (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l in (* FIXME: nieprzetestowane *) | |
653 | 656 | let t = ["prepp"; "gen"] in (* FIXME: przypadek nie jest znany *) |
654 | - let batrs = make_node new_lemma "adv" (lex_weight +. d.weight) fnum [] in | |
657 | + let batrs = make_node new_lemma "adv" (lex_weight +. c.weight) fnum [] in | |
655 | 658 | let schema_list = [[inclusion];schema] in |
656 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
659 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
657 | 660 | with Not_found -> l) |
658 | - | fnum,frame -> failwith ("make_advp: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
661 | + | fnum,frame -> failwith ("make_advp: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
659 | 662 | |
660 | -let make_ger numbers cases genders persons aspects negations d lemma cat = | |
663 | +let make_ger numbers cases genders persons aspects negations (c:ENIAMtokenizerTypes.token_record) d lemma cat = | |
661 | 664 | let numbers = expand_numbers numbers in |
662 | 665 | let cases = expand_cases cases in |
663 | 666 | let genders = expand_genders genders in |
... | ... | @@ -668,9 +671,9 @@ let make_ger numbers cases genders persons aspects negations d lemma cat = |
668 | 671 | let negation = check_frame_negation negations negation in |
669 | 672 | let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons; "aspect",d.e.aspect,aspects] in |
670 | 673 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
671 | - let batrs = make_node new_lemma cat d.weight fnum [negation;"aspect";"number"; "case"; "gender"; "person"] in | |
674 | + let batrs = make_node new_lemma cat c.weight fnum [negation;"aspect";"number"; "case"; "gender"; "person"] in | |
672 | 675 | let schema_list = [qub_inclusion;schema] in |
673 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
676 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
674 | 677 | with Not_found -> l) |
675 | 678 | | fnum,LexFrame(lid,GER(number,case,gender,aspect,negation,ReflEmpty),NoRestr,schema) -> |
676 | 679 | (try |
... | ... | @@ -681,13 +684,13 @@ let make_ger numbers cases genders persons aspects negations d lemma cat = |
681 | 684 | let negation = check_frame_negation negations negation in |
682 | 685 | let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "person", d.e.person,persons; "aspect",d.e.aspect,aspects] in |
683 | 686 | let t = ["lex";lid;lemma;"ger"; "number"; "case"; "gender"; "person"] in |
684 | - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum [negation;"aspect";"lex"; "number"; "case"; "gender"; "person"] in | |
687 | + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum [negation;"aspect";"lex"; "number"; "case"; "gender"; "person"] in | |
685 | 688 | let schema_list = [[inclusion];schema] in |
686 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
689 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
687 | 690 | with Not_found -> l) |
688 | - | fnum,frame -> failwith ("make_ger: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
691 | + | fnum,frame -> failwith ("make_ger: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
689 | 692 | |
690 | -let make_padj numbers cases genders aspects negations d lemma cat = | |
693 | +let make_padj numbers cases genders aspects negations (c:ENIAMtokenizerTypes.token_record) d lemma cat = | |
691 | 694 | let numbers = expand_numbers numbers in |
692 | 695 | let cases = expand_cases cases in |
693 | 696 | let cases = if Xlist.mem cases "nom" || cat = "ppas" then "pred" :: cases else cases in |
... | ... | @@ -699,9 +702,9 @@ let make_padj numbers cases genders aspects negations d lemma cat = |
699 | 702 | let negation = check_frame_negation negations negation in |
700 | 703 | let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "aspect",d.e.aspect,aspects] in |
701 | 704 | let t = ["adjp"; "number"; "case"; "gender"] in |
702 | - let batrs = make_node new_lemma cat d.weight fnum [negation;"aspect";"number"; "case"; "gender"] in | |
705 | + let batrs = make_node new_lemma cat c.weight fnum [negation;"aspect";"number"; "case"; "gender"] in | |
703 | 706 | let schema_list = [qub_inclusion;schema] in |
704 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
707 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
705 | 708 | with Not_found -> l) |
706 | 709 | | fnum,LexFrame(lid,PACT(number,case,gender,aspect,negation,ReflEmpty),NoRestr,schema) -> |
707 | 710 | (try |
... | ... | @@ -712,9 +715,9 @@ let make_padj numbers cases genders aspects negations d lemma cat = |
712 | 715 | let genders = check_frame_gender genders gender in |
713 | 716 | let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "aspect",d.e.aspect,aspects] in |
714 | 717 | let t = ["lex";lid;lemma;"pact"; "number"; "case"; "gender"] in |
715 | - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum [negation;"lex";"aspect"; "number"; "case"; "gender"] in | |
718 | + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum [negation;"lex";"aspect"; "number"; "case"; "gender"] in | |
716 | 719 | let schema_list = [[inclusion];schema] in |
717 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
720 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
718 | 721 | with Not_found -> l) |
719 | 722 | | fnum,LexFrame(lid,PPAS(number,case,gender,aspect,negation),NoRestr,schema) -> |
720 | 723 | (try |
... | ... | @@ -725,13 +728,13 @@ let make_padj numbers cases genders aspects negations d lemma cat = |
725 | 728 | let genders = check_frame_gender genders gender in |
726 | 729 | let quant = ["number",d.e.number,numbers;"case",d.e.case,cases;"gender",d.e.gender,genders; "aspect",d.e.aspect,aspects] in |
727 | 730 | let t = ["lex";lid;lemma;"ppas"; "number"; "case"; "gender"] in |
728 | - let batrs = make_node lemma cat (lex_weight +. d.weight) fnum [negation;"lex";"aspect"; "number"; "case"; "gender"] in | |
731 | + let batrs = make_node lemma cat (lex_weight +. c.weight) fnum [negation;"lex";"aspect"; "number"; "case"; "gender"] in | |
729 | 732 | let schema_list = [[inclusion];schema] in |
730 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
733 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
731 | 734 | with Not_found -> l) |
732 | - | fnum,frame -> failwith ("make_padj: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
735 | + | fnum,frame -> failwith ("make_padj: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
733 | 736 | |
734 | -let make_ip numbers genders persons aspects aglt aux2 d lemma cat = | |
737 | +let make_ip numbers genders persons aspects aglt aux2 (c:ENIAMtokenizerTypes.token_record) d lemma cat = | |
735 | 738 | let numbers = expand_numbers numbers in |
736 | 739 | let genders = expand_genders genders in |
737 | 740 | Xlist.fold d.simple_valence [] (fun l -> function |
... | ... | @@ -742,29 +745,29 @@ let make_ip numbers genders persons aspects aglt aux2 d lemma cat = |
742 | 745 | let aspects = check_frame_aspect aspects aspect in |
743 | 746 | let quant = ["number",d.e.number,numbers;"gender",d.e.gender,genders; "person", d.e.person,persons; "aspect",d.e.aspect,aspects] in |
744 | 747 | let t = ["ip"; "number"; "gender"; "person"] in |
745 | - let batrs = make_node new_lemma cat d.weight fnum ([mood;tense;"aspect"; "number"; "gender"; "person"] @ if negation = Aff then [] else ["negation"]) in | |
748 | + let batrs = make_node new_lemma cat c.weight fnum ([mood;tense;"aspect"; "number"; "gender"; "person"] @ if negation = Aff then [] else ["negation"]) in | |
746 | 749 | let cond_arg = match mood with "conditional" -> [nosem_schema_field Both [Phrase(Lex "by")]] | "" -> failwith "make_ip" | _ -> [] in |
747 | 750 | let aglt_arg = if aglt then [nosem_schema_field Both [Phrase Aglt]] else [] in |
748 | 751 | let aux_arg = match aux with PastAux -> [nosem_schema_field Both [Phrase AuxPast]] | FutAux -> [nosem_schema_field Both [Phrase AuxFut]] | ImpAux -> [nosem_schema_field Both [Phrase AuxImp]] | NoAux -> [] in |
749 | 752 | let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; |
750 | 753 | qub_inclusion; |
751 | 754 | aglt_arg @ aux_arg @ cond_arg @ schema @ int_arg] in |
752 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
755 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
753 | 756 | with Not_found -> l) |
754 | - | fnum,frame -> failwith ("make_ip 1: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
757 | + | fnum,frame -> failwith ("make_ip 1: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
755 | 758 | |
756 | 759 | |
757 | -let make_infp aspects d lemma = | |
760 | +let make_infp aspects (c:ENIAMtokenizerTypes.token_record) d lemma = | |
758 | 761 | Xlist.fold d.simple_valence [] (fun l -> function |
759 | 762 | fnum,Frame(NonPersAtrs(_,new_lemma,role,role_attr,negation,aspect),schema) -> |
760 | 763 | (try |
761 | 764 | let aspects = check_frame_aspect aspects aspect in |
762 | 765 | let quant = ["aspect",d.e.aspect,aspects] in |
763 | 766 | let t = ["infp"; "aspect"] in |
764 | - let batrs = make_node new_lemma "inf" d.weight fnum (["aspect"] @ if negation = Aff then [] else ["negation"]) in | |
767 | + let batrs = make_node new_lemma "inf" c.weight fnum (["aspect"] @ if negation = Aff then [] else ["negation"]) in | |
765 | 768 | let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; |
766 | 769 | qub_inclusion;schema] in |
767 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
770 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
768 | 771 | with Not_found -> l) |
769 | 772 | | fnum,LexFrame(lid,INF(aspect,negation,refl),NoRestr,schema) -> |
770 | 773 | (try |
... | ... | @@ -772,69 +775,69 @@ let make_infp aspects d lemma = |
772 | 775 | let quant = ["aspect",d.e.aspect,aspects] in |
773 | 776 | let t = ["lex";lid;lemma;"inf"; "aspect"] in |
774 | 777 | let new_lemma,schema = if refl = ReflEmpty then lemma, schema else lemma ^ " się", nosem_refl_schema_field :: schema in |
775 | - let batrs = make_node new_lemma "inf" (lex_weight +. d.weight) fnum (["lex";"aspect"] @ if negation = Aff then [] else ["negation"]) in | |
778 | + let batrs = make_node new_lemma "inf" (lex_weight +. c.weight) fnum (["lex";"aspect"] @ if negation = Aff then [] else ["negation"]) in | |
776 | 779 | let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; |
777 | 780 | [inclusion];schema] in |
778 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
781 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
779 | 782 | with Not_found -> l) |
780 | - | fnum,frame -> failwith ("make_infp: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
783 | + | fnum,frame -> failwith ("make_infp: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
781 | 784 | |
782 | -let make_padvp aspects d lemma cat = | |
785 | +let make_padvp aspects (c:ENIAMtokenizerTypes.token_record) d lemma cat = | |
783 | 786 | Xlist.fold d.simple_valence [] (fun l -> function |
784 | 787 | fnum,Frame(NonPersAtrs(_,new_lemma,role,role_attr,negation,aspect),schema) -> |
785 | 788 | (try |
786 | 789 | let aspects = check_frame_aspect aspects aspect in |
787 | 790 | let quant = ["aspect",d.e.aspect,aspects] in |
788 | 791 | let t = ["padvp"] in |
789 | - let batrs = make_node new_lemma cat d.weight fnum (["aspect"] @ if negation = Aff then [] else ["negation"]) in | |
792 | + let batrs = make_node new_lemma cat c.weight fnum (["aspect"] @ if negation = Aff then [] else ["negation"]) in | |
790 | 793 | let schema_list = [if negation = Aff then [] else [nosem_schema_field Backward [Phrase(Lex "nie")]]; |
791 | 794 | qub_inclusion;schema] in |
792 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
795 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
793 | 796 | with Not_found -> l) |
794 | - | fnum,frame -> failwith ("make_padvp: " ^ lemma ^ ": " ^ WalStringOf.frame lemma frame)) in | |
797 | + | fnum,frame -> failwith ("make_padvp: " ^ lemma ^ ": " ^ ENIAMwalStringOf.frame lemma frame)) in | |
795 | 798 | |
796 | -let make_conjunct (d:PreTypes.token_record) lemma cat = (* FIXME: poprawić semantykę *) | |
799 | +let make_conjunct (c:ENIAMtokenizerTypes.token_record) d lemma cat = (* FIXME: poprawić semantykę *) | |
797 | 800 | let ctype = if cat = "comp" then "sub" else if cat = "conj" then "coord" else failwith "make_conjunct" in |
798 | 801 | let quant = [] in |
799 | 802 | let t = ["cp"; ctype; lemma] in |
800 | - let batrs = make_node lemma cat d.weight 0 [ctype] in | |
803 | + let batrs = make_node lemma cat c.weight 0 [ctype] in | |
801 | 804 | let schema_list = [[comp_arg_schema_field [Phrase IP]]] in |
802 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] in | |
805 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] in | |
803 | 806 | |
804 | 807 | (* FIXME: uzgadniania HIPERO i SELPREFS *) |
805 | -let make_conj f d lemma = | |
808 | +let make_conj f (c:ENIAMtokenizerTypes.token_record) d lemma = | |
806 | 809 | (if f then |
807 | 810 | [LCGrenderer.make_conj_frame |
808 | 811 | ["number",d.e.number,all_numbers;"gender",d.e.gender,all_genders;"person",d.e.person,all_persons] |
809 | 812 | (Tensor[Atom "ip"; Top; Top; Top]) (Tensor[Atom "ip"; Top; Top; Top]) |
810 | 813 | ["ip";"number";"gender";"person"] d |
811 | - (make_node lemma "conj" d.weight 0 ["number";"gender";"person"])] else []) @ | |
814 | + (make_node lemma "conj" c.weight 0 ["number";"gender";"person"])] else []) @ | |
812 | 815 | [LCGrenderer.make_conj_frame [] |
813 | 816 | (Tensor[Atom "prepnp"; Top; Top]) (Tensor[Atom "prepnp"; Top; Top]) ["advp"] d |
814 | - (make_node lemma "conj" d.weight 0 []); | |
817 | + (make_node lemma "conj" c.weight 0 []); | |
815 | 818 | LCGrenderer.make_conj_frame [] |
816 | 819 | (Tensor[Atom "advp"]) (Tensor[Atom "prepnp"; Top; Top]) ["advp"] d |
817 | - (make_node lemma "conj" d.weight 0 []); | |
820 | + (make_node lemma "conj" c.weight 0 []); | |
818 | 821 | LCGrenderer.make_conj_frame [] |
819 | 822 | (Tensor[Atom "prepnp"; Top; Top]) (Tensor[Atom "advp"]) ["advp"] d |
820 | - (make_node lemma "conj" d.weight 0 []); | |
823 | + (make_node lemma "conj" c.weight 0 []); | |
821 | 824 | LCGrenderer.make_conj_frame [] |
822 | 825 | (Tensor[Atom "advp"]) (Tensor[Atom "advp"]) ["advp"] d |
823 | - (make_node lemma "conj" d.weight 0 []); | |
826 | + (make_node lemma "conj" c.weight 0 []); | |
824 | 827 | LCGrenderer.make_conj_frame ["lemma",ge (),[];"case",d.e.case,all_cases] |
825 | 828 | (Tensor[Atom "prepnp";AVar "lemma"; AVar "case"]) (Tensor[Atom "prepnp"; AVar "lemma"; AVar "case"]) |
826 | 829 | ["prepnp";"lemma";"case"] d |
827 | - (make_node lemma "conj" d.weight 0 ["case"]); | |
830 | + (make_node lemma "conj" c.weight 0 ["case"]); | |
828 | 831 | LCGrenderer.make_conj_frame |
829 | 832 | ["number",d.e.number,all_numbers;"case",d.e.case,all_cases;"gender",d.e.gender,all_genders;"person",d.e.person,all_persons] |
830 | 833 | (Tensor[Atom "np"; Top; AVar "case"; Top; Top]) (Tensor[Atom "np"; Top; AVar "case"; Top; Top]) |
831 | 834 | ["np"; "number"; "case"; "gender"; "person"] d |
832 | - (make_node lemma "conj" d.weight 0 ["number";"case";"gender";"person"]); | |
835 | + (make_node lemma "conj" c.weight 0 ["number";"case";"gender";"person"]); | |
833 | 836 | LCGrenderer.make_conj_frame |
834 | 837 | ["number",d.e.number,all_numbers;"case",d.e.case,all_cases;"gender",d.e.gender,all_genders] |
835 | 838 | (Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]) (Tensor[Atom "adjp"; AVar "number"; AVar "case"; AVar "gender"]) |
836 | 839 | ["adjp";"number";"case";"gender"] d |
837 | - (make_node lemma "conj" d.weight 0 ["number";"case";"gender"]); | |
840 | + (make_node lemma "conj" c.weight 0 ["number";"case";"gender"]); | |
838 | 841 | ] in |
839 | 842 | |
840 | 843 | (* FIXME: aktualnie NP nie obejmują przymiotników, trzeba albo dodać podrzędniki przymiotnikowe, albo kategorię np dla przymiotników *) |
... | ... | @@ -844,77 +847,77 @@ let make_conj f d lemma = |
844 | 847 | (* FIXME: sprawdzić czy są ramy z NegationUndef i NegationNA *) |
845 | 848 | (* FIXME: obniżyć wagi przyimków i kublików pisanych z wielkiej litery podobnie przy skrótach *) |
846 | 849 | |
847 | -let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawanie lematów nie działa, gdy mają wielką literę *) | |
850 | +let rec process_interp (c:ENIAMtokenizerTypes.token_record) (d:ENIAMlexSemanticsTypes.lex_sem) = function (* FIXME: rozpoznawanie lematów nie działa, gdy mają wielką literę *) | |
848 | 851 | lemma,"subst",[numbers;cases;genders] -> |
849 | 852 | (if lemma = "co" || lemma = "kto" then (* FIXME: dodać podrzędniki np. co nowego *) |
850 | 853 | List.flatten (Xlist.map ["int";"rel"] (fun ctype -> |
851 | 854 | let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,["ter"]] in |
852 | 855 | let t = ["cp"; "ctype"; lemma] in |
853 | 856 | let sem_mods = ["CTYPE",SubstVar "ctype"] in (* atrybuty ip *) |
854 | - let batrs = make_node lemma "subst" d.weight 0 [ctype;"case"] in (* atrybuty liścia *) | |
857 | + let batrs = make_node lemma "subst" c.weight 0 [ctype;"case"] in (* atrybuty liścia *) | |
855 | 858 | let raised_arg = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["np";"number";"case";"gender";"person"])] in |
856 | 859 | let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in |
857 | 860 | let raised_arg2 = [Raised(["prepnp";"plemma";"case"],Forward,["np";"number";"case";"gender";"person"])] in |
858 | 861 | let raised_arg3 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in |
859 | 862 | let raised_arg4 = [Raised(["comprepnp";"plemma"],Forward,["np";"number";"case";"gender";"person"])] in |
860 | 863 | let schema_list = [[schema_field RAISED "" Forward raised_arg]] in |
861 | - let frame_np = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
864 | + let frame_np = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
862 | 865 | let schema_list = [[schema_field RAISED "" Backward raised_arg2];[schema_field RAISED "" Forward raised_arg1]] in |
863 | - let frame_prepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
866 | + let frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
864 | 867 | let schema_list = [[schema_field RAISED "" Backward raised_arg4];[schema_field RAISED "" Forward raised_arg3]] in |
865 | - let frame_comprepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
868 | + let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
866 | 869 | [frame_np;frame_prepnp;frame_comprepnp])) else []) @ |
867 | 870 | (if lemma = "to" then (* FIXME: przetestować *) |
868 | 871 | let quant = ["ctype",ge (),[];"lemma",ge (),[];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,["ter"]] in |
869 | 872 | let t = ["ncp"; "number"; "case"; "gender"; "person"; "ctype"; "lemma"] in |
870 | - let batrs = make_node "to" "subst" d.weight 0 ["coreferential"; "number"; "case"; "gender"; "person"; "ctype"] in | |
873 | + let batrs = make_node "to" "subst" c.weight 0 ["coreferential"; "number"; "case"; "gender"; "person"; "ctype"] in | |
871 | 874 | let schema_list = [qub_inclusion;[prep_arg_schema_field [Phrase(CP(CompTypeAgr,Comp "lemma"))]]] in |
872 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] | |
875 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] | |
873 | 876 | else []) @ |
874 | - make_np numbers cases genders ["ter"] d lemma "subst" | |
877 | + make_np numbers cases genders ["ter"] c d lemma "subst" | |
875 | 878 | | lemma,"depr",[numbers;cases;genders] -> (* FIXME: sprawdzić uzgodnienie rodzaju dla depr w podmiocie *) |
876 | - make_np numbers cases genders ["ter"] d lemma "depr" | |
879 | + make_np numbers cases genders ["ter"] c d lemma "depr" | |
877 | 880 | | lemma,"ppron12",[numbers;cases;genders;persons] -> |
878 | - make_np numbers cases genders persons d lemma "ppron12" | |
881 | + make_np numbers cases genders persons c d lemma "ppron12" | |
879 | 882 | | lemma,"ppron12",[numbers;cases;genders;persons;akcs] -> |
880 | - make_np numbers cases genders persons d lemma "ppron12" | |
883 | + make_np numbers cases genders persons c d lemma "ppron12" | |
881 | 884 | | lemma,"ppron3",[numbers;cases;genders;persons] -> |
882 | - make_np numbers cases genders persons d lemma "ppron3" | |
885 | + make_np numbers cases genders persons c d lemma "ppron3" | |
883 | 886 | | lemma,"ppron3",[numbers;cases;genders;persons;akcs] -> |
884 | - make_np numbers cases genders persons d lemma "ppron3" | |
887 | + make_np numbers cases genders persons c d lemma "ppron3" | |
885 | 888 | | lemma,"ppron3",[numbers;cases;genders;persons;akcs;praep] -> |
886 | 889 | List.flatten (Xlist.map praep (function |
887 | - "npraep" -> make_np numbers cases genders persons d lemma "ppron3" | |
888 | - | "_" -> make_np numbers cases genders persons d lemma "ppron3" | |
890 | + "npraep" -> make_np numbers cases genders persons c d lemma "ppron3" | |
891 | + | "_" -> make_np numbers cases genders persons c d lemma "ppron3" | |
889 | 892 | | "praep" -> |
890 | 893 | let quant = ["lemma",ge (),[]; "number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,persons] in |
891 | 894 | let t = ["prepnp"; "lemma"; "case"] in |
892 | 895 | Xlist.fold d.simple_valence [] (fun l -> function |
893 | 896 | fnum,Frame(NounAtrs(_,nsyn,nsem),schema) -> |
894 | - let batrs = make_node lemma "ppron3" d.weight fnum (nsyn ::(WalStringOf.nsem nsem) :: ["number";"case";"gender";"person"]) in | |
897 | + let batrs = make_node lemma "ppron3" c.weight fnum (nsyn ::(ENIAMwalStringOf.nsem nsem) :: ["number";"case";"gender";"person"]) in | |
895 | 898 | let raised_arg = [Raised(["prepnp";"lemma";"case"],Forward,["np";"number";"case";"gender";"person"])] in |
896 | 899 | let schema_list = [[schema_field RAISED "" Backward raised_arg];[inclusion]] in |
897 | - (LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs) :: l | |
900 | + (LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs) :: l | |
898 | 901 | | _ -> failwith "process_interp: ppron3 praep") |
899 | 902 | | _ -> failwith "process_interp: ppron3 praep")) |
900 | 903 | | lemma,"siebie",[cases] -> (* FIXME: rozwiązać koreferencję *) |
901 | - make_np ["_"] cases ["_"] ["ter"] d lemma "siebie" | |
904 | + make_np ["_"] cases ["_"] ["ter"] c d lemma "siebie" | |
902 | 905 | | lemma,"prep",[cases;woks] -> (* FIXME: pomijam niesemantyczny compar *) |
903 | - if lemma = "jak" || lemma = "jako" || lemma = "niż" || lemma = "niczym" || lemma = "niby" || lemma = "co" then [make_prep all_cases d lemma] @ make_compar d lemma else | |
904 | - [make_prepnp cases d lemma; | |
905 | - make_prepnump cases d lemma; | |
906 | - make_prepadjp cases d lemma; | |
907 | - make_prep cases d lemma] @ | |
908 | - make_lex_prep cases d lemma @ | |
909 | - make_time_prep cases d lemma | |
906 | + if lemma = "jak" || lemma = "jako" || lemma = "niż" || lemma = "niczym" || lemma = "niby" || lemma = "co" then [make_prep all_cases c d lemma] @ make_compar c d lemma else | |
907 | + [make_prepnp cases c d lemma; | |
908 | + make_prepnump cases c d lemma; | |
909 | + make_prepadjp cases c d lemma; | |
910 | + make_prep cases c d lemma] @ | |
911 | + make_lex_prep cases c d lemma @ | |
912 | + make_time_prep cases c d lemma | |
910 | 913 | | lemma,"prep",[cases] -> |
911 | - if lemma = "jak" || lemma = "jako" || lemma = "niż" || lemma = "niczym" || lemma = "niby" || lemma = "co" then [make_prep all_cases d lemma] @ make_compar d lemma else | |
912 | - [make_prepnp cases d lemma; | |
913 | - make_prepnump cases d lemma; | |
914 | - make_prepadjp cases d lemma; | |
915 | - make_prep cases d lemma] @ | |
916 | - make_lex_prep cases d lemma @ | |
917 | - make_time_prep cases d lemma | |
914 | + if lemma = "jak" || lemma = "jako" || lemma = "niż" || lemma = "niczym" || lemma = "niby" || lemma = "co" then [make_prep all_cases c d lemma] @ make_compar c d lemma else | |
915 | + [make_prepnp cases c d lemma; | |
916 | + make_prepnump cases c d lemma; | |
917 | + make_prepadjp cases c d lemma; | |
918 | + make_prep cases c d lemma] @ | |
919 | + make_lex_prep cases c d lemma @ | |
920 | + make_time_prep cases c d lemma | |
918 | 921 | (* | lemma,"NUM",[["comp"]] -> failwith "num:comp"*) |
919 | 922 | | lemma,"num",[numbers;cases;genders;acm] -> (* FIXME: liczebniki złożone *) |
920 | 923 | (if lemma = "ile" then (* FIXME: walencja ile *) |
... | ... | @@ -924,71 +927,71 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa |
924 | 927 | let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders;"person",d.e.person,["ter"]] in |
925 | 928 | let t = ["cp"; "ctype"; lemma] in |
926 | 929 | let sem_mods = ["CTYPE",SubstVar "ctype"] in |
927 | - let batrs = make_node lemma "num" d.weight 0 [ctype;acm;"number";"case";"gender";"person"] in | |
930 | + let batrs = make_node lemma "num" c.weight 0 [ctype;acm;"number";"case";"gender";"person"] in | |
928 | 931 | let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["nump";num;"case";gend;"person"])] in |
929 | 932 | let raised_arg2a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in |
930 | 933 | let raised_arg2b = [Raised(["prepnp";"plemma";"case"],Forward,["nump";num;"case";gend;"person"])] in |
931 | 934 | let raised_arg3a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in |
932 | 935 | let raised_arg3b = [Raised(["comprepnp";"plemma"],Forward,["nump";num;"case";gend;"person"])] in |
933 | 936 | let schema_list = [[num_arg_schema_field [Phrase ProNG; Phrase phrase]];[schema_field RAISED "" Forward raised_arg1]] in |
934 | - let frame_nump = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
937 | + let frame_nump = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
935 | 938 | let schema_list = [[num_arg_schema_field [Phrase ProNG; Phrase phrase]];[schema_field RAISED "" Backward raised_arg2b];[schema_field RAISED "" Forward raised_arg2a]] in |
936 | - let frame_prepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
939 | + let frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
937 | 940 | let schema_list = [[num_arg_schema_field [Phrase ProNG; Phrase phrase]];[schema_field RAISED "" Backward raised_arg3b];[schema_field RAISED "" Forward raised_arg3a]] in |
938 | - let frame_comprepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
941 | + let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
939 | 942 | [frame_nump;frame_prepnp;frame_comprepnp])))) else []) @ |
940 | - make_nump numbers cases genders ["ter"] acm d lemma "num" | |
943 | + make_nump numbers cases genders ["ter"] acm c d lemma "num" | |
941 | 944 | | _,"numc",[] -> [] |
942 | 945 | | lemma,"intnum",[] -> |
943 | - let batrs = make_node lemma "intnum" d.weight 0 [] in | |
946 | + let batrs = make_node lemma "intnum" c.weight 0 [] in | |
944 | 947 | let numbers,acms = |
945 | 948 | if lemma = "1" || lemma = "-1" then ["sg"],["congr"] else |
946 | 949 | let s = String.get lemma (String.length lemma - 1) in |
947 | 950 | ["pl"],if s = '2' || s = '3' || s = '4' then ["rec";"congr"] else ["rec"] in |
948 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
949 | - make_nump numbers ["_"] ["_"] ["ter"] acms d lemma "intnum" (* FIXME: specjalne traktowanie 1 i poza tym liczba mnoga *) | |
951 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
952 | + make_nump numbers ["_"] ["_"] ["ter"] acms c d lemma "intnum" (* FIXME: specjalne traktowanie 1 i poza tym liczba mnoga *) | |
950 | 953 | | lemma,"realnum",[] -> |
951 | - let batrs = make_node lemma "realnum" d.weight 0 [] in | |
952 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
953 | - make_nump ["sg"] ["_"] ["_"] ["ter"] ["rec"] d lemma "realnum" | |
954 | + let batrs = make_node lemma "realnum" c.weight 0 [] in | |
955 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
956 | + make_nump ["sg"] ["_"] ["_"] ["ter"] ["rec"] c d lemma "realnum" | |
954 | 957 | | lemma,"intnum-interval",[] -> |
955 | - let batrs = make_node lemma "intnum-interval" d.weight 0 [] in | |
956 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
957 | - make_nump ["pl"] ["_"] ["_"] ["ter"] ["rec";"congr"] d lemma "intnum-interval" | |
958 | + let batrs = make_node lemma "intnum-interval" c.weight 0 [] in | |
959 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
960 | + make_nump ["pl"] ["_"] ["_"] ["ter"] ["rec";"congr"] c d lemma "intnum-interval" | |
958 | 961 | | lemma,"realnum-interval",[] -> |
959 | - let batrs = make_node lemma "realnum-interval" d.weight 0 [] in | |
960 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
961 | - make_nump ["sg"] ["_"] ["_"] ["ter"] ["rec"] d lemma "realnum-interval" | |
962 | + let batrs = make_node lemma "realnum-interval" c.weight 0 [] in | |
963 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
964 | + make_nump ["sg"] ["_"] ["_"] ["ter"] ["rec"] c d lemma "realnum-interval" | |
962 | 965 | | lemma,"symbol",[] -> |
963 | - make_np ["sg"] ["_"] ["_"] ["ter"] d lemma "symbol" | |
966 | + make_np ["sg"] ["_"] ["_"] ["ter"] c d lemma "symbol" | |
964 | 967 | | lemma,"ordnum",[] -> |
965 | - make_adjp ["_"] ["_"] ["_"] ["pos"] d lemma "ordnum" | |
966 | - | lemma,"date",[] -> make_np_symbol d lemma "date" | |
967 | - | lemma,"date-interval",[] -> make_np_symbol d lemma "date-interval" | |
968 | - | lemma,"hour-minute",[] -> make_np_symbol d lemma "hour-minute" | |
969 | - | lemma,"hour",[] -> make_np_symbol d lemma "hour" | |
970 | - | lemma,"hour-minute-interval",[] -> make_np_symbol d lemma "hour-minute-interval" | |
971 | - | lemma,"hour-interval",[] -> make_np_symbol d lemma "hour-interval" | |
972 | - | lemma,"year",[] -> make_np_symbol d lemma "year" | |
973 | - | lemma,"year-interval",[] -> make_np_symbol d lemma "year-interval" | |
974 | - | lemma,"day",[] -> make_day d lemma "day" | |
975 | - | lemma,"day-interval",[] -> make_day d lemma "day-interval" | |
976 | - | lemma,"day-month",[] -> make_np_symbol d lemma "day-month" | |
977 | - | lemma,"day-month-interval",[] -> make_np_symbol d lemma "day-month-interval" | |
978 | - | lemma,"month-interval",[] -> make_np_symbol d lemma "month-interval" | |
968 | + make_adjp ["_"] ["_"] ["_"] ["pos"] c d lemma "ordnum" | |
969 | + | lemma,"date",[] -> make_np_symbol c d lemma "date" | |
970 | + | lemma,"date-interval",[] -> make_np_symbol c d lemma "date-interval" | |
971 | + | lemma,"hour-minute",[] -> make_np_symbol c d lemma "hour-minute" | |
972 | + | lemma,"hour",[] -> make_np_symbol c d lemma "hour" | |
973 | + | lemma,"hour-minute-interval",[] -> make_np_symbol c d lemma "hour-minute-interval" | |
974 | + | lemma,"hour-interval",[] -> make_np_symbol c d lemma "hour-interval" | |
975 | + | lemma,"year",[] -> make_np_symbol c d lemma "year" | |
976 | + | lemma,"year-interval",[] -> make_np_symbol c d lemma "year-interval" | |
977 | + | lemma,"day",[] -> make_day c d lemma "day" | |
978 | + | lemma,"day-interval",[] -> make_day c d lemma "day-interval" | |
979 | + | lemma,"day-month",[] -> make_np_symbol c d lemma "day-month" | |
980 | + | lemma,"day-month-interval",[] -> make_np_symbol c d lemma "day-month-interval" | |
981 | + | lemma,"month-interval",[] -> make_np_symbol c d lemma "month-interval" | |
979 | 982 | | lemma,"roman",[] -> (* "Aranżuje XIX struś." *) |
980 | - let batrs = make_node lemma "roman" d.weight 0 [] in | |
981 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
982 | - (make_np_symbol d lemma "roman") @ | |
983 | - (make_adjp ["_"] ["_"] ["_"] ["pos"] d lemma "roman-adj") | |
983 | + let batrs = make_node lemma "roman" c.weight 0 [] in | |
984 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
985 | + (make_np_symbol c d lemma "roman") @ | |
986 | + (make_adjp ["_"] ["_"] ["_"] ["pos"] c d lemma "roman-adj") | |
984 | 987 | | lemma,"roman-interval",[] -> |
985 | - let batrs = make_node lemma "roman-interval" d.weight 0 [] in | |
986 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
987 | - (make_np_symbol d lemma "roman-interval") | |
988 | - | lemma,"match-result",[] -> make_np_symbol d lemma "match-result" | |
989 | - | lemma,"url",[] -> make_np_symbol d lemma "url" | |
990 | - | lemma,"email",[] -> make_np_symbol d lemma "email" | |
991 | - | lemma,"obj-id",[] -> make_np_symbol d lemma "obj-id" | |
988 | + let batrs = make_node lemma "roman-interval" c.weight 0 [] in | |
989 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] ["adja"] d batrs] @ | |
990 | + (make_np_symbol c d lemma "roman-interval") | |
991 | + | lemma,"match-result",[] -> make_np_symbol c d lemma "match-result" | |
992 | + | lemma,"url",[] -> make_np_symbol c d lemma "url" | |
993 | + | lemma,"email",[] -> make_np_symbol c d lemma "email" | |
994 | + | lemma,"obj-id",[] -> make_np_symbol c d lemma "obj-id" | |
992 | 995 | | lemma,"adj",[numbers;cases;genders;grads] -> |
993 | 996 | (if lemma = "czyj" || lemma = "jaki" || lemma = "który" then |
994 | 997 | List.flatten (Xlist.map ["int"] (fun ctype -> |
... | ... | @@ -996,7 +999,7 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa |
996 | 999 | let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"nperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders] in |
997 | 1000 | let t = ["cp"; "ctype"; lemma] in |
998 | 1001 | let sem_mods = ["CTYPE",SubstVar "ctype"] in |
999 | - let batrs = make_node lemma "adj" d.weight 0 [ctype;"number";"case";"gender"] in | |
1002 | + let batrs = make_node lemma "adj" c.weight 0 [ctype;"number";"case";"gender"] in | |
1000 | 1003 | let raised_arg0 = [Raised(["np";"number";"case";"gender";"nperson"],Backward,["adjp";"number";"case";"gender"])] in |
1001 | 1004 | let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["np";"number";"case";"gender";"nperson"])] in |
1002 | 1005 | let raised_arg2a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in |
... | ... | @@ -1004,11 +1007,11 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa |
1004 | 1007 | let raised_arg3a = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in |
1005 | 1008 | let raised_arg3b = [Raised(["comprepnp";"plemma"],Forward,["np";"number";"case";"gender";"nperson"])] in |
1006 | 1009 | let schema_list = [[schema_field RAISED "" Forward raised_arg0];[schema_field RAISED "" Forward raised_arg1]] in |
1007 | - let frame_np = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
1010 | + let frame_np = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
1008 | 1011 | let schema_list = [[schema_field RAISED "" Forward raised_arg0];[schema_field RAISED "" Backward raised_arg2b];[schema_field RAISED "" Forward raised_arg2a]] in |
1009 | - let frame_prepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
1012 | + let frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
1010 | 1013 | let schema_list = [[schema_field RAISED "" Forward raised_arg0];[schema_field RAISED "" Backward raised_arg3b];[schema_field RAISED "" Forward raised_arg3a]] in |
1011 | - let frame_comprepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
1014 | + let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
1012 | 1015 | [frame_np;frame_prepnp;frame_comprepnp])) else []) @ |
1013 | 1016 | (if lemma = "jaki" || lemma = "który" then |
1014 | 1017 | List.flatten (Xlist.map ["rel"] (fun ctype -> |
... | ... | @@ -1016,117 +1019,117 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa |
1016 | 1019 | let quant = ["inumber",ge (),[];"igender",ge (),[];"iperson",ge (),[];"plemma",ge (),[];"ctype",ge (),[ctype];"number",d.e.number,expand_numbers numbers;"case",d.e.case,expand_cases cases;"gender",d.e.gender,expand_genders genders; "person",d.e.person,["ter"]] in |
1017 | 1020 | let t = ["cp"; "ctype"; lemma] in |
1018 | 1021 | let sem_mods = ["CTYPE",SubstVar "ctype"] in |
1019 | - let batrs = make_node lemma "adj" d.weight 0 [ctype;"number";"case";"gender";"person"] in | |
1022 | + let batrs = make_node lemma "adj" c.weight 0 [ctype;"number";"case";"gender";"person"] in | |
1020 | 1023 | let raised_arg = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["np";"number";"case";"gender";"person"])] in |
1021 | 1024 | let raised_arg1 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["prepnp";"plemma";"case"])] in |
1022 | 1025 | let raised_arg2 = [Raised(["prepnp";"plemma";"case"],Forward,["np";"number";"case";"gender";"person"])] in |
1023 | 1026 | let raised_arg3 = [Raised(["ip";"inumber";"igender";"iperson"],Forward,["comprepnp";"plemma"])] in |
1024 | 1027 | let raised_arg4 = [Raised(["comprepnp";"plemma"],Forward,["np";"number";"case";"gender";"person"])] in |
1025 | 1028 | let schema_list = [[schema_field RAISED "" Forward raised_arg]] in |
1026 | - let frame_np = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
1029 | + let frame_np = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
1027 | 1030 | let schema_list = [[schema_field RAISED "" Backward raised_arg2];[schema_field RAISED "" Forward raised_arg1]] in |
1028 | - let frame_prepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
1031 | + let frame_prepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
1029 | 1032 | let schema_list = [[schema_field RAISED "" Backward raised_arg4];[schema_field RAISED "" Forward raised_arg3]] in |
1030 | - let frame_comprepnp = LCGrenderer.make_frame_raised tokens quant schema_list t d batrs sem_mods in | |
1033 | + let frame_comprepnp = LCGrenderer.make_frame_raised tokens lex_sems quant schema_list t d batrs sem_mods in | |
1031 | 1034 | [frame_np;frame_prepnp;frame_comprepnp])) else []) @ |
1032 | 1035 | if lemma = "czyj" || lemma = "jaki" || lemma = "który" then [] else |
1033 | 1036 | (if lemma = "jakiś" || lemma = "ten" || lemma = "taki" then |
1034 | - make_np numbers cases genders ["ter"] d lemma "adj" else []) @ | |
1035 | - make_adjp numbers cases genders grads d lemma "adj" | |
1036 | - | lemma,"adjc",[] -> make_adjp ["sg"] ["pred"] ["m1";"m2";"m3"] ["pos"] d lemma "adjc" (* np: gotów *) (* FIXME: czy to na pewno ma zwykłą walencję przymiotnika? *) | |
1037 | - | lemma,"adjp",[] -> make_adjp all_numbers ["postp"] all_genders ["pos"] d lemma "adjp" | |
1037 | + make_np numbers cases genders ["ter"] c d lemma "adj" else []) @ | |
1038 | + make_adjp numbers cases genders grads c d lemma "adj" | |
1039 | + | lemma,"adjc",[] -> make_adjp ["sg"] ["pred"] ["m1";"m2";"m3"] ["pos"] c d lemma "adjc" (* np: gotów *) (* FIXME: czy to na pewno ma zwykłą walencję przymiotnika? *) | |
1040 | + | lemma,"adjp",[] -> make_adjp all_numbers ["postp"] all_genders ["pos"] c d lemma "adjp" | |
1038 | 1041 | | lemma,"adja",[] -> |
1039 | - let batrs = make_node lemma "adja" d.weight 0 [] in | |
1040 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] (["adja"]) d batrs] | |
1042 | + let batrs = make_node lemma "adja" c.weight 0 [] in | |
1043 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase(Lex "hyphen")]]] (["adja"]) d batrs] | |
1041 | 1044 | | lemma,"adv",[grads] -> |
1042 | - make_advp grads d lemma | |
1045 | + make_advp grads c d lemma | |
1043 | 1046 | | lemma,"adv",[] -> |
1044 | - make_advp ["pos"] d lemma | |
1047 | + make_advp ["pos"] c d lemma | |
1045 | 1048 | | lemma,"ger",[numbers;cases;genders;aspects;negations] -> |
1046 | - make_ger numbers cases genders ["ter"] aspects negations d lemma "ger" | |
1049 | + make_ger numbers cases genders ["ter"] aspects negations c d lemma "ger" | |
1047 | 1050 | | lemma,"pact",[numbers;cases;genders;aspects;negations] -> |
1048 | - make_padj numbers cases genders aspects negations d lemma "pact" | |
1051 | + make_padj numbers cases genders aspects negations c d lemma "pact" | |
1049 | 1052 | | lemma,"ppas",[numbers;cases;genders;aspects;negations] -> |
1050 | - make_padj numbers cases genders aspects negations d lemma "ppas" | |
1053 | + make_padj numbers cases genders aspects negations c d lemma "ppas" | |
1051 | 1054 | | lemma,"fin",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *) |
1052 | 1055 | let persons2 = Xlist.fold persons [] (fun l -> function "sec" -> l | s -> s :: l) in |
1053 | - (make_ip numbers ["_"] persons aspects false false d lemma "fin") @ | |
1056 | + (make_ip numbers ["_"] persons aspects false false c d lemma "fin") @ | |
1054 | 1057 | (if persons2 = [] then [] else |
1055 | - make_ip numbers ["_"] persons2 aspects false true d lemma "fin") | |
1058 | + make_ip numbers ["_"] persons2 aspects false true c d lemma "fin") | |
1056 | 1059 | | lemma,"bedzie",[numbers;persons;aspects] -> |
1057 | 1060 | (if lemma = "być" then |
1058 | 1061 | let quant = ["number",d.e.number,expand_numbers numbers;"gender",d.e.gender,all_genders; "person", d.e.person,persons] in |
1059 | 1062 | let t = ["aux-fut"; "number"; "gender"; "person"] in |
1060 | - [LCGrenderer.make_frame_simple quant t d ( (make_node "być" "bedzie" d.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ | |
1061 | - (make_ip numbers ["_"] persons aspects false false d lemma "bedzie") | |
1063 | + [LCGrenderer.make_frame_simple quant t c ( (make_node "być" "bedzie" c.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ | |
1064 | + (make_ip numbers ["_"] persons aspects false false c d lemma "bedzie") | |
1062 | 1065 | | lemma,"praet",[numbers;genders;aspects;nagl] -> |
1063 | 1066 | (if lemma = "być" then |
1064 | 1067 | let quant = ["number",d.e.number,expand_numbers numbers;"gender",d.e.gender,expand_genders genders; "person",d.e.person, all_persons] in |
1065 | 1068 | let t = ["aux-past"; "number"; "gender"; "person"] in |
1066 | - [LCGrenderer.make_frame_simple quant t d ( (make_node "być" "praet" d.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ | |
1067 | - (make_ip numbers genders ["ter"] aspects false false d lemma "praet") @ | |
1068 | - (make_ip numbers genders ["pri";"sec"] aspects true false d lemma "praet") @ | |
1069 | - (make_ip numbers genders ["pri";"sec";"ter"] aspects false true d lemma "praet") | |
1069 | + [LCGrenderer.make_frame_simple quant t c ( (make_node "być" "praet" c.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ | |
1070 | + (make_ip numbers genders ["ter"] aspects false false c d lemma "praet") @ | |
1071 | + (make_ip numbers genders ["pri";"sec"] aspects true false c d lemma "praet") @ | |
1072 | + (make_ip numbers genders ["pri";"sec";"ter"] aspects false true c d lemma "praet") | |
1070 | 1073 | | lemma,"praet",[numbers;genders;aspects] -> |
1071 | 1074 | (if lemma = "być" then |
1072 | 1075 | let quant = ["number",d.e.number,expand_numbers numbers;"gender",d.e.gender,expand_genders genders; "person",d.e.person, all_persons] in |
1073 | 1076 | let t = ["aux-past"; "number"; "gender"; "person"] in |
1074 | - [LCGrenderer.make_frame_simple quant t d ( (make_node "być" "praet" d.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ | |
1075 | - (make_ip numbers genders ["ter"] aspects false false d lemma "praet") @ | |
1076 | - (make_ip numbers genders ["pri";"sec"] aspects true false d lemma "praet") @ | |
1077 | - (make_ip numbers genders ["pri";"sec";"ter"] aspects false true d lemma "praet") | |
1077 | + [LCGrenderer.make_frame_simple quant t c ( (make_node "być" "praet" c.weight 0 [])(*[Dot;Dot;Dot;Dot]*))] else []) @ | |
1078 | + (make_ip numbers genders ["ter"] aspects false false c d lemma "praet") @ | |
1079 | + (make_ip numbers genders ["pri";"sec"] aspects true false c d lemma "praet") @ | |
1080 | + (make_ip numbers genders ["pri";"sec";"ter"] aspects false true c d lemma "praet") | |
1078 | 1081 | | lemma,"winien",[numbers;genders;aspects] -> |
1079 | - (make_ip numbers genders ["ter"] aspects false false d lemma "winien") @ | |
1080 | - (make_ip numbers genders ["ter"] aspects false true d lemma "winien") @ | |
1081 | - (make_ip numbers genders ["pri";"sec"] aspects true false d lemma "winien") @ | |
1082 | - (make_ip numbers genders ["pri";"sec"] aspects true true d lemma "winien") | |
1082 | + (make_ip numbers genders ["ter"] aspects false false c d lemma "winien") @ | |
1083 | + (make_ip numbers genders ["ter"] aspects false true c d lemma "winien") @ | |
1084 | + (make_ip numbers genders ["pri";"sec"] aspects true false c d lemma "winien") @ | |
1085 | + (make_ip numbers genders ["pri";"sec"] aspects true true c d lemma "winien") | |
1083 | 1086 | | lemma,"impt",[numbers;persons;aspects] -> (* FIXME: genders bez przymnogich *) |
1084 | - make_ip numbers ["_"] persons aspects false false d lemma "impt" | |
1087 | + make_ip numbers ["_"] persons aspects false false c d lemma "impt" | |
1085 | 1088 | | lemma,"imps",[aspects] -> |
1086 | - make_ip ["_"] ["_"] all_persons aspects false false d lemma "imps" | |
1089 | + make_ip ["_"] ["_"] all_persons aspects false false c d lemma "imps" | |
1087 | 1090 | | lemma,"pred",[] -> (* FIXME: czy predykatyw zawsze jest niedokonany? *) |
1088 | - (make_ip ["sg"] ["n2"] ["ter"] ["imperf"] false false d lemma "pred") @ | |
1089 | - (make_ip ["sg"] ["n2"] ["ter"] ["imperf"] false true d lemma "pred") | |
1091 | + (make_ip ["sg"] ["n2"] ["ter"] ["imperf"] false false c d lemma "pred") @ | |
1092 | + (make_ip ["sg"] ["n2"] ["ter"] ["imperf"] false true c d lemma "pred") | |
1090 | 1093 | | "być","aglt",[numbers;persons;aspects;wok] -> |
1091 | 1094 | let numbers = expand_numbers numbers in |
1092 | 1095 | let quant = ["number",d.e.number,numbers; "person", d.e.person,persons] in |
1093 | 1096 | let t = ["aglt"; "number"; "person"] in |
1094 | - [LCGrenderer.make_frame_simple quant t d ( (make_node "być" "aglt" d.weight 0 [])(*[Dot;Dot;Dot]*))] | |
1097 | + [LCGrenderer.make_frame_simple quant t c ( (make_node "być" "aglt" c.weight 0 [])(*[Dot;Dot;Dot]*))] | |
1095 | 1098 | | lemma,"inf",[aspects] -> (* FIXME: wielopoziomowe InfP *) |
1096 | - make_infp aspects d lemma | |
1099 | + make_infp aspects c d lemma | |
1097 | 1100 | | lemma,"pcon",[aspects] -> |
1098 | - make_padvp aspects d lemma "pcon" | |
1101 | + make_padvp aspects c d lemma "pcon" | |
1099 | 1102 | | lemma,"pant",[aspects] -> |
1100 | - make_padvp aspects d lemma "pant" | |
1101 | - | "się","qub",[] -> [LCGrenderer.make_frame_simple [] ["się"] {d with orth=""} ( (make_node "się" "qub" d.weight 0 [])) (* FIXME: dodać make_np *)] | |
1102 | - | "nie","qub",[] -> [LCGrenderer.make_frame_simple [] ["nie"] {d with orth=""} (make_node "nie" "qub" d.weight 0 [])] | |
1103 | - | "by","qub",[] -> [LCGrenderer.make_frame_simple [] ["by"] {d with orth=""} (make_node "by" "qub" d.weight 0 [])] | |
1104 | - | "niech","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] d (make_node "niech" "qub" d.weight 0 [])] | |
1105 | - | "niechaj","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] d (make_node "niechaj" "qub" d.weight 0 [])] | |
1106 | - | "niechże","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] d (make_node "niechże" "qub" d.weight 0 [])] | |
1107 | - | "niechajże","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] d (make_node "niechajże" "qub" d.weight 0 [])] | |
1103 | + make_padvp aspects c d lemma "pant" | |
1104 | + | "się","qub",[] -> [LCGrenderer.make_frame_simple [] ["się"] {c with orth=""} ( (make_node "się" "qub" c.weight 0 [])) (* FIXME: dodać make_np *)] | |
1105 | + | "nie","qub",[] -> [LCGrenderer.make_frame_simple [] ["nie"] {c with orth=""} (make_node "nie" "qub" c.weight 0 [])] | |
1106 | + | "by","qub",[] -> [LCGrenderer.make_frame_simple [] ["by"] {c with orth=""} (make_node "by" "qub" c.weight 0 [])] | |
1107 | + | "niech","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niech" "qub" c.weight 0 [])] | |
1108 | + | "niechaj","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niechaj" "qub" c.weight 0 [])] | |
1109 | + | "niechże","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niechże" "qub" c.weight 0 [])] | |
1110 | + | "niechajże","qub",[] -> [LCGrenderer.make_frame_simple [] ["aux-imp"] c (make_node "niechajże" "qub" c.weight 0 [])] | |
1108 | 1111 | | "czy","qub",[] -> (* FIXME: poprawić semantykę *) |
1109 | 1112 | let quant = [] in |
1110 | 1113 | let t = ["cp"; "int"; "czy"] in |
1111 | - let batrs = make_node "czy" "qub" d.weight 0 ["int"] in | |
1114 | + let batrs = make_node "czy" "qub" c.weight 0 ["int"] in | |
1112 | 1115 | let schema_list = [[comp_arg_schema_field [Phrase IP]]] in |
1113 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] | |
1116 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] | |
1114 | 1117 | | "gdyby","qub",[] -> (* FIXME: poprawić semantykę *) (* FIXME: poprawić tryb przypuszczający *) (* FIXME: problem z interpretacją jako 'gdy' *) |
1115 | 1118 | let quant = [] in |
1116 | 1119 | let t = ["cp"; "rel"; "gdyby"] in |
1117 | - let batrs = make_node "gdyby" "qub" d.weight 0 ["rel"] in | |
1120 | + let batrs = make_node "gdyby" "qub" c.weight 0 ["rel"] in | |
1118 | 1121 | let schema_list = [[comp_arg_schema_field [Phrase IP]]] in |
1119 | - [LCGrenderer.make_frame x_flag tokens quant schema_list t d batrs] | |
1120 | - | lemma,"qub",[] -> [LCGrenderer.make_frame_simple [] ["qub"] d ( (make_node lemma "qub" d.weight 0 []))] (* FIXME: semantyka i rodzaje kublików *) | |
1121 | - | lemma,"comp",[] -> make_conjunct d lemma "comp" | |
1122 | - | "i","conj",[] -> make_conj true d "i" @ (make_conjunct d "i" "conj") | |
1123 | - | "lub","conj",[] -> make_conj true d "lub" @ (make_conjunct d "lub" "conj") | |
1124 | - | "czy","conj",[] -> make_conj true d "czy" @ (make_conjunct d "czy" "conj") | |
1125 | - | "bądź","conj",[] -> make_conj true d "bądź" @ (make_conjunct d "bądź" "conj") | |
1126 | - | lemma,"conj",[] -> make_conjunct d lemma "conj" | |
1122 | + [LCGrenderer.make_frame x_flag tokens lex_sems quant schema_list t d batrs] | |
1123 | + | lemma,"qub",[] -> [LCGrenderer.make_frame_simple [] ["qub"] c ( (make_node lemma "qub" c.weight 0 []))] (* FIXME: semantyka i rodzaje kublików *) | |
1124 | + | lemma,"comp",[] -> make_conjunct c d lemma "comp" | |
1125 | + | "i","conj",[] -> make_conj true c d "i" @ (make_conjunct c d "i" "conj") | |
1126 | + | "lub","conj",[] -> make_conj true c d "lub" @ (make_conjunct c d "lub" "conj") | |
1127 | + | "czy","conj",[] -> make_conj true c d "czy" @ (make_conjunct c d "czy" "conj") | |
1128 | + | "bądź","conj",[] -> make_conj true c d "bądź" @ (make_conjunct c d "bądź" "conj") | |
1129 | + | lemma,"conj",[] -> make_conjunct c d lemma "conj" | |
1127 | 1130 | (* | "interp",[] -> [] |
1128 | 1131 | | "brev",[pun] -> []*) |
1129 | - | lemma,"interj",[] -> [LCGrenderer.make_frame_simple [] ["interj"] d (make_node lemma "interj" d.weight 0 [])] | |
1132 | + | lemma,"interj",[] -> [LCGrenderer.make_frame_simple [] ["interj"] c (make_node lemma "interj" c.weight 0 [])] | |
1130 | 1133 | | lemma,"burk",[] -> [] (* FIXME *) |
1131 | 1134 | (* | "dig",[] -> [] |
1132 | 1135 | | "romandig",[] -> [] |
... | ... | @@ -1134,103 +1137,103 @@ let rec process_interp (d:PreTypes.token_record) = function (* FIXME: rozpoznawa |
1134 | 1137 | | "xxx",[] -> [] (* to występuje w słowniku skrótów *)*) |
1135 | 1138 | (* | ".","interp",[] -> [] |
1136 | 1139 | | "%","interp",[] -> []*) |
1137 | - | "-","interp",[] -> [LCGrenderer.make_frame_simple [] ["hyphen"] d (make_node "-" "interp" d.weight 0 [])] | |
1140 | + | "-","interp",[] -> [LCGrenderer.make_frame_simple [] ["hyphen"] c (make_node "-" "interp" c.weight 0 [])] | |
1138 | 1141 | (* | ":","interp",[] -> [LCGrenderer.make_frame_simple [] ["colon"] ":" beg len [Dot] [Dot]]*) |
1139 | - | "?","interp",[] -> [LCGrenderer.make_frame_simple [] ["int"] d (make_node "?" "interp" d.weight 0 [])] (*FIXME: zdanie nadrzędne powinno mieć atrybut pytajności(Attr("INT",Val "+"))] *) | |
1140 | - | ",","interp",[] -> make_conj false d "," (*@ [LCGrenderer.make_frame_simple [] ["comma"] "," beg len [Dot] [Dot]]*) | |
1142 | + | "?","interp",[] -> [LCGrenderer.make_frame_simple [] ["int"] c (make_node "?" "interp" c.weight 0 [])] (*FIXME: zdanie nadrzędne powinno mieć atrybut pytajności(Attr("INT",Val "+"))] *) | |
1143 | + | ",","interp",[] -> make_conj false c d "," (*@ [LCGrenderer.make_frame_simple [] ["comma"] "," beg len [Dot] [Dot]]*) | |
1141 | 1144 | | ";","interp",[] -> [](*[LCGrenderer.make_frame_simple [] ["comma"] ";" beg len [Dot] [Dot]]*) |
1142 | 1145 | | "„","interp",[] -> [(* FIXME: zaznaczyć niesemantyczność quotów *) |
1143 | 1146 | LCGrenderer.make_quot_frame |
1144 | 1147 | ["number",d.e.number,[];"case",d.e.case,[];"gender",d.e.gender,[];"person",d.e.person,[]] |
1145 | 1148 | (Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]) (Tensor[Atom "rquot"]) |
1146 | 1149 | ["np";"number";"case";"gender";"person"] d |
1147 | - (make_node "„" "interp" d.weight 0 [])] | |
1148 | - | "”","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot"] d (make_node "”" "interp" d.weight 0 [])] | |
1149 | - | "«","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot3"] d (make_node "«" "interp" d.weight 0 []); | |
1150 | + (make_node "„" "interp" c.weight 0 [])] | |
1151 | + | "”","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot"] c (make_node "”" "interp" c.weight 0 [])] | |
1152 | + | "«","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot3"] c (make_node "«" "interp" c.weight 0 []); | |
1150 | 1153 | LCGrenderer.make_quot_frame |
1151 | 1154 | ["number",d.e.number,[];"case",d.e.case,[];"gender",d.e.gender,[];"person",d.e.person,[]] |
1152 | 1155 | (Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]) (Tensor[Atom "rquot2"]) |
1153 | 1156 | ["np";"number";"case";"gender";"person"] d |
1154 | - (make_node "«" "interp" d.weight 0 [])] | |
1155 | - | "»","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot2"] d (make_node "»" "interp" d.weight 0 []); | |
1157 | + (make_node "«" "interp" c.weight 0 [])] | |
1158 | + | "»","interp",[] -> [LCGrenderer.make_frame_simple [] ["rquot2"] c (make_node "»" "interp" c.weight 0 []); | |
1156 | 1159 | LCGrenderer.make_quot_frame |
1157 | 1160 | ["number",d.e.number,[];"case",d.e.case,[];"gender",d.e.gender,[];"person",d.e.person,[]] |
1158 | 1161 | (Tensor[Atom "np"; AVar "number"; AVar "case"; AVar "gender"; AVar "person"]) (Tensor[Atom "rquot3"]) |
1159 | 1162 | ["np";"number";"case";"gender";"person"] d |
1160 | - (make_node "»" "interp" d.weight 0 [])] | |
1161 | - | "(","interp",[] -> [LCGrenderer.make_inclusion_frame (Tensor[Atom "rparen"]) d (make_node "(" "interp" d.weight 0 [])] | |
1162 | - | ")","interp",[] -> [LCGrenderer.make_frame_simple [] ["rparen"] d (make_node ")" "interp" d.weight 0 [])] | |
1163 | - | "[","interp",[] -> [LCGrenderer.make_inclusion_frame (Tensor[Atom "rparen"]) d (make_node "[" "interp" d.weight 0 [])] | |
1164 | - | "]","interp",[] -> [LCGrenderer.make_frame_simple [] ["rparen"] d (make_node "]" "interp" d.weight 0 [])] | |
1163 | + (make_node "»" "interp" c.weight 0 [])] | |
1164 | + | "(","interp",[] -> [LCGrenderer.make_inclusion_frame (Tensor[Atom "rparen"]) d (make_node "(" "interp" c.weight 0 [])] | |
1165 | + | ")","interp",[] -> [LCGrenderer.make_frame_simple [] ["rparen"] c (make_node ")" "interp" c.weight 0 [])] | |
1166 | + | "[","interp",[] -> [LCGrenderer.make_inclusion_frame (Tensor[Atom "rparen"]) d (make_node "[" "interp" c.weight 0 [])] | |
1167 | + | "]","interp",[] -> [LCGrenderer.make_frame_simple [] ["rparen"] c (make_node "]" "interp" c.weight 0 [])] | |
1165 | 1168 | | lemma,"unk",[] -> |
1166 | 1169 | let quant = ["number",d.e.number,all_numbers;"case",d.e.case,all_cases; "gender",d.e.gender,all_genders; "person",d.e.person, ["ter"]] in |
1167 | 1170 | let t = ["np"; "number"; "case"; "gender"; "person"] in |
1168 | - let batrs = make_node lemma "unk" d.weight 0 ["number"; "case"; "gender"; "person"] in | |
1169 | - [LCGrenderer.make_frame_simple quant t d ( batrs)] | |
1171 | + let batrs = make_node lemma "unk" c.weight 0 ["number"; "case"; "gender"; "person"] in | |
1172 | + [LCGrenderer.make_frame_simple quant t c ( batrs)] | |
1170 | 1173 | | _,"xxx",[] -> [] (* FIXME *) |
1171 | - | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] d (make_node "." "interp" d.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *) | |
1174 | + | ".","interp",[] -> [LCGrenderer.make_frame_simple [] ["dot"] c (make_node "." "interp" c.weight 0 [])] (* FIXME: to jest potrzebne przy CONLL *) | |
1172 | 1175 | | "<conll_root>","interp",[] -> |
1173 | - let batrs = (make_node "<conll_root>" "interp" d.weight 0 []) in | |
1176 | + let batrs = (make_node "<conll_root>" "interp" c.weight 0 []) in | |
1174 | 1177 | let schema_list = [[schema_field CLAUSE "Clause" Forward [Phrase IP;Phrase (CP(Int,CompUndef));Phrase (NP(Case "voc"));Phrase (Lex "interj")]]] in |
1175 | - [LCGrenderer.make_frame false tokens [] schema_list ["<conll_root>"] d batrs] | |
1178 | + [LCGrenderer.make_frame false tokens lex_sems [] schema_list ["<conll_root>"] d batrs] | |
1176 | 1179 | | lemma,c,l -> failwith ("process_interp: " ^ lemma ^ ":" ^ c ^ ":" ^ (String.concat ":" (Xlist.map l (String.concat ".")))) in |
1177 | 1180 | |
1178 | -let process_bracket_lemma (d:PreTypes.token_record) = function | |
1181 | +let process_bracket_lemma (c:ENIAMtokenizerTypes.token_record) (d:ENIAMlexSemanticsTypes.lex_sem) = function | |
1179 | 1182 | (* "<query>" -> |
1180 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field NOSEM "" Forward [Phrase Null;Phrase (Lex "<dummy>")]];[arg_schema_field Forward [Phrase (Lex "</query>")]]] (["<query>"]) {d with orth=""} (make_node "<query1>" "interp" d.weight 0 []); | |
1181 | - LCGrenderer.make_frame x_flag tokens [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query2>" "interp" d.weight 0 []); | |
1182 | - LCGrenderer.make_frame x_flag tokens [] [[(*nosem*)arg_schema_field Forward [Phrase (Lex "<speaker>")]];[nosem_schema_field Forward [Phrase (Lex "<colon>")]];[(*nosem*)arg_schema_field Forward [Phrase (Lex "<ors>")]];[(*nosem*)arg_schema_field Forward [Phrase (Lex "</query>")]]] (["<query>"]) {d with orth=""} (make_node "<query3>" "interp" d.weight 0 []); | |
1183 | - LCGrenderer.make_frame x_flag tokens [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<colon>")]]] (["<query>"]) {d with orth=""} (make_node "<query4>" "interp" d.weight 0 []); | |
1184 | - LCGrenderer.make_frame x_flag tokens [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<colon>")]];[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query5>" "interp" d.weight 0 []); (* FIXME: zdania w odwróconej kolejności *) | |
1185 | - LCGrenderer.make_frame x_flag tokens [] [[schema_field SENTENCE "Sentence" Forward [Phrase (Lex "<sentence>")]];[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query6>" "interp" d.weight 0 [])] (* FIXME: zdania w odwróconej kolejności *) | |
1183 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field NOSEM "" Forward [Phrase Null;Phrase (Lex "<dummy>")]];[arg_schema_field Forward [Phrase (Lex "</query>")]]] (["<query>"]) {d with orth=""} (make_node "<query1>" "interp" c.weight 0 []); | |
1184 | + LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query2>" "interp" c.weight 0 []); | |
1185 | + LCGrenderer.make_frame x_flag tokens lex_sems [] [[(*nosem*)arg_schema_field Forward [Phrase (Lex "<speaker>")]];[nosem_schema_field Forward [Phrase (Lex "<colon>")]];[(*nosem*)arg_schema_field Forward [Phrase (Lex "<ors>")]];[(*nosem*)arg_schema_field Forward [Phrase (Lex "</query>")]]] (["<query>"]) {d with orth=""} (make_node "<query3>" "interp" c.weight 0 []); | |
1186 | + LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<colon>")]]] (["<query>"]) {d with orth=""} (make_node "<query4>" "interp" c.weight 0 []); | |
1187 | + LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "" Forward [Phrase (Lex "<colon>")]];[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query5>" "interp" c.weight 0 []); (* FIXME: zdania w odwróconej kolejności *) | |
1188 | + LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SENTENCE "Sentence" Forward [Phrase (Lex "<sentence>")]];[schema_field SENTENCE "" Forward [Phrase (Lex "<ors>")]]] (["<query>"]) {d with orth=""} (make_node "<query6>" "interp" c.weight 0 [])] (* FIXME: zdania w odwróconej kolejności *) | |
1186 | 1189 | | "</query>" -> |
1187 | 1190 | let t = (["</query>"]) in |
1188 | - let batrs = (make_node "</query>" "interp" d.weight 0 []) in | |
1191 | + let batrs = (make_node "</query>" "interp" c.weight 0 []) in | |
1189 | 1192 | let schema_list = [[schema_field NOSEM "" Backward [Phrase Null;Phrase (Lex "<dummy>")]];[schema_field SENTENCE "Sentence" Backward [Multi[Lex "<sentence>"](*Phrase(Lex "s")*)]]] in |
1190 | - [LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs]*) | |
1193 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs]*) | |
1191 | 1194 | | "„s" -> [] |
1192 | - (*let batrs = make_node "pro-komunikować" "pro" d.weight 0 [] in | |
1193 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field OBJ "Theme" Forward [Phrase (Lex "</or1>")]]] (["<sentence>"(*"or"*)]) {d with orth=""} batrs; | |
1194 | - LCGrenderer.make_frame_simple [] ["<dummy>"] d ( (make_node "„s" "interp" d.weight 0 []))]*) | |
1195 | + (*let batrs = make_node "pro-komunikować" "pro" c.weight 0 [] in | |
1196 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field OBJ "Theme" Forward [Phrase (Lex "</or1>")]]] (["<sentence>"(*"or"*)]) {d with orth=""} batrs; | |
1197 | + LCGrenderer.make_frame_simple [] ["<dummy>"] d ( (make_node "„s" "interp" c.weight 0 []))]*) | |
1195 | 1198 | | "”s" -> [] |
1196 | 1199 | (*let t = (["</or1>"]) in |
1197 | - let batrs = (make_node "”s" "interp" d.weight 0 []) in | |
1200 | + let batrs = (make_node "”s" "interp" c.weight 0 []) in | |
1198 | 1201 | let schema_list = [[schema_field SENTENCE "Sentence" Backward [Multi[Lex "<sentence>"](*Phrase(Lex "s")*)]]] in |
1199 | - [LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs; | |
1200 | - LCGrenderer.make_frame_simple [] ["<dummy>"] d ( (make_node "”s" "interp" d.weight 0 []))]*) | |
1202 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs; | |
1203 | + LCGrenderer.make_frame_simple [] ["<dummy>"] d ( (make_node "”s" "interp" c.weight 0 []))]*) | |
1201 | 1204 | | "«s" -> [] (* FIXME *) |
1202 | 1205 | | "»s" -> [] (* FIXME *) |
1203 | 1206 | | ":" -> |
1204 | - [LCGrenderer.make_frame_simple [] ["or"] d (LCGrenderer.make_pro_komunikat tokens)] | |
1207 | + [LCGrenderer.make_frame_simple [] ["or"] c (LCGrenderer.make_pro_komunikat tokens lex_sems)] | |
1205 | 1208 | | ":s" -> |
1206 | - let batrs = make_node "pro-komunikować" "pro" d.weight 0 [] in | |
1207 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field SUBJ "Initiator" Backward [Phrase (Lex "<speaker>")]]] (["<colon>"]) {d with orth=""} batrs; | |
1208 | - LCGrenderer.make_frame x_flag tokens [] [[schema_field SUBJ "Initiator" Backward [Phrase (Lex "<speaker>")]];[schema_field OBJ "Theme" Forward [Phrase (Lex "</query>")]]] (["<colon>"]) {d with orth=""} batrs] | |
1209 | - (*| "<or>" -> [LCGrenderer.make_frame x_flag tokens [] [[nosem_schema_field Forward [Phrase (Lex "</or>")]]] (["or"]) {d with orth=""} (make_node "<or>" "interp" d.weight 0 [])]*) | |
1209 | + let batrs = make_node "pro-komunikować" "pro" c.weight 0 [] in | |
1210 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SUBJ "Initiator" Backward [Phrase (Lex "<speaker>")]]] (["<colon>"]) {c with orth=""} batrs; | |
1211 | + LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field SUBJ "Initiator" Backward [Phrase (Lex "<speaker>")]];[schema_field OBJ "Theme" Forward [Phrase (Lex "</query>")]]] (["<colon>"]) {c with orth=""} batrs] | |
1212 | + (*| "<or>" -> [LCGrenderer.make_frame x_flag tokens lex_sems [] [[nosem_schema_field Forward [Phrase (Lex "</or>")]]] (["or"]) {d with orth=""} (make_node "<or>" "interp" c.weight 0 [])]*) | |
1210 | 1213 | | "<or>" -> [] |
1211 | 1214 | | "<or-sentence>" -> (* FIXME: dodać mówcę jako pro *) |
1212 | - let batrs = make_node "pro-komunikować" "pro" d.weight 0 [] in | |
1213 | - [LCGrenderer.make_frame x_flag tokens [] [[schema_field ARG ""(*"Theme"*) Forward [Phrase (Lex "s")]]] ["<root>"] {d with orth=""} batrs; | |
1214 | - LCGrenderer.or_frame (make_node "<sentence>" "interp" d.weight 0 [])] | |
1215 | + let batrs = make_node "pro-komunikować" "pro" c.weight 0 [] in | |
1216 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[schema_field ARG ""(*"Theme"*) Forward [Phrase (Lex "s")]]] ["<root>"] {c with orth=""} batrs; | |
1217 | + LCGrenderer.or_frame (make_node "<sentence>" "interp" c.weight 0 [])] | |
1215 | 1218 | | "</or-sentence>" -> |
1216 | 1219 | let t = ["or2"] in |
1217 | - let batrs = (make_node "</or-sentence>" "interp" d.weight 0 []) in | |
1220 | + let batrs = (make_node "</or-sentence>" "interp" c.weight 0 []) in | |
1218 | 1221 | let schema_list = [[schema_field CLAUSE "Clause" Backward [Multi[IP;CP(Int,CompUndef);NP(Case "voc");Lex "interj"](*Phrase IP;Phrase(CP(Int,CompUndef));Phrase(NP(Case "voc"))*)]](*;[WalFrames.schema_field NOSEM "" Backward [Phrase(Lex "<sentence>")]]*)] in |
1219 | - [LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs] | |
1222 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs] | |
1220 | 1223 | | "</or>" -> [] |
1221 | 1224 | (*let t = (["</or>"]) in |
1222 | - let batrs = (make_node "</or>" "interp" d.weight 0 []) in | |
1225 | + let batrs = (make_node "</or>" "interp" c.weight 0 []) in | |
1223 | 1226 | let schema_list = [[schema_field SENTENCE "Sentence" Backward [Multi[Lex "<sentence>"](*Phrase(Lex "s")*)]]] in |
1224 | - [LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs] (* FIXME: semantyka *)*) | |
1227 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs] (* FIXME: semantyka *)*) | |
1225 | 1228 | | "<sentence>" -> |
1226 | - [LCGrenderer.make_frame x_flag tokens [] [[arg_schema_field Forward [Phrase (Lex "s")]]] ["<root>"] {d with orth=""} (make_node "<sentence>" "interp" d.weight 0 []); | |
1227 | - LCGrenderer.make_frame x_flag tokens [] [[arg_schema_field Forward [Phrase (NP(Case "nom"))]];[nosem_schema_field Forward [Phrase (Lex "</speaker>")]]] (["<speaker>"]) {d with orth=""} (make_node "<speaker>" "interp" d.weight 0 [])] | |
1229 | + [LCGrenderer.make_frame x_flag tokens lex_sems [] [[arg_schema_field Forward [Phrase (Lex "s")]]] ["<root>"] {c with orth=""} (make_node "<sentence>" "interp" c.weight 0 []); | |
1230 | + LCGrenderer.make_frame x_flag tokens lex_sems [] [[arg_schema_field Forward [Phrase (NP(Case "nom"))]];[nosem_schema_field Forward [Phrase (Lex "</speaker>")]]] (["<speaker>"]) {c with orth=""} (make_node "<speaker>" "interp" c.weight 0 [])] | |
1228 | 1231 | | "</sentence>" -> |
1229 | 1232 | let t = ["s"] in |
1230 | - let batrs = (make_node "</sentence>" "interp" d.weight 0 []) in | |
1233 | + let batrs = (make_node "</sentence>" "interp" c.weight 0 []) in | |
1231 | 1234 | let schema_list = [[schema_field CLAUSE "Clause" Backward [Multi[IP;CP(Int,CompUndef);NP(Case "voc");Lex "interj"](*Phrase IP;Phrase(CP(Int,CompUndef));Phrase(NP(Case "voc"))*)]](*;[WalFrames.schema_field NOSEM "" Backward [Phrase(Lex "<sentence>")]]*)] in |
1232 | - [LCGrenderer.make_frame_simple [] ["</speaker>"] d ( (make_node "</speaker>" "interp" d.weight 0 [])); | |
1233 | - LCGrenderer.make_frame x_flag tokens [] schema_list t d batrs] | |
1235 | + [LCGrenderer.make_frame_simple [] ["</speaker>"] c ( (make_node "</speaker>" "interp" c.weight 0 [])); | |
1236 | + LCGrenderer.make_frame x_flag tokens lex_sems [] schema_list t d batrs] | |
1234 | 1237 | | lemma -> raise Not_found in |
1235 | 1238 | |
1236 | 1239 | let get_labels () = { |
... | ... | @@ -1242,26 +1245,26 @@ let get_labels () = { |
1242 | 1245 | } in |
1243 | 1246 | |
1244 | 1247 | (* create_entries *) |
1245 | - match d with | |
1248 | + match c with | |
1246 | 1249 | {token = Interp "<clause>"} -> [BracketSet(Forward),Dot] |
1247 | 1250 | | {token = Interp "</clause>"} -> [BracketSet(Backward),Dot] |
1248 | 1251 | | {token = Interp lemma} -> |
1249 | 1252 | (try |
1250 | - Xlist.fold (process_bracket_lemma d lemma) [] (fun l (symbol,sem) -> (Bracket(true,true,symbol),sem) :: l) | |
1253 | + Xlist.fold (process_bracket_lemma c d lemma) [] (fun l (symbol,sem) -> (Bracket(true,true,symbol),sem) :: l) | |
1251 | 1254 | with Not_found -> |
1252 | 1255 | (* print_endline ("x"^lemma^"x"); *) |
1253 | - let entries = process_interp d (lemma,"interp",[]) in | |
1256 | + let entries = process_interp c d (lemma,"interp",[]) in | |
1254 | 1257 | Xlist.map entries (fun (symbol,sem) -> Bracket(false,false,symbol),sem)) |
1255 | 1258 | | {token = Lemma(lemma,"sinterj",[[]])} -> |
1256 | 1259 | let t = ["interj"] in |
1257 | - let batrs = make_node lemma "sinterj" d.weight 0 [] in | |
1258 | - let symbol,sem = LCGrenderer.make_frame_simple [] t d ( batrs) in | |
1260 | + let batrs = make_node lemma "sinterj" c.weight 0 [] in | |
1261 | + let symbol,sem = LCGrenderer.make_frame_simple [] t c ( batrs) in | |
1259 | 1262 | [Bracket(true,true,symbol),sem] |
1260 | 1263 | | {token = Lemma(lemma,pos,interp)} -> |
1261 | 1264 | (* print_endline (lemma ^ " " ^ pos); *) |
1262 | 1265 | Xlist.fold interp [] (fun l tags -> |
1263 | 1266 | let d = {d with e=get_labels (); valence=LCGrenderer.make_controll d.valence} in |
1264 | - let entries = process_interp d (lemma,pos,tags) in | |
1267 | + let entries = process_interp c d (lemma,pos,tags) in | |
1265 | 1268 | Xlist.map entries (fun (symbol,sem) -> Bracket(false,false,symbol),sem) @ l) |
1266 | 1269 | | _ -> [] |
1267 | 1270 | |
... | ... | @@ -1280,10 +1283,11 @@ let create (paths,last) tokens lex_sems = |
1280 | 1283 | uni_weight := 0.; |
1281 | 1284 | let chart = LCGchart.make last in |
1282 | 1285 | let chart = Xlist.fold paths chart (fun chart (id,lnode,rnode) -> |
1283 | - let t = ExtArray.get tokens id in | |
1286 | + let c = ExtArray.get tokens id in | |
1287 | + let d = ExtArray.get lex_sems id in | |
1284 | 1288 | (* if t.weight < -0.9 || Xlist.mem t.attrs "notvalidated proper" || Xlist.mem t.attrs "lemmatized as lowercase" then chart else *) |
1285 | - let chart = LCGchart.add_inc chart lnode rnode (Tensor[Atom ("[" ^ t.orth ^ "]")], Dot) 0 in | |
1286 | - LCGchart.add_inc_list chart lnode rnode (create_entries tokens id (t:PreTypes.token_record) false) 0) in | |
1289 | + let chart = LCGchart.add_inc chart lnode rnode (Tensor[Atom ("[" ^ c.orth ^ "]")], Dot) 0 in | |
1290 | + LCGchart.add_inc_list chart lnode rnode (create_entries tokens lex_sems id (c:ENIAMtokenizerTypes.token_record) d false) 0) in | |
1287 | 1291 | let set = Xlist.fold paths IntIntSet.empty (fun set (_,lnode,rnode) -> IntIntSet.add set (lnode,rnode)) in |
1288 | 1292 | let chart = IntIntSet.fold set chart (fun chart (i,j) -> LCGchart.make_unique chart i j) in |
1289 | 1293 | chart |
... | ... | @@ -1299,13 +1303,14 @@ let rec dep_create_rec nodes sons conll_id = |
1299 | 1303 | (* Printf.printf "dep_create_rec [%s] %d [%s]\n" (String.concat ";" (Xlist.map left string_of_int)) conll_id (String.concat ";" (Xlist.map right string_of_int)); *) |
1300 | 1304 | DepNode(conll_id, Xlist.map left (dep_create_rec nodes sons), node, Xlist.map right (dep_create_rec nodes sons)) |
1301 | 1305 | |
1302 | -let dep_create paths tokens = | |
1306 | +let dep_create paths tokens lex_sems = | |
1303 | 1307 | uni_weight := 0.; |
1304 | 1308 | let sons = Int.fold 1 (Array.length paths - 1) IntMap.empty (fun sons i -> |
1305 | 1309 | let _,super,_ = paths.(i) in |
1306 | 1310 | IntMap.add_inc sons super [i] (fun l -> i :: l)) in |
1307 | 1311 | let nodes = Int.fold 0 (Array.length paths - 1) IntMap.empty (fun nodes i -> |
1308 | 1312 | let id,_,_ = paths.(i) in |
1309 | - let t = ExtArray.get tokens id in | |
1310 | - IntMap.add nodes i (create_entries tokens id t true)) in | |
1313 | + let c = ExtArray.get tokens id in | |
1314 | + let d = ExtArray.get lex_sems id in | |
1315 | + IntMap.add nodes i (create_entries tokens lex_sems id c d true)) in | |
1311 | 1316 | dep_create_rec nodes sons 0 |
... | ... |
parser/LCGrenderer.ml
... | ... | @@ -17,8 +17,9 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -open WalTypes | |
21 | -open PreTypes | |
20 | +open ENIAMtokenizerTypes | |
21 | +open ENIAMwalTypes | |
22 | +open ENIAMlexSemanticsTypes | |
22 | 23 | open Xstd |
23 | 24 | |
24 | 25 | let dir_of_dir = function |
... | ... | @@ -189,7 +190,7 @@ let make_arg_phrase = function |
189 | 190 | | Null -> One |
190 | 191 | | X -> Tensor[Atom "X"] |
191 | 192 | | Lex lex -> Tensor[Atom lex] |
192 | - | phrase -> failwith ("make_arg_phrase: " ^ WalStringOf.phrase phrase) | |
193 | + | phrase -> failwith ("make_arg_phrase: " ^ ENIAMwalStringOf.phrase phrase) | |
193 | 194 | |
194 | 195 | let make_arg_pos = function (* wprowadzam uzgodnienia a nie wartości cech, bo wartości cech są wprowadzane przez leksem a uzgodnienia wiążą je z wartościami u nadrzędnika *) |
195 | 196 | | SUBST(_,Case case) -> [Atom "subst"; Top; Atom case; Top; Top] |
... | ... | @@ -226,7 +227,7 @@ let make_arg_pos = function (* wprowadzam uzgodnienia a nie wartości cech, bo w |
226 | 227 | | COMPAR -> [Atom "TODO"] (* FIXME: todo *) |
227 | 228 | | COMP ctype -> [Atom "comp"; arg_of_ctype ctype] |
228 | 229 | | PERS _ -> [Atom "TODO"] (* FIXME: todo *) |
229 | - | pos -> failwith ("make_arg_pos: " ^ WalStringOf.pos pos) | |
230 | + | pos -> failwith ("make_arg_pos: " ^ ENIAMwalStringOf.pos pos) | |
230 | 231 | |
231 | 232 | let rec make_arg quant = function |
232 | 233 | Phrase phrase -> make_arg_phrase phrase |
... | ... | @@ -234,7 +235,7 @@ let rec make_arg quant = function |
234 | 235 | | LexArg(id,arg,lex) -> Tensor([Atom "lex";Atom id;Atom lex] @ make_arg_pos arg) |
235 | 236 | (* | LexRealization(arg,lex) -> (match make_arg arg with Tensor l -> Tensor([Atom "lexr";Atom lex] @ l) | _ -> failwith "make_arg") *) |
236 | 237 | | Raised(arg1,dir,arg2) -> Imp(Tensor(make_tensor_type quant arg1),dir_of_dir dir,Tensor(make_tensor_type quant arg2)) |
237 | - | morf -> failwith ("make_arg: " ^ WalStringOf.morf morf) | |
238 | + | morf -> failwith ("make_arg: " ^ ENIAMwalStringOf.morf morf) | |
238 | 239 | |
239 | 240 | let empty_schema_field = |
240 | 241 | {gf=NOGF; role=""; role_attr=""; sel_prefs=[]; cr=[]; ce=[]; dir=Both; morfs=[]} |
... | ... | @@ -252,19 +253,22 @@ let get_pro_id () = |
252 | 253 | incr pro_id_counter; |
253 | 254 | !pro_id_counter*) |
254 | 255 | |
255 | -let make_pro tokens = | |
256 | - let t = {empty_token with token=Lemma("pro","pro",[]); senses=["pro",["0"],0.]} in | |
256 | +let make_pro tokens lex_sems = | |
257 | + let t = {empty_token with token=Lemma("pro","pro",[])} in | |
257 | 258 | let id = ExtArray.add tokens t in |
259 | + let _ = ExtArray.add lex_sems {empty_lex_sem with senses=["pro",["0"],0.]} in | |
258 | 260 | Node{empty_node with pred="pro"; cat="pro"; weight=0.; id=id; attrs=[]; args=Dot} |
259 | 261 | |
260 | -let make_prong tokens = | |
261 | - let t = {empty_token with token=Lemma("pro","pro",[]); senses=["pro",["0"],0.]} in | |
262 | +let make_prong tokens lex_sems = | |
263 | + let t = {empty_token with token=Lemma("pro","pro",[])} in | |
262 | 264 | let id = ExtArray.add tokens t in |
265 | + let _ = ExtArray.add lex_sems {empty_lex_sem with senses=["pro",["0"],0.]} in | |
263 | 266 | Node{empty_node with pred="pro"; cat="pro"; weight=0.; id=id; attrs=["NUM",SubstVar "number";"GEND",SubstVar "gender";"PERS",SubstVar "person"]; args=Dot} |
264 | 267 | |
265 | -let make_pro_komunikat tokens = | |
266 | - let t = {empty_token with token=Lemma("pro-komunikat","pro",[]); senses=["pro-komunikat",["0"],0.]} in | |
268 | +let make_pro_komunikat tokens lex_sems = | |
269 | + let t = {empty_token with token=Lemma("pro-komunikat","pro",[])} in | |
267 | 270 | let id = ExtArray.add tokens t in |
271 | + let _ = ExtArray.add lex_sems {empty_lex_sem with senses=["pro-komunikat",["0"],0.]} in | |
268 | 272 | {empty_node with pred="pro-komunikat"; cat="pro"; weight=10.; id=id; attrs=[]; args=Dot} |
269 | 273 | |
270 | 274 | let make_var vars gf = |
... | ... | @@ -299,7 +303,7 @@ let make_var vars gf = |
299 | 303 | | CLAUSE -> "clause" |
300 | 304 | | SENTENCE -> "sentence"*) |
301 | 305 | |
302 | -let make_args tokens quant var_map v = function | |
306 | +let make_args tokens lex_sems quant var_map v = function | |
303 | 307 | {gf=RAISED; morfs=[arg]} as s -> |
304 | 308 | let arg = make_arg quant arg in |
305 | 309 | ((dir_of_dir s.dir,arg),v,[],[Var v]),var_map |
... | ... | @@ -325,10 +329,10 @@ let make_args tokens quant var_map v = function |
325 | 329 | | s -> (* FIXME: argument pusty występuje tyle razy ile jest preferencji, a chyba powinien jeden raz *) |
326 | 330 | let args2 = Xlist.map s.morfs (fun morf -> make_arg quant morf, morf) in |
327 | 331 | let sem_args = Xlist.map args2 (function |
328 | - One, Phrase Pro -> SetAttr("MORF",Morf(Phrase Pro),make_pro tokens) (*s.sel_prefs*) | |
329 | - | One, Phrase ProNG -> SetAttr("MORF",Morf(Phrase ProNG),make_prong tokens) (*s.sel_prefs*) | |
330 | - | One, E Pro -> SetAttr("MORF",Morf(E Pro ),make_pro tokens) (*s.sel_prefs*) | |
331 | - | One, E ProNG -> SetAttr("MORF",Morf(E ProNG),make_prong tokens) (*s.sel_prefs*) | |
332 | + One, Phrase Pro -> SetAttr("MORF",Morf(Phrase Pro),make_pro tokens lex_sems) (*s.sel_prefs*) | |
333 | + | One, Phrase ProNG -> SetAttr("MORF",Morf(Phrase ProNG),make_prong tokens lex_sems) (*s.sel_prefs*) | |
334 | + | One, E Pro -> SetAttr("MORF",Morf(E Pro ),make_pro tokens lex_sems) (*s.sel_prefs*) | |
335 | + | One, E ProNG -> SetAttr("MORF",Morf(E ProNG),make_prong tokens lex_sems) (*s.sel_prefs*) | |
332 | 336 | | One, Phrase Null -> Dot |
333 | 337 | | One, _ -> failwith "make_args 3" |
334 | 338 | | _,morf -> SetAttr("MORF",Morf morf,Var "q")) in |
... | ... | @@ -340,14 +344,14 @@ let make_args tokens quant var_map v = function |
340 | 344 | ((dir_of_dir s.dir,Plus(Xlist.map args2 fst)),v, |
341 | 345 | [Case(Var v,Xlist.map sem_args (function Dot -> "q",Dot | t -> "q",Cut(SetAttr("AROLE",Val s.role,SetAttr("GF",Gf s.gf,(*SetElem*) t)))))],[]),var_map |
342 | 346 | |
343 | -let make_args2 tokens quant var_map s = | |
347 | +let make_args2 tokens lex_sems quant var_map s = | |
344 | 348 | let v,var_map = make_var var_map (String.lowercase s.role) (*gf*) in |
345 | 349 | (* let s = {s with morfs=List.flatten (Xlist.map s.morfs (function E l -> Xlist.map l (fun p -> E[p]) | m -> [m]))} in *) |
346 | - make_args tokens quant var_map v s | |
350 | + make_args tokens lex_sems quant var_map v s | |
347 | 351 | |
348 | -let make_schema tokens quant schema var_map = | |
352 | +let make_schema tokens lex_sems quant schema var_map = | |
349 | 353 | let schema,_,var_map = Xlist.fold schema ([],StringMap.empty,var_map) (fun (schema,labels,var_map) s -> |
350 | - let schema_pos,var_map = make_args2 tokens quant var_map s in | |
354 | + let schema_pos,var_map = make_args2 tokens lex_sems quant var_map s in | |
351 | 355 | schema_pos :: schema, labels, var_map) in |
352 | 356 | Xlist.fold schema ([],[],[],[]) (fun (args,vars,sem_args,raised_args) (arg,var,sem_arg,raised_arg) -> |
353 | 357 | arg :: args, var :: vars, sem_arg @ sem_args, raised_arg @ raised_args), var_map |
... | ... | @@ -356,11 +360,11 @@ let add_x_args schema_list = |
356 | 360 | [{gf=ADJUNCT; role="Unknown Backward"; role_attr="Backward"; sel_prefs=[]; cr=[]; ce=[]; dir=Backward; morfs=[Multi[X]]}; |
357 | 361 | {gf=ADJUNCT; role="Unknown Forward"; role_attr="Forward"; sel_prefs=[]; cr=[]; ce=[]; dir=Forward; morfs=[Multi[X]]}] :: schema_list |
358 | 362 | |
359 | -let make_frame x_flag tokens quant schema_list tl d node = (* UWAGA: to zadziała, gdy jest conajwyżej jeden podniesiony typ *) | |
363 | +let make_frame x_flag tokens lex_sems quant schema_list tl d node = (* UWAGA: to zadziała, gdy jest conajwyżej jeden podniesiony typ *) | |
360 | 364 | let schema_list = if x_flag then add_x_args schema_list else schema_list in |
361 | 365 | let args_vars_list,sem_args,raised_args,_ = Xlist.fold schema_list ([],[],[],StringMap.empty) (fun (args_vars_list,sem_args,raised_args,var_map) schema -> |
362 | -(* print_endline (WalStringOf.schema schema); *) | |
363 | - let (args,vars,sem_arg,raised_arg),var_map = make_schema tokens quant schema var_map in | |
366 | +(* print_endline (ENIAMwalStringOf.schema schema); *) | |
367 | + let (args,vars,sem_arg,raised_arg),var_map = make_schema tokens lex_sems quant schema var_map in | |
364 | 368 | (args,vars) :: args_vars_list, sem_arg @ sem_args, raised_arg @ raised_args, var_map) in |
365 | 369 | let t = Tensor(make_tensor_type quant tl) in |
366 | 370 | let at = Xlist.fold schema_list tl (fun at schema -> |
... | ... | @@ -379,9 +383,9 @@ let make_frame x_flag tokens quant schema_list tl d node = (* UWAGA: to zadział |
379 | 383 | simplify_impset (ImpSet(t,args),LambdaSet(vars,sem))) in |
380 | 384 | make_type_quantification quant (t,sem) |
381 | 385 | |
382 | -let make_frame_raised tokens quant schema_list tl d node sem_mods = | |
386 | +let make_frame_raised tokens lex_sems quant schema_list tl d node sem_mods = | |
383 | 387 | let args_vars_list,sem_args,raised_args,_ = Xlist.fold schema_list ([],[],[],StringMap.empty) (fun (args_vars_list,sem_args,raised_args,var_map) schema -> |
384 | - let (args,vars,sem_arg,raised_arg),var_map = make_schema tokens quant schema var_map in | |
388 | + let (args,vars,sem_arg,raised_arg),var_map = make_schema tokens lex_sems quant schema var_map in | |
385 | 389 | (args,vars) :: args_vars_list, sem_arg @ sem_args, raised_arg @ raised_args, var_map) in |
386 | 390 | let t = Tensor(make_tensor_type quant tl) in |
387 | 391 | let at = Xlist.fold (List.rev schema_list) tl (fun at schema -> |
... | ... | @@ -400,6 +404,7 @@ let make_frame_raised tokens quant schema_list tl d node sem_mods = |
400 | 404 | | _ -> failwith "make_frame_raised: raised_args" in |
401 | 405 | let sem = Xlist.fold sem_mods sem (fun sem (e,t) -> SetAttr(e,t,sem)) in |
402 | 406 | let id = ExtArray.add tokens {empty_token with token=Lemma("raised","raised",[])} in (* FIXME: czy raised to jest to co tu być powinno? *) |
407 | + let _ = ExtArray.add lex_sems empty_lex_sem in | |
403 | 408 | let sem = Node{empty_node with args = Cut(SetAttr("GF",Gf CORE,sem)); id=id; gs=make_gs quant tl} in |
404 | 409 | let t,sem = Xlist.fold args_vars_list (t,sem) (fun (t,sem) (args,vars) -> |
405 | 410 | simplify_impset (ImpSet(t,args),LambdaSet(vars,sem))) in |
... | ... |
parser/LCGstringOf.ml
... | ... | @@ -56,8 +56,8 @@ let rec linear_term c = function |
56 | 56 | "[" ^ |
57 | 57 | (String.concat "; " (Xlist.map (["PRED",Val t.pred;"CAT",Val t.cat;"ID",Val (string_of_int t.id);"WEIGHT",Val (string_of_float t.weight);"GS",t.gs;"ARGS",t.args] @ t.attrs) (fun (e,t) -> |
58 | 58 | e ^ ": " ^ (linear_term 0 t)))) ^ "]" |
59 | - | Morf m -> WalStringOf.morf m | |
60 | - | Gf s -> WalStringOf.gf s | |
59 | + | Morf m -> ENIAMwalStringOf.morf m | |
60 | + | Gf s -> ENIAMwalStringOf.gf s | |
61 | 61 | | Ref i -> "ref " ^ string_of_int i |
62 | 62 | | Cut t -> "cut(" ^ linear_term 0 t ^ ")" |
63 | 63 | | Choice choices -> "choice(" ^ String.concat ";" (StringMap.fold choices [] (fun l ei t -> (sprintf "%s: %s" ei (linear_term 0 t)) :: l)) ^ ")" |
... | ... |
parser/LCGtypes.ml
... | ... | @@ -29,14 +29,14 @@ type node = { |
29 | 29 | weight: float; |
30 | 30 | id: int; |
31 | 31 | gs: linear_term; |
32 | - agf: WalTypes.gf; | |
33 | - amorf: WalTypes.morf; | |
32 | + agf: ENIAMwalTypes.gf; | |
33 | + amorf: ENIAMwalTypes.morf; | |
34 | 34 | arole: string; |
35 | 35 | arole_attr: string; |
36 | 36 | meaning: string; |
37 | 37 | hipero: StringSet.t; |
38 | 38 | meaning_weight: float; |
39 | - position: WalTypes.schema_field; | |
39 | + position: ENIAMwalTypes.schema_field; | |
40 | 40 | attrs: (string * linear_term) list; |
41 | 41 | args: linear_term} |
42 | 42 | |
... | ... | @@ -72,8 +72,8 @@ and linear_term = |
72 | 72 | | Apply of linear_term |
73 | 73 | | Insert of linear_term * linear_term |
74 | 74 | | Node of node |
75 | - | Morf of WalTypes.morf | |
76 | - | Gf of WalTypes.gf | |
75 | + | Morf of ENIAMwalTypes.morf | |
76 | + | Gf of ENIAMwalTypes.gf | |
77 | 77 | | Choice of linear_term StringMap.t (*string * string * linear_term*) (* etykieta * indeks * term *) |
78 | 78 | | Concept of concept |
79 | 79 | | Context of context |
... | ... |
parser/LCGvalence.ml
... | ... | @@ -17,7 +17,7 @@ |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | 19 | |
20 | -open WalTypes | |
20 | +open ENIAMwalTypes | |
21 | 21 | open LCGtypes |
22 | 22 | open Printf |
23 | 23 | open Xstd |
... | ... | @@ -56,29 +56,30 @@ let extract_roles = function |
56 | 56 | | _ -> failwith "extract_roles" |
57 | 57 | |
58 | 58 | let get_lemma = function |
59 | - PreTypes.Lemma(lemma,cat,_) -> lemma,cat | |
60 | - | PreTypes.Interp lemma -> lemma,"interp" | |
59 | + ENIAMtokenizerTypes.Lemma(lemma,cat,_) -> lemma,cat | |
60 | + | ENIAMtokenizerTypes.Interp lemma -> lemma,"interp" | |
61 | 61 | | _ -> "","" |
62 | 62 | |
63 | -let prepare_valence tokens = | |
63 | +let prepare_valence tokens lex_sems = | |
64 | 64 | let valence = Array.make (ExtArray.size tokens) [] in |
65 | 65 | Int.iter 1 (ExtArray.size tokens - 1) (fun id -> |
66 | - let d = ExtArray.get tokens id in | |
67 | - let lemma,cat = get_lemma d.PreTypes.token in | |
66 | + let c = ExtArray.get tokens id in | |
67 | + let d = ExtArray.get lex_sems id in | |
68 | + let lemma,cat = get_lemma c.ENIAMtokenizerTypes.token in | |
68 | 69 | let lemma = if lemma = "<or-sentence>" (*|| lemma = ":s" || lemma = "„s"*) then "pro-komunikować" else lemma in |
69 | 70 | if lemma = "" then () else |
70 | 71 | let prep_valence = |
71 | 72 | if cat = "prep" then |
72 | 73 | (* (0,lemma,StringSet.empty,0.,"NOSEM","",Frame(EmptyAtrs[],[])) :: *) |
73 | - match d.PreTypes.semantics with | |
74 | - PreTypes.Normal -> [] | |
75 | - | PreTypes.PrepSemantics l -> | |
76 | - Xlist.rev_map l (fun (lrole,lrole_attr,hipero,sel_prefs) -> | |
74 | + match d.ENIAMlexSemanticsTypes.semantics with | |
75 | + ENIAMlexSemanticsTypes.Normal -> [] | |
76 | + | ENIAMlexSemanticsTypes.PrepSemantics l -> (* FIXME: uzgadnianie cases *) | |
77 | + Xlist.rev_map l (fun (case,lrole,lrole_attr,hipero,sel_prefs) -> | |
77 | 78 | 0,lemma,hipero,0.,lrole,lrole_attr,Frame(EmptyAtrs[],[])) |
78 | 79 | | _ -> failwith "prepare_valence" |
79 | 80 | else [] in |
80 | - let valence2 = if d.PreTypes.valence = [] then [0,Frame(EmptyAtrs[],[])] else d.PreTypes.valence in | |
81 | - let lrole,lrole_attr = d.PreTypes.lroles in | |
81 | + let valence2 = if d.ENIAMlexSemanticsTypes.valence = [] then [0,Frame(EmptyAtrs[],[])] else d.ENIAMlexSemanticsTypes.valence in | |
82 | + let lrole,lrole_attr = d.ENIAMlexSemanticsTypes.lroles in | |
82 | 83 | valence.(id) <- prep_valence @ List.flatten (Xlist.map valence2 (function |
83 | 84 | fnum,Frame(attrs,schema) -> |
84 | 85 | let meanings,lemma,attrs = extract_meaning lemma attrs in |
... | ... | @@ -86,14 +87,14 @@ let prepare_valence tokens = |
86 | 87 | if cat = "pact" || cat = "ppas" then extract_roles attrs else |
87 | 88 | if cat = "pcon" then "Con","" else |
88 | 89 | if cat = "pant" then "Ant","" else |
89 | - d.PreTypes.lroles in | |
90 | - Xlist.map (prepare_senses lemma meanings d.PreTypes.senses) (fun (meaning,hipero,weight) -> | |
90 | + d.ENIAMlexSemanticsTypes.lroles in | |
91 | + Xlist.map (prepare_senses lemma meanings d.ENIAMlexSemanticsTypes.senses) (fun (meaning,hipero,weight) -> | |
91 | 92 | let hipero = if cat = "conj" then ["0"] else hipero in |
92 | 93 | fnum,meaning,StringSet.of_list hipero,weight,lrole,lrole_attr, |
93 | 94 | Frame(attrs,Xlist.map schema (fun s -> |
94 | 95 | (* let s = if s.sel_prefs=[] then (print_endline ("prepare_valence empty sel_prefs: " ^ lemma ^ " " ^ cat); {s with sel_prefs=["ALL"]}) else s in *) |
95 | 96 | if s.role="" && s.gf <> ADJUNCT && s.gf <> NOSEM then ( |
96 | - printf "%d: %s\n%!" fnum (WalStringOf.frame lemma (Frame(attrs,schema))); | |
97 | + printf "%d: %s\n%!" fnum (ENIAMwalStringOf.frame lemma (Frame(attrs,schema))); | |
97 | 98 | failwith ("prepare_valence empty role: " ^ lemma ^ " " ^ cat)) else |
98 | 99 | {s with morfs=List.sort compare s.morfs}))) |
99 | 100 | | fnum,(LexFrame _ as frame) -> [fnum,"lex",StringSet.empty,0.,lrole,lrole_attr,frame] |
... | ... | @@ -194,9 +195,9 @@ let match_args_pos modifications nodes e i schema t = |
194 | 195 | let schema,selected = |
195 | 196 | if morfs = [] then schema,[] else |
196 | 197 | let morfs = List.sort compare morfs in |
197 | -(* printf "gf=%s morfs=%s\n%!" (WalStringOf.gf gf) (String.concat ";" (Xlist.map morfs WalStringOf.morf)); *) | |
198 | +(* printf "gf=%s morfs=%s\n%!" (ENIAMwalStringOf.gf gf) (String.concat ";" (Xlist.map morfs ENIAMwalStringOf.morf)); *) | |
198 | 199 | Xlist.fold schema ([],[]) (fun (schema,selected) pos -> |
199 | -(* printf "pos.gf=%s pos.morfs=%s\n%!" (WalStringOf.gf pos.gf) (String.concat ";" (Xlist.map pos.morfs WalStringOf.morf)); *) | |
200 | +(* printf "pos.gf=%s pos.morfs=%s\n%!" (ENIAMwalStringOf.gf pos.gf) (String.concat ";" (Xlist.map pos.morfs ENIAMwalStringOf.morf)); *) | |
200 | 201 | if gf = pos.gf || (gf = ADJUNCT && pos.gf=ARG) then |
201 | 202 | if match_position (morfs,(*mark_sem_morfs*) pos.morfs) then schema, pos :: selected else pos :: schema, selected |
202 | 203 | else pos :: schema, selected) in |
... | ... | @@ -288,17 +289,17 @@ let rec propagate_nosem_selprefs modifications ei = function |
288 | 289 | if (t.cat = "prep" && t.arole = "NOSEM") || t.cat = "num" then |
289 | 290 | let refs = IntSet.of_list (get_arg_refs [] t.args) in |
290 | 291 | IntSet.iter refs (fun r -> |
291 | - modifications.(r) <- StringMap.add_inc modifications.(r) ei t.position.WalTypes.sel_prefs (fun l -> | |
292 | - if l = t.position.WalTypes.sel_prefs then l else failwith ("propagate_nosem_selprefs 1: [" ^ String.concat ";" l ^ "] [" ^ String.concat ";" t.position.WalTypes.sel_prefs ^ "]"))); | |
293 | - Node{t with position= {t.position with WalTypes.sel_prefs = []}} | |
292 | + modifications.(r) <- StringMap.add_inc modifications.(r) ei t.position.ENIAMwalTypes.sel_prefs (fun l -> | |
293 | + if l = t.position.ENIAMwalTypes.sel_prefs then l else failwith ("propagate_nosem_selprefs 1: [" ^ String.concat ";" l ^ "] [" ^ String.concat ";" t.position.ENIAMwalTypes.sel_prefs ^ "]"))); | |
294 | + Node{t with position= {t.position with ENIAMwalTypes.sel_prefs = []}} | |
294 | 295 | else Node t |
295 | 296 | | _ -> failwith "propagate_nosem_selprefs 2" |
296 | 297 | |
297 | 298 | let rec apply_modifications2_rec mods = function |
298 | 299 | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, apply_modifications2_rec mods t)) |
299 | 300 | | Node t -> |
300 | - if t.position.WalTypes.sel_prefs <> [] then failwith "apply_modifications2_rec" else | |
301 | - Node{t with position={t.position with WalTypes.sel_prefs=mods}} | |
301 | + if t.position.ENIAMwalTypes.sel_prefs <> [] then failwith "apply_modifications2_rec" else | |
302 | + Node{t with position={t.position with ENIAMwalTypes.sel_prefs=mods}} | |
302 | 303 | | _ -> failwith "apply_modifications2_rec" |
303 | 304 | |
304 | 305 | let apply_modifications2 modifications references = |
... | ... | @@ -310,9 +311,9 @@ let apply_modifications2 modifications references = |
310 | 311 | try apply_modifications2_rec (StringMap.find modifications.(r) ei) t with Not_found -> t)) |
311 | 312 | | _ -> failwith "apply_modifications2") |
312 | 313 | |
313 | -let assign_frames_and_senses tokens references = | |
314 | +let assign_frames_and_senses tokens lex_sems references = | |
314 | 315 | let modifications = Array.make (Array.length references) StringMap.empty in |
315 | - let valence = prepare_valence tokens in | |
316 | + let valence = prepare_valence tokens lex_sems in | |
316 | 317 | let nodes = Array.map get_nodes references in |
317 | 318 | let references = Array.map (assign_frames_and_senses_rec modifications valence nodes) nodes in |
318 | 319 | apply_modifications (*tokens*) modifications nodes references; |
... | ... |
parser/disambSelPref.ml
... | ... | @@ -16,85 +16,85 @@ |
16 | 16 | * You should have received a copy of the GNU General Public License |
17 | 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
18 | 18 | *) |
19 | - | |
20 | -open WalTypes | |
19 | + | |
20 | +open ENIAMwalTypes | |
21 | 21 | open LCGtypes |
22 | 22 | open Printf |
23 | 23 | open Xstd |
24 | - | |
24 | + | |
25 | 25 | let fit_node1 t args w = |
26 | - let w = | |
27 | - if t.agf = ADJUNCT || t.agf = CORE || t.agf = NOSEM || t.agf = CLAUSE || t.agf = SENTENCE then w else | |
26 | + let w = | |
27 | + if t.agf = ADJUNCT || t.agf = CORE || t.agf = NOSEM || t.agf = CLAUSE || t.agf = SENTENCE then w else | |
28 | 28 | (* if is_nosem_node t then fit_sel_prefs_nosem_node disamb ei t + w else *) |
29 | 29 | if t.position.role = "" && (t.agf = SUBJ || t.agf = OBJ || t.agf = ARG) then w + 20 else |
30 | - let b = | |
30 | + let b = | |
31 | 31 | if StringSet.mem t.hipero "0" then true else |
32 | 32 | Xlist.fold t.position.sel_prefs false (fun b s -> StringSet.mem t.hipero s || b) in |
33 | - (if b then 0 else 1) + w in | |
33 | + (if b then 0 else 1) + w in | |
34 | 34 | Node{t with args=args},w |
35 | - | |
35 | + | |
36 | 36 | let fit_node2 t args w = |
37 | - let b = Xlist.fold t.position.WalTypes.sel_prefs false (fun b s -> StringSet.mem t.hipero s || b) in | |
37 | + let b = Xlist.fold t.position.sel_prefs false (fun b s -> StringSet.mem t.hipero s || b) in | |
38 | 38 | let t = {t with args=args} in |
39 | - if b then Node t,w else | |
40 | - (match t.agf, t.position.WalTypes.gf with | |
41 | - WalTypes.ADJUNCT,_ -> (* FIXME: można dodać tuszowanie braków w walentym *) | |
42 | - let pos = | |
39 | + if b then Node t,w else | |
40 | + (match t.agf, t.position.gf with | |
41 | + ADJUNCT,_ -> (* FIXME: można dodać tuszowanie braków w walentym *) | |
42 | + let pos = | |
43 | 43 | (* let r,a = paths_array.(t.id).PreTypes.lroles in |
44 | 44 | if r <> "" then (* FIXME: pomijam to, że role dla rzeczowników dotyczą tylko inst *) |
45 | - {t.position with WalTypes.role=r; WalTypes.role_attr=a} else*) | |
46 | - {t.position with WalTypes.role=t.arole; WalTypes.role_attr=t.arole_attr} in | |
45 | + {t.position with role=r; role_attr=a} else*) | |
46 | + {t.position with role=t.arole; role_attr=t.arole_attr} in | |
47 | 47 | Node{t with position=pos}, w+1 |
48 | - | WalTypes.CLAUSE,WalTypes.NOGF -> Node t,w+0 | |
49 | - | WalTypes.SENTENCE,WalTypes.NOGF -> Node t,w+0 | |
50 | - | WalTypes.ARG,WalTypes.NOGF -> Node t,w+1 | |
51 | - | WalTypes.CORE,WalTypes.NOGF -> | |
52 | - let pos = {t.position with WalTypes.role=t.arole; WalTypes.role_attr=t.arole_attr} in | |
48 | + | CLAUSE,NOGF -> Node t,w+0 | |
49 | + | SENTENCE,NOGF -> Node t,w+0 | |
50 | + | ARG,NOGF -> Node t,w+1 | |
51 | + | CORE,NOGF -> | |
52 | + let pos = {t.position with role=t.arole; role_attr=t.arole_attr} in | |
53 | 53 | Node{t with position=pos}, w+0 |
54 | - | WalTypes.OBJ,WalTypes.NOGF -> Node t,w+0 | |
55 | - | WalTypes.SUBJ,WalTypes.NOGF -> Node t,w+0 | |
56 | - | WalTypes.SUBJ,WalTypes.SUBJ -> Node t,w+2 | |
57 | - | WalTypes.OBJ,WalTypes.OBJ -> Node t,w+2 | |
58 | - | WalTypes.ARG,WalTypes.ARG -> Node t,w+1 | |
59 | - | WalTypes.NOSEM,WalTypes.NOGF -> Node t,w+0 | |
60 | - | WalTypes.NOGF,WalTypes.NOGF -> Node t,w+0 | |
61 | - | WalTypes.NOSEM,WalTypes.NOSEM -> Node t,w+0 | |
62 | -(* | WalTypes.,WalTypes. -> 0 *) | |
54 | + | OBJ,NOGF -> Node t,w+0 | |
55 | + | SUBJ,NOGF -> Node t,w+0 | |
56 | + | SUBJ,SUBJ -> Node t,w+2 | |
57 | + | OBJ,OBJ -> Node t,w+2 | |
58 | + | ARG,ARG -> Node t,w+1 | |
59 | + | NOSEM,NOGF -> Node t,w+0 | |
60 | + | NOGF,NOGF -> Node t,w+0 | |
61 | + | NOSEM,NOSEM -> Node t,w+0 | |
62 | +(* | , -> 0 *) | |
63 | 63 | | a,g ->(* printf "fit_sel_prefs_rec: pred=%s agf=%s pos.gf=%s\n%!" t.pred (WalStringOf.gf a) (WalStringOf.gf g);*) Node t,w+1) |
64 | - | |
65 | -let rec fit_sel_prefs_choice fit_node_fun references disamb satisfaction r = function | |
66 | - Choice choice -> | |
64 | + | |
65 | +let rec fit_sel_prefs_choice fit_node_fun references disamb satisfaction r = function | |
66 | + Choice choice -> | |
67 | 67 | let choice,sat = StringMap.fold choice (StringMap.empty,StringMap.empty) (fun (choice,sat) ei t -> |
68 | 68 | let t,w = fit_sel_prefs_variant fit_node_fun references disamb satisfaction t in |
69 | 69 | StringMap.add choice ei t, StringMap.add sat ei w) in |
70 | 70 | satisfaction.(r) <- sat; |
71 | 71 | Choice choice |
72 | 72 | | _ -> failwith "fit_sel_prefs_choice" |
73 | - | |
74 | -and fit_sel_prefs_variant fit_node_fun references disamb satisfaction = function | |
75 | - Variant(e,l) -> | |
73 | + | |
74 | +and fit_sel_prefs_variant fit_node_fun references disamb satisfaction = function | |
75 | + Variant(e,l) -> | |
76 | 76 | let l,min_w = Xlist.fold l ([],max_int) (fun (l,min_w) (i,t) -> |
77 | 77 | let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction (e ^ i) t in |
78 | 78 | if w = min_w then (i,t) :: l, min_w else |
79 | 79 | if w < min_w then [i,t],w else l,min_w) in |
80 | 80 | Variant(e, List.rev l),min_w |
81 | 81 | | _ -> failwith "fit_sel_prefs_variant" |
82 | - | |
83 | -and fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei = function | |
84 | - Node t -> | |
82 | + | |
83 | +and fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei = function | |
84 | + Node t -> | |
85 | 85 | let args,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t.args in |
86 | 86 | fit_node2 t args w |
87 | - | Tuple l -> | |
87 | + | Tuple l -> | |
88 | 88 | let l,sum_w = Xlist.fold l ([],0) (fun (l,sum_w) t -> |
89 | 89 | let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t in |
90 | 90 | t :: l, sum_w + w) in |
91 | 91 | Tuple(List.rev l), sum_w |
92 | - | Variant(e,l) as t -> | |
92 | + | Variant(e,l) as t -> | |
93 | 93 | let l,min_w = Xlist.fold l ([],max_int) (fun (l,min_w) (i,t) -> |
94 | 94 | let t,w = fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei t in |
95 | 95 | if w = min_w then (i,t) :: l, min_w else |
96 | 96 | if w < min_w then [i,t],w else l,min_w) in |
97 | - let l = | |
97 | + let l = | |
98 | 98 | let map = Xlist.fold l TermSet.empty (fun map (_,t) -> TermSet.add map t) in |
99 | 99 | fst (TermSet.fold map ([],1) (fun (l,i) t -> (string_of_int i,t) :: l, i+1)) in |
100 | 100 | (match l with |
... | ... | @@ -103,7 +103,7 @@ and fit_sel_prefs_rec fit_node_fun references disamb satisfaction ei = function |
103 | 103 | | _ -> Variant(e, List.rev l),min_w) |
104 | 104 | | Dot -> Dot, 0 |
105 | 105 | | Val s -> Val s, 0 |
106 | - | Ref i -> | |
106 | + | Ref i -> | |
107 | 107 | if disamb.(i) = Dot then (disamb.(i) <- fit_sel_prefs_choice fit_node_fun references disamb satisfaction i references.(i)); |
108 | 108 | Ref i, (try StringMap.find satisfaction.(i) ei with Not_found -> failwith ("fit_sel_prefs_rec 3: r=" ^ string_of_int i ^ " ei=" ^ ei)) |
109 | 109 | | t -> failwith ("fit_sel_prefs_rec 2: " ^ LCGstringOf.linear_term 0 t) |
... | ... | @@ -113,8 +113,5 @@ let fit_sel_prefs fit_node_fun references = |
113 | 113 | let satisfaction = Array.make (Array.length references) StringMap.empty in |
114 | 114 | disamb.(0) <- fst (fit_sel_prefs_variant fit_node_fun references disamb satisfaction references.(0)); |
115 | 115 | disamb |
116 | - | |
117 | -(***************************************************************************************) | |
118 | - | |
119 | - | |
120 | 116 | |
117 | +(***************************************************************************************) | |
... | ... |
parser/exec.ml
... | ... | @@ -33,6 +33,8 @@ let empty_result = { |
33 | 33 | selected_sent_text=RawText ""; |
34 | 34 | semantic_text=RawText ""; |
35 | 35 | selected_semantic_text=RawText ""; |
36 | + tokens=ExtArray.make 1 ENIAMtokenizerTypes.empty_token; | |
37 | + lex_sems=ExtArray.make 1 ENIAMlexSemanticsTypes.empty_lex_sem; | |
36 | 38 | } |
37 | 39 | |
38 | 40 | let empty_eniam_parse_result = { |
... | ... | @@ -157,7 +159,7 @@ let eniam_parse_sentence timeout test_only_flag paths last tokens lex_sems = |
157 | 159 | let result = {result with parse_time=time4 -. time3; chart_size=LCGchart.get_no_entries chart} in |
158 | 160 | if LCGchart.is_parsed chart then |
159 | 161 | try |
160 | - let term = LCGchart.get_parsed_term tokens chart in | |
162 | + let term = LCGchart.get_parsed_term tokens lex_sems chart in | |
161 | 163 | let dependency_tree = LCGreductions.reduce term references in |
162 | 164 | let time5 = time_fun () in |
163 | 165 | let result = if test_only_flag then result else {result with dependency_tree=dependency_tree} in |
... | ... | @@ -216,7 +218,7 @@ let conll_parse_sentence timeout test_only_flag paths tokens lex_sems = |
216 | 218 | let result = {result with parse_time=time4 -. time3} in |
217 | 219 | if LCGchart.is_dep_parsed parsed_dep_chart then |
218 | 220 | try |
219 | - let term = LCGchart.get_dep_parsed_term tokens parsed_dep_chart in | |
221 | + let term = LCGchart.get_dep_parsed_term tokens lex_sems parsed_dep_chart in | |
220 | 222 | (* LCGlatexOf.print_dependency_tree "dep_dependency_tree1" dependency_tree; *) |
221 | 223 | let dependency_tree = LCGreductions.reduce term references in |
222 | 224 | let time5 = time_fun () in |
... | ... | @@ -322,12 +324,12 @@ let rec parse_sentence timeout test_only_flag mode file_prefix tokens lex_sems = |
322 | 324 | if not Paths.config.Paths.mate_parser_enabled then DepSentence paths else ( |
323 | 325 | print_endline "parse_sentence 1"; |
324 | 326 | (* print_endline (Visualization.html_of_dep_sentence tokens paths); *) |
325 | - let conll = CONLL.string_of_paths ENIAMsubsyntaxTypes.Mate tokens paths in | |
327 | + let conll = ENIAM_CONLL.string_of_paths ENIAMsubsyntaxTypes.Mate tokens paths in | |
326 | 328 | print_endline "parse_sentence 2"; |
327 | 329 | (* printf "|%s|\n" conll; *) |
328 | 330 | Printf.fprintf mate_out "%s%!" conll; |
329 | 331 | print_endline "parse_sentence 3"; |
330 | - let new_paths = get_paths paths (CONLL.load_sentence mate_in) in | |
332 | + let new_paths = get_paths paths (ENIAM_CONLL.load_sentence mate_in) in | |
331 | 333 | print_endline "parse_sentence 4"; |
332 | 334 | (* print_endline (Visualization.html_of_dep_sentence tokens new_paths); *) |
333 | 335 | let result = conll_parse_sentence timeout test_only_flag new_paths tokens lex_sems in |
... | ... | @@ -364,7 +366,7 @@ let rec parse_text timeout test_only_flag mode tokens lex_sems = function |
364 | 366 | | StructText paragraphs -> |
365 | 367 | let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> |
366 | 368 | parse_paragraph timeout test_only_flag mode tokens lex_sems paragraph) in |
367 | - StructText(List.rev paragraphs, tokens) | |
369 | + StructText(List.rev paragraphs) | |
368 | 370 | | AltText l -> AltText(Xlist.map l (fun (mode,text) -> |
369 | 371 | mode, parse_text timeout test_only_flag mode tokens lex_sems text)) |
370 | 372 | |
... | ... | @@ -420,18 +422,18 @@ let rec select_sentences_paragraph = function |
420 | 422 | |
421 | 423 | let rec select_sentences_text = function |
422 | 424 | RawText s -> RawText s |
423 | - | StructText(paragraphs,tokens) -> | |
425 | + | StructText paragraphs -> | |
424 | 426 | let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> |
425 | 427 | select_sentences_paragraph paragraph) in |
426 | - StructText(List.rev paragraphs, tokens) | |
428 | + StructText(List.rev paragraphs) | |
427 | 429 | | AltText l -> AltText(Xlist.map l (fun (mode,text) -> |
428 | 430 | mode, select_sentences_text text)) |
429 | 431 | |
430 | -let semantic_processing timeout test_only_flag file_prefix tokens max_n dependency_tree = | |
432 | +let semantic_processing timeout test_only_flag file_prefix tokens lex_sems max_n dependency_tree = | |
431 | 433 | let time5 = time_fun () in |
432 | 434 | let result = {empty_semantic_processing_result with file_prefix=file_prefix} in |
433 | 435 | try |
434 | - let (*dependency_tree2*)(*sem*)disamb = LCGvalence.assign_frames_and_senses tokens dependency_tree in | |
436 | + let (*dependency_tree2*)(*sem*)disamb = LCGvalence.assign_frames_and_senses tokens lex_sems dependency_tree in | |
435 | 437 | let disamb(*sem*) = DisambSelPref.fit_sel_prefs DisambSelPref.fit_node1 (*dependency_tree2*)disamb in |
436 | 438 | let (*sem*)disamb = DisambLemma.disambiguate_nodes (*dependency_tree*)(*sem*)disamb in |
437 | 439 | let (*sem*)disamb = DisambLemma.remove_unused(*disambiguate_nodes*) (*dependency_tree*)(*sem*)disamb in |
... | ... | @@ -441,7 +443,7 @@ let semantic_processing timeout test_only_flag file_prefix tokens max_n dependen |
441 | 443 | let sem = DisambLemma.disambiguate_meanings (*dependency_tree*)sem in |
442 | 444 | let sem(*disamb*) = DisambLemma.remove_unused_choices(*disambiguate_nodes*) (*dependency_tree*)sem(*disamb*) in |
443 | 445 | let result = if test_only_flag then result else {result with sem=sem} in |
444 | - let sem2 = SemGraph.translate tokens (*disamb*)sem in | |
446 | + let sem2 = SemGraph.translate tokens lex_sems (*disamb*)sem in | |
445 | 447 | let result = if test_only_flag then result else {result with sem2=sem2} in |
446 | 448 | let sem3(*disamb*) = SemGraph.make_tree(*disambiguate_nodes*) (*dependency_tree*)sem2(*disamb*) in |
447 | 449 | let sem3(*disamb*) = SemGraph.simplify_tree(*disambiguate_nodes*) (*dependency_tree*)sem3(*disamb*) in |
... | ... | @@ -469,41 +471,41 @@ let semantic_processing timeout test_only_flag file_prefix tokens max_n dependen |
469 | 471 | {result with status=SemError; msg=Printexc.to_string e; sem_time=time6 -. time5} |
470 | 472 | |
471 | 473 | |
472 | -let rec semantic_processing_sentence timeout test_only_flag tokens max_n = function | |
474 | +let rec semantic_processing_sentence timeout test_only_flag tokens lex_sems max_n = function | |
473 | 475 | RawSentence s -> RawSentence s |
474 | - | ENIAMSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens max_n result.dependency_tree) | |
475 | - | CONLLSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens max_n result.dependency_tree) | |
476 | + | ENIAMSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens lex_sems max_n result.dependency_tree) | |
477 | + | CONLLSentence result -> SemSentence (semantic_processing timeout test_only_flag result.file_prefix tokens lex_sems max_n result.dependency_tree) | |
476 | 478 | | QuotedSentences sentences -> |
477 | 479 | let sentences = Xlist.rev_map sentences (fun p -> |
478 | - let sentence = semantic_processing_sentence timeout test_only_flag tokens max_n p.psentence in | |
480 | + let sentence = semantic_processing_sentence timeout test_only_flag tokens lex_sems max_n p.psentence in | |
479 | 481 | {p with psentence=sentence}) in |
480 | 482 | QuotedSentences(List.rev sentences) |
481 | 483 | | AltSentence l -> |
482 | 484 | let l = Xlist.rev_map l (fun (mode,sentence) -> |
483 | - mode, semantic_processing_sentence timeout test_only_flag tokens max_n sentence) in | |
485 | + mode, semantic_processing_sentence timeout test_only_flag tokens lex_sems max_n sentence) in | |
484 | 486 | AltSentence(List.rev l) |
485 | 487 | | _ -> failwith "semantic_processing_sentence" |
486 | 488 | |
487 | -let rec semantic_processing_paragraph timeout test_only_flag tokens max_n = function | |
489 | +let rec semantic_processing_paragraph timeout test_only_flag tokens lex_sems max_n = function | |
488 | 490 | RawParagraph s -> RawParagraph s |
489 | 491 | | StructParagraph sentences -> |
490 | 492 | let sentences = Xlist.rev_map sentences (fun p -> |
491 | - let sentence = semantic_processing_sentence timeout test_only_flag tokens max_n p.psentence in | |
493 | + let sentence = semantic_processing_sentence timeout test_only_flag tokens lex_sems max_n p.psentence in | |
492 | 494 | {p with psentence=sentence}) in |
493 | 495 | StructParagraph(List.rev sentences) |
494 | 496 | | AltParagraph l -> |
495 | 497 | let l = Xlist.rev_map l (fun (mode,paragraph) -> |
496 | - mode, semantic_processing_paragraph timeout test_only_flag tokens max_n paragraph) in | |
498 | + mode, semantic_processing_paragraph timeout test_only_flag tokens lex_sems max_n paragraph) in | |
497 | 499 | AltParagraph(List.rev l) |
498 | 500 | |
499 | -let rec semantic_processing_text timeout test_only_flag max_n = function | |
501 | +let rec semantic_processing_text timeout test_only_flag tokens lex_sems max_n = function | |
500 | 502 | RawText s -> RawText s |
501 | - | StructText(paragraphs,tokens) -> | |
503 | + | StructText paragraphs -> | |
502 | 504 | let paragraphs = Xlist.rev_map paragraphs (fun paragraph -> |
503 | - semantic_processing_paragraph timeout test_only_flag tokens max_n paragraph) in | |
504 | - StructText(List.rev paragraphs, tokens) | |
505 | + semantic_processing_paragraph timeout test_only_flag tokens lex_sems max_n paragraph) in | |
506 | + StructText(List.rev paragraphs) | |
505 | 507 | | AltText l -> AltText(Xlist.map l (fun (mode,text) -> |
506 | - mode, semantic_processing_text timeout test_only_flag max_n text)) | |
508 | + mode, semantic_processing_text timeout test_only_flag tokens lex_sems max_n text)) | |
507 | 509 | |
508 | 510 | let rec extract_query_text = function |
509 | 511 | RawText s -> s |
... | ... | @@ -512,7 +514,7 @@ let rec extract_query_text = function |
512 | 514 | |
513 | 515 | let process_query pre_in pre_out timeout test_only_flag id full_query max_n = |
514 | 516 | (* print_endline "process_query 0"; *) |
515 | - let result = {empty_result with input_text=translate_text full_query} in | |
517 | + let result = {empty_result with input_text=translate_text (fst full_query)} in | |
516 | 518 | let time1 = time_fun () in |
517 | 519 | (* print_endline "process_query 1"; *) |
518 | 520 | Marshal.to_channel pre_out full_query []; |
... | ... | @@ -523,11 +525,11 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n = |
523 | 525 | ENIAMtokenizerTypes.token_record ExtArray.t * |
524 | 526 | ENIAMlexSemanticsTypes.lex_sem ExtArray.t * string * float) in |
525 | 527 | let time2 = time_fun () in |
526 | - let result = if test_only_flag then result else {result with pre_text=translate_text pre_text} in | |
528 | + let result = if test_only_flag then result else {result with pre_text=translate_text pre_text; tokens=tokens; lex_sems=lex_sems} in | |
527 | 529 | let result = {result with pre_time1=pre_time1; pre_time2=time2 -. time1} in |
528 | 530 | if msg <> "" then {result with status=PreprocessingError; msg=msg} else ( |
529 | 531 | (* print_endline "process_query 3"; *) |
530 | - let parsed_text = parse_text timeout test_only_flag Struct (translate_text pre_text) in | |
532 | + let parsed_text = parse_text timeout test_only_flag Struct tokens lex_sems (translate_text pre_text) in | |
531 | 533 | (* print_endline "process_query 4"; *) |
532 | 534 | let time3 = time_fun () in |
533 | 535 | let result = if test_only_flag then result else {result with status=Parsed; parsed_text=parsed_text} in |
... | ... | @@ -538,7 +540,7 @@ let process_query pre_in pre_out timeout test_only_flag id full_query max_n = |
538 | 540 | else select_sentences_text parsed_text in |
539 | 541 | (* print_endline "process_query 6"; *) |
540 | 542 | let result = if test_only_flag then result else {result with status=Parsed; selected_sent_text=selected_sent_text} in |
541 | - let semantic_text = semantic_processing_text timeout test_only_flag max_n selected_sent_text in | |
543 | + let semantic_text = semantic_processing_text timeout test_only_flag tokens lex_sems max_n selected_sent_text in | |
542 | 544 | (* print_endline "process_query 7"; *) |
543 | 545 | let selected_semantic_text = |
544 | 546 | if not Paths.config.Paths.sentence_selection_enabled then semantic_text |
... | ... |
parser/execTypes.ml
... | ... | @@ -92,7 +92,7 @@ and paragraph = |
92 | 92 | |
93 | 93 | type text = |
94 | 94 | RawText of string |
95 | - | StructText of paragraph list | |
95 | + | StructText of paragraph list | |
96 | 96 | | AltText of (mode * text) list |
97 | 97 | |
98 | 98 | |
... | ... | @@ -109,6 +109,8 @@ type result = { |
109 | 109 | selected_sent_text: text; |
110 | 110 | semantic_text: text; |
111 | 111 | selected_semantic_text: text; |
112 | + tokens: ENIAMtokenizerTypes.token_record ExtArray.t; | |
113 | + lex_sems: ENIAMlexSemanticsTypes.lex_sem ExtArray.t; | |
112 | 114 | } |
113 | 115 | |
114 | 116 | type sum_result = { |
... | ... |
parser/makefile
1 | 1 | OCAMLC=ocamlc |
2 | 2 | OCAMLOPT=ocamlopt |
3 | 3 | OCAMLDEP=ocamldep |
4 | -INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../tokenizer -I ../subsyntax -I ../walenty -I ../lexSemantics -I ../corpora | |
4 | +INCLUDES=-I +xml-light -I +xlib -I +zip -I +bz2 -I ../../../Dropbox/lib/latexvis -I ../../installed/latexvis -I ../lib/xt -I ../../../Dropbox/Clarin-pl/podzadania/nkjp/fold_text -I ../podzadania/morfeusz -I ../pre -I ../tokenizer -I ../subsyntax -I ../walenty -I ../lexSemantics -I ../integration | |
5 | 5 | #INCLUDES=-I +xml-light -I +xlib -I ../pre |
6 | 6 | OCAMLFLAGS=$(INCLUDES) -g |
7 | 7 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa latexvis.cmxa #nkjp.cmxa |
8 | 8 | #OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa xlib.cmxa |
9 | 9 | |
10 | -PRE= ../pre/paths.ml ../walenty/ENIAMwalTypes.ml ../tokenizer/ENIAMtokenizerTypes.ml ../subsyntax/ENIAMsubsyntaxTypes.ml ../lexSemantics/ENIAMlexSemanticsTypes.ml ../walenty/ENIAMwalStringOf.ml ../corpora/CONLL.ml | |
10 | +PRE= ../pre/paths.ml ../tokenizer/ENIAMtokenizerTypes.ml ../subsyntax/ENIAMsubsyntaxTypes.ml ../walenty/ENIAMwalTypes.ml ../lexSemantics/ENIAMlexSemanticsTypes.ml ../walenty/ENIAMwalStringOf.ml ../integration/ENIAM_CONLL.ml | |
11 | 11 | LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml |
12 | 12 | #LCG= LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGreductions.ml LCGlexicon.ml LCGvalence.ml |
13 | 13 | DISAMB= disambSelPref.ml disambLemma.ml |
... | ... | @@ -16,9 +16,9 @@ SEM= semGraph.ml semTypes.ml semStringOf.ml semLatexOf.ml semMmlOf.ml semMrl.ml |
16 | 16 | EXEC= execTypes.ml visualization.ml exec.ml |
17 | 17 | |
18 | 18 | all: |
19 | -# $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml | |
20 | - $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml | |
21 | - $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml | |
19 | + $(OCAMLOPT) -o pipe $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) pipe.ml | |
20 | + # $(OCAMLOPT) -o server2 $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) server.ml | |
21 | + # $(OCAMLOPT) -o parser2.cgi $(OCAMLOPTFLAGS) $(PRE) LCGtypes.ml LCGstringOf.ml LCGrules.ml LCGrenderer.ml LCGchart.ml LCGlatexOf.ml semTypes.ml semMmlOf.ml execTypes.ml visualization.ml webInterface.ml | |
22 | 22 | # $(OCAMLOPT) -o eniam.distr $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) overseer.ml |
23 | 23 | # $(OCAMLOPT) -o eniam.worker $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) worker.ml |
24 | 24 | # $(OCAMLOPT) -o parser.api $(OCAMLOPTFLAGS) $(PRE) $(LCG) $(DISAMB) $(SEM) $(EXEC) apiInterface.ml |
... | ... | @@ -55,4 +55,4 @@ swigra_test: swigra_test.ml |
55 | 55 | $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< |
56 | 56 | |
57 | 57 | clean: |
58 | - rm -f *~ *.cm[oix] *.o eniam eniam.distr eniam.worker server2 parser2.cgi | |
58 | + rm -f *~ *.cm[oix] *.o eniam eniam.distr eniam.worker server2 parser2.cgi pipe | |
... | ... |
parser/pipe.ml
... | ... | @@ -30,7 +30,7 @@ let get_sock_addr host_name port = |
30 | 30 | let get_paths query = |
31 | 31 | let i,o = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in |
32 | 32 | Printf.fprintf o "%s\n%!" query; |
33 | - let paths,msg,time = (Marshal.from_channel i : ((int * int * PreTypes.token_record) list * int * int) * string * float) in | |
33 | + let paths,msg,time = (Marshal.from_channel i : ((int * int * ENIAMtokenizerTypes.token_record) list * int * int) * string * float) in | |
34 | 34 | Printf.fprintf o "\n%!"; |
35 | 35 | let _ = Unix.shutdown_connection i in |
36 | 36 | paths,msg,time |
... | ... | @@ -39,21 +39,21 @@ let get_paths query = |
39 | 39 | |
40 | 40 | let simple_disambiguate (paths,last) = |
41 | 41 | Xlist.fold paths [] (fun paths (i,j,t) -> |
42 | - if Xlist.mem t.PreTypes.attrs "notvalidated proper" || Xlist.mem t.PreTypes.attrs "lemma not validated" then paths else (i,j,t) :: paths),last | |
42 | + if Xlist.mem t.ENIAMtokenizerTypes.attrs "notvalidated proper" || Xlist.mem t.ENIAMtokenizerTypes.attrs "lemma not validated" then paths else (i,j,t) :: paths),last | |
43 | 43 | |
44 | 44 | (* FIXME: przerobić koordynację *) |
45 | 45 | |
46 | 46 | let lcg_process query = |
47 | 47 | let ic,oc = Unix.open_connection (get_sock_addr Paths.pre_host Paths.pre_port) in |
48 | - let result = Exec.process_query ic oc 30. false "x" (PreTypes.RawText query,ExtArray.make 1 ENIAMtokenizerTypes.empty_token) 10 in | |
48 | + let result = Exec.process_query ic oc 30. false "x" (ENIAMsubsyntaxTypes.RawText query,ExtArray.make 1 ENIAMtokenizerTypes.empty_token) 10 in | |
49 | 49 | let path = "results/" in |
50 | - Visualization.print_html_text path "input_text" result.input_text; | |
51 | - Visualization.print_html_text path "pre_text" result.pre_text; | |
52 | - Visualization.print_html_text path "parsed_text" result.parsed_text; | |
53 | - Visualization.print_html_text path "selected_sent_text" result.selected_sent_text; | |
54 | - Visualization.print_html_text path "semantic_text" result.semantic_text; | |
55 | - Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text; | |
56 | - Visualization.print_main_result_text "aaa/" (path ^ "main/") "xxxx" result.selected_semantic_text; | |
50 | + Visualization.print_html_text path "input_text" result.input_text result.tokens result.lex_sems; | |
51 | + Visualization.print_html_text path "pre_text" result.pre_text result.tokens result.lex_sems; | |
52 | + Visualization.print_html_text path "parsed_text" result.parsed_text result.tokens result.lex_sems; | |
53 | + Visualization.print_html_text path "selected_sent_text" result.selected_sent_text result.tokens result.lex_sems; | |
54 | + Visualization.print_html_text path "semantic_text" result.semantic_text result.tokens result.lex_sems; | |
55 | + Visualization.print_html_text path "selected_semantic_text" result.selected_semantic_text result.tokens result.lex_sems; | |
56 | + Visualization.print_main_result_text "aaa/" (path ^ "main/") "xxxx" result.tokens result.selected_semantic_text; | |
57 | 57 | Exec.print_result stdout result; |
58 | 58 | (*Visualization.print_paths "results/" "paths" result.paths; |
59 | 59 | Visualization.print_paths_latex "paths" result.paths; |
... | ... | @@ -117,7 +117,7 @@ let lcg_process query = |
117 | 117 | LatexMain.latex_compile_and_clean "results/" "chart"*) |
118 | 118 | | _ -> ());*) |
119 | 119 | (* Printf.fprintf oc "\n%!"; *) |
120 | - Marshal.to_channel oc (PreTypes.RawText "",ExtArray.make 1 ENIAMtokenizerTypes.empty_token) []; | |
120 | + Marshal.to_channel oc (ENIAMsubsyntaxTypes.RawText "",ExtArray.make 1 ENIAMtokenizerTypes.empty_token) []; | |
121 | 121 | flush oc; |
122 | 122 | let _ = Unix.shutdown_connection ic in |
123 | 123 | () |
... | ... | @@ -162,7 +162,7 @@ let lcg_process_file filename result_path result_name = failwith "lcg_process_fi |
162 | 162 | (* let _ = LCGexec.process_file_id "data/sentences-składnica-with-trees.tab" "results/sentences-składnica-with-trees.eff" 100. *) |
163 | 163 | |
164 | 164 | (* Przetwarzanie korpusów w formacie CONLL *) |
165 | - | |
165 | +(* | |
166 | 166 | let id_counter = ref 0 |
167 | 167 | |
168 | 168 | let get_id () = |
... | ... | @@ -187,7 +187,7 @@ let process_id s = |
187 | 187 | Xstring.check_prefix "morph_" c && Xstring.check_sufix "-s" c then |
188 | 188 | Xstring.cut_prefix "NKJP_1M_" a ^ "." ^ Xstring.cut_sufix "-s" (Xstring.cut_prefix "morph_" c) |
189 | 189 | else failwith ("process_id: " ^ s) |
190 | - | |
190 | +*) | |
191 | 191 | (* FIXME |
192 | 192 | let process_conll_corpus filename = |
193 | 193 | let corpus = File.file_in filename (fun file -> CONLL.match_corpus (CONLL.load_corpus file)) in |
... | ... | @@ -232,13 +232,13 @@ let _ = |
232 | 232 | *) |
233 | 233 | |
234 | 234 | |
235 | - | |
235 | +(* | |
236 | 236 | let has_pos pos (paths,_,_) = |
237 | 237 | Xlist.fold paths false (fun b (_,_,t) -> |
238 | 238 | match t.PreTypes.token with |
239 | 239 | PreTypes.Lemma(_,cat,_) -> if cat = pos then true else b |
240 | 240 | | _ -> b) |
241 | - | |
241 | +*) | |
242 | 242 | (* Wydobycie zdań zawierających symbole *) |
243 | 243 | (*let _ = |
244 | 244 | let i,o = Unix.open_connection (get_sock_addr host port) in |
... | ... | @@ -346,7 +346,7 @@ let print_stats n stats = |
346 | 346 | stats.adj_sense (float stats.adj_sense /. float n) (float stats.adj_sense /. float stats.adj) |
347 | 347 | stats.adj_valence (float stats.adj_valence /. float n) (float stats.adj_valence /. float stats.adj); |
348 | 348 | () |
349 | - | |
349 | +(* | |
350 | 350 | let get_stats stats (paths,_) = |
351 | 351 | Xlist.fold paths stats (fun stats (_,_,t) -> |
352 | 352 | (* if Xlist.mem t.PreTypes.attrs "notvalidated proper" || Xlist.mem t.PreTypes.attrs "lemma not validated" then stats else *) |
... | ... | @@ -364,7 +364,7 @@ let get_stats stats (paths,_) = |
364 | 364 | adj_valence=if t.PreTypes.valence=[] then stats.adj_valence else stats.adj_valence+1} |
365 | 365 | | _ -> stats) |
366 | 366 | | _ -> stats) |
367 | - | |
367 | +*) | |
368 | 368 | (* Test pokrycia słowosieci i walentego *) |
369 | 369 | (*let _ = |
370 | 370 | let sentences = File.load_lines "data/sentences-składnica.txt" in |
... | ... |
parser/semGraph.ml
... | ... | @@ -61,7 +61,7 @@ let rec get_person = function |
61 | 61 | | _ :: l -> get_person l |
62 | 62 | | [] -> "" |
63 | 63 | |
64 | -let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = | |
64 | +let rec create_normal_concept (*roles role_attrs*) tokens lex_sems t sem_args = | |
65 | 65 | let sem_args = if t.cat = "pro" then |
66 | 66 | match get_person t.attrs with |
67 | 67 | "pri" -> ["indexical"] |
... | ... | @@ -70,13 +70,13 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = |
70 | 70 | | "" -> ["indexical";"coreferential";"deictic"] |
71 | 71 | | _ -> failwith "create_normal_concept: pro" |
72 | 72 | else sem_args in (* FIXME: przesunąć to do rozszerzania path_array *) |
73 | - if t.agf = WalTypes.NOSEM then t.args else | |
73 | + if t.agf = ENIAMwalTypes.NOSEM then t.args else | |
74 | 74 | let c = {empty_concept with |
75 | 75 | c_sense = Val t.meaning; |
76 | 76 | c_relations=(*create_concepts tokens*) t.args; |
77 | 77 | c_quant=make_sem_args sem_args; |
78 | 78 | c_variable=string_of_int t.id,""; |
79 | - c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).PreTypes.beg; | |
79 | + c_pos=(*if t.id >= Array.length tokens then -1 else*) (ExtArray.get tokens t.id).ENIAMtokenizerTypes.beg; | |
80 | 80 | c_local_quant=true} in |
81 | 81 | if t.cat = "subst" || t.cat = "depr" || t.cat = "ger" || t.cat = "unk" || StringSet.mem symbols t.cat then (* FIXME: wykrywanie plurale tantum *) |
82 | 82 | let c = {c with c_local_quant=false} in |
... | ... | @@ -103,9 +103,10 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = |
103 | 103 | | e,t -> failwith ("create_normal_concept noun: " ^ e)) in |
104 | 104 | let c = if t.cat = "depr" then {c with c_relations=Tuple[c.c_relations;SingleRelation(Val "depr")]} else c in |
105 | 105 | if cx_flag then |
106 | - let id = ExtArray.add tokens PreTypes.empty_token in | |
106 | + let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token in | |
107 | + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in | |
107 | 108 | Context{empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos} |
108 | - else Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else | |
109 | + else Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else | |
109 | 110 | if t.cat = "fin" || t.cat = "bedzie" || t.cat = "praet" || t.cat = "winien" || t.cat = "impt" || t.cat = "imps" || t.cat = "pred" || t.pred = "pro-komunikować" then |
110 | 111 | let c = {c with c_local_quant=false} in |
111 | 112 | let c = Xlist.fold t.attrs c (fun c -> function |
... | ... | @@ -122,10 +123,11 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = |
122 | 123 | | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} |
123 | 124 | | e,t -> failwith ("create_normal_concept verb: " ^ e)) in |
124 | 125 | let c = if t.pred = "pro-komunikować" then {c with c_relations=Relation(Val "Theme",Val "",c.c_relations)} else c in (* FIXME: to by trzeba przesunąć na wcześniej *) |
125 | - let id = ExtArray.add tokens PreTypes.empty_token in | |
126 | + let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token in | |
127 | + let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in | |
126 | 128 | let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos} in |
127 | - if t.position.WalTypes.role <> "" || t.position.WalTypes.role_attr <> "" then failwith "create_normal_concept: verb" else | |
128 | -(* Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Context cx) else *) | |
129 | + if t.position.ENIAMwalTypes.role <> "" || t.position.ENIAMwalTypes.role_attr <> "" then failwith "create_normal_concept: verb" else | |
130 | +(* Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Context cx) else *) | |
129 | 131 | Context cx else |
130 | 132 | if t.cat = "inf" then |
131 | 133 | let c = {c with c_local_quant=false} in |
... | ... | @@ -134,9 +136,10 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = |
134 | 136 | | "TENSE",t -> {c with c_relations=Tuple[c.c_relations;SingleRelation t]} |
135 | 137 | | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} |
136 | 138 | | e,t -> failwith ("create_normal_concept verb: " ^ e)) in |
137 | - let id = ExtArray.add tokens PreTypes.empty_token in | |
139 | + let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token in | |
140 | +      let _ = ExtArray.add lex_sems ENIAMlexSemanticsTypes.empty_lex_sem in | 
138 | 141 | let cx = {empty_context with cx_contents=Concept c; cx_variable=string_of_int id,""; cx_pos=c.c_pos} in |
139 | - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Context cx) else | |
142 | + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Context cx) else | |
140 | 143 | if t.cat = "adj" || t.cat = "adjc" || t.cat = "adjp" || t.cat = "adja" || t.cat = "pact" || t.cat = "ppas" || t.cat = "ordnum" || t.cat = "roman-adj" then |
141 | 144 | let c = if t.cat = "pact" || t.cat = "ppas" then {c with c_local_quant=false} else c in |
142 | 145 | let c = Xlist.fold t.attrs c (fun c -> function |
... | ... | @@ -157,8 +160,8 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = |
157 | 160 | | "LEX",_ -> c (* FIXME *) |
158 | 161 | | e,t -> failwith ("create_normal_concept adj: " ^ e)) in |
159 | 162 | if t.cat = "pact" || t.cat = "ppas" then |
160 | - RevRelation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) | |
161 | - else Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else | |
163 | + RevRelation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) | |
164 | + else Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else | |
162 | 165 | if t.cat = "adv" || t.cat = "pcon" || t.cat = "pant" then |
163 | 166 | let c = if t.cat = "pcon" || t.cat = "pant" then {c with c_local_quant=false} else c in |
164 | 167 | let c = Xlist.fold t.attrs c (fun c -> function |
... | ... | @@ -169,7 +172,7 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = |
169 | 172 | | "TYPE",_ -> c |
170 | 173 | | "NEG",Val "+" -> {c with c_quant=Tuple[c.c_quant;Val "nie"]} |
171 | 174 | | e,t -> failwith ("create_normal_concept adv: " ^ e)) in |
172 | - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else | |
175 | + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else | |
173 | 176 | if t.cat = "pro" || t.cat = "ppron12" || t.cat = "ppron3" || t.cat = "siebie" then (* FIXME: indexicalność *) |
174 | 177 | let c = {c with c_local_quant=false} in |
175 | 178 | let c = Xlist.fold t.attrs c (fun c -> function |
... | ... | @@ -180,13 +183,13 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = |
180 | 183 | | "SYN",_ -> c |
181 | 184 | | "NSEM",_ -> c |
182 | 185 | | e,t -> failwith ("create_normal_concept pron: " ^ e)) in |
183 | - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else | |
186 | + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else | |
184 | 187 | if t.cat = "prep" then |
185 | - if t.arole = "NOSEM" then Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,t.args) else | |
188 | + if t.arole = "NOSEM" then Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,t.args) else | |
186 | 189 | let c = Xlist.fold t.attrs c (fun c -> function |
187 | 190 | | "CASE",_ -> c |
188 | 191 | | e,t -> failwith ("create_normal_concept prep: " ^ e)) in |
189 | - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else | |
192 | + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else | |
190 | 193 | if t.cat = "num" || t.cat = "intnum" || t.cat = "realnum" || t.cat = "intnum-interval" || t.cat = "realnum-interval" then |
191 | 194 | let c = Xlist.fold t.attrs c (fun c -> function |
192 | 195 | (* "MEANING",t -> {c with c_sense=Tuple[c.c_sense;t]} *) |
... | ... | @@ -197,20 +200,20 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = |
197 | 200 | | "PERS",_ -> c |
198 | 201 | | "TYPE",_ -> c |
199 | 202 | | e,t -> failwith ("create_normal_concept num: " ^ e)) in |
200 | - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,(*Quantifier*)(Concept c)) else | |
203 | + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,(*Quantifier*)(Concept c)) else | |
201 | 204 | if t.cat = "qub" && t.pred="się" then |
202 | 205 | let c = {c with c_quant=Tuple[c.c_quant;Val "coreferential"]} in |
203 | - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,(*Quantifier*)(Concept c)) else | |
206 | + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,(*Quantifier*)(Concept c)) else | |
204 | 207 | if t.cat = "qub" && (t.pred="czy" || t.pred="gdyby") then |
205 | - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,SetContextName(t.meaning,t.args)) else | |
208 | + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,SetContextName(t.meaning,t.args)) else | |
206 | 209 | if t.cat = "qub" then |
207 | 210 | let c = Xlist.fold t.attrs c (fun c -> function |
208 | 211 | (* | "TYPE",Val "int" -> {c with c_quant=Tuple[c.c_quant;Val "interrogative"]} |
209 | 212 | | "TYPE",_ -> c*) |
210 | 213 | | e,t -> failwith ("create_normal_concept qub: " ^ e)) in |
211 | - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Concept c) else | |
214 | + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Concept c) else | |
212 | 215 | if t.cat = "comp" then |
213 | - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,SetContextName(t.meaning,t.args)) else | |
216 | + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,SetContextName(t.meaning,t.args)) else | |
214 | 217 | if t.cat = "conj" then |
215 | 218 | let c = {empty_context with cx_sense=Val t.meaning; cx_contents=RemoveRelation t.args; cx_variable=string_of_int t.id,""; cx_pos=c.c_pos} in |
216 | 219 | let c = Xlist.fold t.attrs c (fun c -> function |
... | ... | @@ -219,7 +222,7 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = |
219 | 222 | | "GEND",_ -> c |
220 | 223 | | "PERS",_ -> c |
221 | 224 | | e,t -> failwith ("create_normal_concept conj: " ^ e)) in |
222 | - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,Context c) else | |
225 | + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,Context c) else | |
223 | 226 | if t.cat = "interj" then Node t else |
224 | 227 | if t.cat = "sinterj" then |
225 | 228 | let c = Xlist.fold t.attrs c (fun c -> function |
... | ... | @@ -240,7 +243,7 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = |
240 | 243 | if t.cat = "interp" && t.pred = "</query>" then |
241 | 244 | let l = List.rev (make_args_list t.args) in |
242 | 245 | let x = Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(RemoveRelation t,"Next","Sentence",RemoveRelation s)) in |
243 | - if t.agf = WalTypes.OBJ then Relation(Val t.arole,Val t.arole_attr,x) else x else | |
246 | + if t.agf = ENIAMwalTypes.OBJ then Relation(Val t.arole,Val t.arole_attr,x) else x else | |
244 | 247 | if t.cat = "interp" && t.pred = "<query1>" then t.args else |
245 | 248 | if t.cat = "interp" && t.pred = "<query2>" then t.args else |
246 | 249 | if t.cat = "interp" && t.pred = "<query4>" then t.args else |
... | ... | @@ -252,46 +255,46 @@ let rec create_normal_concept (*roles role_attrs*) tokens t sem_args = |
252 | 255 | Xlist.fold (List.tl l) (List.hd l) (fun t s -> AddRelation(RemoveRelation t,"Next","Sentence",RemoveRelation s)) else |
253 | 256 | if t.cat = "interp" && t.pred = "?" then SingleRelation(Val "int") else |
254 | 257 | if t.cat = "interp" && t.pred = "„" then |
255 | - Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,RemoveRelation t.args) else | |
256 | - if t.cat = "interp" || t.pred = "</or-sentence>" then Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,t.args) else ( | |
258 | + Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,RemoveRelation t.args) else | |
259 | + if t.cat = "interp" || t.pred = "</or-sentence>" then Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,t.args) else ( | |
257 | 260 | if t.cat = "interp" then Node t else |
258 | - if t.cat = "" then Relation(Val t.position.WalTypes.role,Val t.position.WalTypes.role_attr,t.args) else | |
261 | + if t.cat = "" then Relation(Val t.position.ENIAMwalTypes.role,Val t.position.ENIAMwalTypes.role_attr,t.args) else | |
259 | 262 | (* print_endline t.pred; *) |
260 | 263 | Node t) |
261 | 264 | |
262 | -and create_concepts tokens = function | |
265 | +and create_concepts tokens lex_sems = function | |
263 | 266 | Node t -> |
264 | 267 | (* print_endline ("cc " ^ t.pred); *) |
265 | 268 | (* let agf = t.agf in *) |
266 | 269 | let attrs = remove_unimportant_attrs t.attrs in |
267 | 270 | (* let attrs,roles,role_attrs = get_roles attrs in *) |
268 | 271 | let t = {t with attrs=attrs} in |
269 | - (match (ExtArray.get tokens t.id).PreTypes.semantics with | |
270 | - PreTypes.Normal -> | |
271 | - let t = create_normal_concept tokens t [] in | |
272 | -(* if agf = WalTypes.CORE then Core t else *) t | |
273 | - | PreTypes.PrepSemantics _ -> | |
274 | - let t = create_normal_concept tokens t [] in | |
275 | -(* if agf = WalTypes.CORE then Core t else *) t | |
276 | - | PreTypes.Special l -> | |
277 | - let t = create_normal_concept tokens t l in | |
278 | -(* if agf = WalTypes.CORE then Core t else *) t | |
279 | -(* | PreTypes.SpecialNoun(lemma,_) -> | |
272 | + (match (ExtArray.get lex_sems t.id).ENIAMlexSemanticsTypes.semantics with | |
273 | + ENIAMlexSemanticsTypes.Normal -> | |
274 | + let t = create_normal_concept tokens lex_sems t [] in | |
275 | +(* if agf = ENIAMwalTypes.CORE then Core t else *) t | |
276 | + | ENIAMlexSemanticsTypes.PrepSemantics _ -> | |
277 | + let t = create_normal_concept tokens lex_sems t [] in | |
278 | +(* if agf = ENIAMwalTypes.CORE then Core t else *) t | |
279 | + | ENIAMlexSemanticsTypes.Special l -> | |
280 | + let t = create_normal_concept tokens lex_sems t l in | |
281 | +(* if agf = ENIAMwalTypes.CORE then Core t else *) t | |
282 | +(* | ENIAMlexSemanticsTypes.SpecialNoun(lemma,_) -> | |
280 | 283 | let t = create_normal_concept tokens t in*) |
281 | -(* if agf = WalTypes.CORE then Core t else t*) | |
284 | +(* if agf = ENIAMwalTypes.CORE then Core t else t*) | |
282 | 285 | (*| _ -> failwith "create_concepts: ni"*)) |
283 | - | Tuple l -> Tuple(Xlist.map l (create_concepts tokens)) | |
284 | - | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts tokens t)) | |
286 | + | Tuple l -> Tuple(Xlist.map l (create_concepts tokens lex_sems)) | |
287 | + | Variant(e,l) -> Variant(e,Xlist.map l (fun (i,t) -> i, create_concepts tokens lex_sems t)) | |
285 | 288 | | Dot -> Dot |
286 | 289 | | Ref i -> Ref i |
287 | - | Choice choices -> Choice(StringMap.map choices (create_concepts tokens)) | |
290 | + | Choice choices -> Choice(StringMap.map choices (create_concepts tokens lex_sems)) | |
288 | 291 | | t -> failwith ("create_concepts: " ^ LCGstringOf.linear_term 0 t) |
289 | 292 | |
290 | 293 | |
291 | -let translate tokens term = | |
294 | +let translate tokens lex_sems term = | |
292 | 295 | let sem = Array.copy term in |
293 | 296 | Int.iter 0 (Array.length sem - 1) (fun i -> |
294 | - sem.(i) <- create_concepts tokens sem.(i)); | |
297 | + sem.(i) <- create_concepts tokens lex_sems sem.(i)); | |
295 | 298 | sem |
296 | 299 | |
297 | 300 | (***************************************************************************************) |
... | ... |
parser/visualization.ml
... | ... | @@ -20,7 +20,7 @@ |
20 | 20 | open LCGtypes |
21 | 21 | open Xstd |
22 | 22 | open Printf |
23 | -open PreTypes | |
23 | +open ENIAMtokenizerTypes | |
24 | 24 | |
25 | 25 | let string_of_interps interps = |
26 | 26 | String.concat "|" (Xlist.map interps (fun interp -> |
... | ... | @@ -28,46 +28,46 @@ let string_of_interps interps = |
28 | 28 | (String.concat "." interp2)))))) |
29 | 29 | |
30 | 30 | let rec string_of_token = function |
31 | - PreTypes.SmallLetter orth -> sprintf "SmallLetter(%s)" orth | |
32 | - | PreTypes.CapLetter(orth,lc) -> sprintf "CapLetter(%s,%s)" orth lc | |
33 | - | PreTypes.AllSmall orth -> sprintf "AllSmall(%s)" orth | |
34 | - | PreTypes.AllCap(orth,lc,lc2) -> sprintf "AllCap(%s,%s,%s)" orth lc lc2 | |
35 | - | PreTypes.FirstCap(orth,lc,cl,ll) -> sprintf "FirstCap(%s,%s,%s,%s)" orth lc cl ll | |
36 | - | PreTypes.SomeCap orth -> sprintf "SomeCap(%s)" orth | |
37 | - | PreTypes.RomanDig(v,t) -> sprintf "RomanDig(%s,%s)" v t | |
38 | - | PreTypes.Interp orth -> sprintf "Interp(%s)" orth | |
39 | - | PreTypes.Symbol orth -> sprintf "Symbol(%s)" orth | |
40 | - | PreTypes.Dig(v,t) -> sprintf "Dig(%s,%s)" v t | |
41 | - | PreTypes.Other2 orth -> sprintf "Other(%s)" orth | |
42 | - | PreTypes.Lemma(lemma,cat,interps) -> sprintf "Lemma(%s,%s,%s)" lemma cat (string_of_interps interps) | |
43 | - | PreTypes.Proper(lemma,cat,interps,senses) -> sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps) (String.concat "|" senses) | |
44 | - | PreTypes.Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token)) | |
45 | - | PreTypes.Tokens(cat,l) -> sprintf "Tokens(%s,%s)" cat (String.concat ";" (Xlist.map l string_of_int)) | |
31 | + SmallLetter orth -> sprintf "SmallLetter(%s)" orth | |
32 | + | CapLetter(orth,lc) -> sprintf "CapLetter(%s,%s)" orth lc | |
33 | + | AllSmall orth -> sprintf "AllSmall(%s)" orth | |
34 | + | AllCap(orth,lc,lc2) -> sprintf "AllCap(%s,%s,%s)" orth lc lc2 | |
35 | + | FirstCap(orth,lc,cl,ll) -> sprintf "FirstCap(%s,%s,%s,%s)" orth lc cl ll | |
36 | + | SomeCap orth -> sprintf "SomeCap(%s)" orth | |
37 | + | RomanDig(v,t) -> sprintf "RomanDig(%s,%s)" v t | |
38 | + | Interp orth -> sprintf "Interp(%s)" orth | |
39 | + | Symbol orth -> sprintf "Symbol(%s)" orth | |
40 | + | Dig(v,t) -> sprintf "Dig(%s,%s)" v t | |
41 | + | Other orth -> sprintf "Other(%s)" orth | |
42 | + | Lemma(lemma,cat,interps) -> sprintf "Lemma(%s,%s,%s)" lemma cat (string_of_interps interps) | |
43 | + | Proper(lemma,cat,interps,senses) -> sprintf "Proper(%s,%s,%s,%s)" lemma cat (string_of_interps interps) (String.concat "|" senses) | |
44 | + | Compound(sense,l) -> sprintf "Compound(%s,[%s])" sense (String.concat ";" (Xlist.map l string_of_token)) | |
45 | + | Tokens(cat,l) -> sprintf "Tokens(%s,%s)" cat (String.concat ";" (Xlist.map l string_of_int)) | |
46 | 46 | |
47 | 47 | let lemma_of_token = function |
48 | - PreTypes.SmallLetter orth -> orth | |
49 | - | PreTypes.CapLetter(orth,lc) -> orth | |
50 | - | PreTypes.AllSmall orth -> orth | |
51 | - | PreTypes.AllCap(orth,lc,lc2) -> orth | |
52 | - | PreTypes.FirstCap(orth,lc,cl,ll) -> orth | |
53 | - | PreTypes.SomeCap orth -> orth | |
54 | - | PreTypes.RomanDig(v,t) -> v | |
55 | - | PreTypes.Interp orth -> orth | |
56 | - | PreTypes.Symbol orth -> orth | |
57 | - | PreTypes.Dig(v,t) -> v | |
58 | - | PreTypes.Other2 orth -> orth | |
59 | - | PreTypes.Lemma(lemma,cat,interps) -> lemma | |
60 | - | PreTypes.Proper(lemma,cat,interps,senses) -> lemma | |
61 | - | PreTypes.Compound(sense,l) -> "Compound" | |
62 | - | PreTypes.Tokens(cat,l) -> "Tokens" | |
48 | + SmallLetter orth -> orth | |
49 | + | CapLetter(orth,lc) -> orth | |
50 | + | AllSmall orth -> orth | |
51 | + | AllCap(orth,lc,lc2) -> orth | |
52 | + | FirstCap(orth,lc,cl,ll) -> orth | |
53 | + | SomeCap orth -> orth | |
54 | + | RomanDig(v,t) -> v | |
55 | + | Interp orth -> orth | |
56 | + | Symbol orth -> orth | |
57 | + | Dig(v,t) -> v | |
58 | + | Other orth -> orth | |
59 | + | Lemma(lemma,cat,interps) -> lemma | |
60 | + | Proper(lemma,cat,interps,senses) -> lemma | |
61 | + | Compound(sense,l) -> "Compound" | |
62 | + | Tokens(cat,l) -> "Tokens" | |
63 | 63 | |
64 | 64 | let rec spaces i = |
65 | 65 | if i = 0 then "" else " " ^ spaces (i-1) |
66 | - | |
66 | +(* | |
67 | 67 | let rec string_of_tokens i = function |
68 | - PreTypes.Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;weight=%.2f;attrs=[%s];\n%s senses=[%s];\n%s valence=[%s];\n%s simple_valence=[%s];lroles=%s,%s}" (spaces i) t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (string_of_token t.PreTypes.token) | |
69 | - t.PreTypes.weight (String.concat ";" t.PreTypes.attrs) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s[%s]%.2f" sense (String.concat "," hipero) weight))) | |
70 | - (spaces i) (String.concat ";" (Xlist.map t.PreTypes.valence (WalStringOf.fnum_frame ""))) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.simple_valence (WalStringOf.fnum_frame ""))) (fst t.lroles) (snd t.lroles) | |
68 | + Token t -> sprintf "%s{orth=%s;beg=%d;len=%d;next=%d;token=%s;weight=%.2f;attrs=[%s];\n%s senses=[%s];\n%s valence=[%s];\n%s simple_valence=[%s];lroles=%s,%s}" (spaces i) t.orth t.beg t.len t.next (string_of_token t.token) | |
69 | + t.weight (String.concat ";" t.PreTypes.attrs) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s[%s]%.2f" sense (String.concat "," hipero) weight))) | |
70 | + (spaces i) (String.concat ";" (Xlist.map t.PreTypes.valence (ENIAMwalStringOf.fnum_frame ""))) (spaces i) (String.concat ";" (Xlist.map t.PreTypes.simple_valence (ENIAMwalStringOf.fnum_frame ""))) (fst t.lroles) (snd t.lroles) | |
71 | 71 | | PreTypes.Variant l -> sprintf "%sVariant[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1)))) |
72 | 72 | | PreTypes.Seq l -> sprintf "%sSeq[\n%s]" (spaces i) (String.concat ";\n" (Xlist.map l (string_of_tokens (i+1)))) |
73 | 73 | |
... | ... | @@ -75,7 +75,7 @@ let paths_to_string_indexed (paths,last,next_id) = |
75 | 75 | String.concat "\n" (Xlist.map paths (fun (i,j,t) -> |
76 | 76 | Printf.sprintf "%2d %2d %s" i j (string_of_tokens 0 (PreTypes.Token t)))) |
77 | 77 | ^ Printf.sprintf "\nlast=%d next_id=%d" last next_id |
78 | - | |
78 | +*) | |
79 | 79 | (*let string_of_token_record1 t = |
80 | 80 | sprintf "{orth=%s;beg=%d;len=%d;next=%d;token=%s;id=%d;lnode=%d;rnode=%d;conll_id=%s;conll_super=%s;conll_label=%s;attrs=[%s]}" |
81 | 81 | t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (string_of_token t.PreTypes.token) |
... | ... | @@ -104,8 +104,8 @@ let string_of_status = function |
104 | 104 | let rec xml_of_dependency_tree = function |
105 | 105 | Node t -> Xml.Element("node",["pred",t.pred;"cat",t.cat;"weight",string_of_float t.weight;"id",string_of_int t.id],[ |
106 | 106 | Xml.Element("gs",[],[xml_of_dependency_tree t.gs]); |
107 | - Xml.Element("agf",[],[Xml.PCData (WalStringOf.gf t.agf)]); | |
108 | - Xml.Element("amorf",[],[Xml.PCData (WalStringOf.morf t.amorf)]); | |
107 | + Xml.Element("agf",[],[Xml.PCData (ENIAMwalStringOf.gf t.agf)]); | |
108 | + Xml.Element("amorf",[],[Xml.PCData (ENIAMwalStringOf.morf t.amorf)]); | |
109 | 109 | Xml.Element("attrs",[],Xlist.map t.attrs (fun (e,t) -> Xml.Element("attr",["label",e],[xml_of_dependency_tree t]))); |
110 | 110 | Xml.Element("args",[],[xml_of_dependency_tree t.args])]) |
111 | 111 | | Concept c -> Xml.Element("concept",["var",fst c.c_variable ^ snd c.c_variable;"pos",string_of_int c.c_pos],[ |
... | ... | @@ -168,8 +168,8 @@ let string_of_node t = |
168 | 168 | "PRED",Val t.pred;"CAT",Val t.cat;"ID",Val (string_of_int t.id);"WEIGHT",Val (string_of_float t.weight);"GS",t.gs; |
169 | 169 | "AGF",Gf t.agf;"AMORF",Morf t.amorf;"AROLE",Val t.arole;"AROLE-ATTR",Val t.arole_attr; |
170 | 170 | "MEANING",Val t.meaning;"HIPERO",Tuple(Xlist.map (StringSet.to_list t.hipero) (fun s -> Val s));"MEANING-WEIGHT",Val (string_of_float t.meaning_weight); |
171 | - "ROLE",Val t.position.WalTypes.role;"ROLE-ATTR",Val t.position.WalTypes.role_attr;"SEL-PREFS",Tuple(Xlist.map t.position.WalTypes.sel_prefs (fun s -> Val s)); | |
172 | - "GF",Gf t.position.WalTypes.gf] @ t.attrs in | |
171 | + "ROLE",Val t.position.ENIAMwalTypes.role;"ROLE-ATTR",Val t.position.ENIAMwalTypes.role_attr;"SEL-PREFS",Tuple(Xlist.map t.position.ENIAMwalTypes.sel_prefs (fun s -> Val s)); | |
172 | + "GF",Gf t.position.ENIAMwalTypes.gf] @ t.attrs in | |
173 | 173 | "{ " ^ String.concat " | " (Xlist.map l (fun (e,t) -> "{ " ^ e ^ " | " ^ escape_string (LCGstringOf.linear_term 0 t) ^ " }")) ^ " }" |
174 | 174 | |
175 | 175 | let single_rel_id_count = ref 0 |
... | ... | @@ -447,16 +447,16 @@ let print_graph2 path name query t = |
447 | 447 | Xlist.iter (Str.split (Str.regexp path) path) (fun _ -> Sys.chdir "..")*) |
448 | 448 | |
449 | 449 | let rec get_lemma = function |
450 | - PreTypes.Interp orth -> orth | |
451 | - | PreTypes.Lemma(lemma,cat,_) -> lemma ^ "\n" ^ cat | |
450 | + ENIAMtokenizerTypes.Interp orth -> orth | |
451 | + | ENIAMtokenizerTypes.Lemma(lemma,cat,_) -> lemma ^ "\n" ^ cat | |
452 | 452 | | _ -> "" |
453 | 453 | |
454 | 454 | let print_paths path name paths = |
455 | 455 | File.file_out (path ^ name ^ ".gv") (fun file -> |
456 | 456 | fprintf file "digraph G {\n"; |
457 | 457 | Array.iter (fun t -> |
458 | - let lemma = get_lemma t.PreTypes.token in | |
459 | - if lemma <> "" then fprintf file " %d -> %d [label=\"%s\\n%s\"]\n" t.PreTypes.beg t.PreTypes.next t.PreTypes.orth lemma) paths; | |
458 | + let lemma = get_lemma t.ENIAMtokenizerTypes.token in | |
459 | + if lemma <> "" then fprintf file " %d -> %d [label=\"%s\\n%s\"]\n" t.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.next t.ENIAMtokenizerTypes.orth lemma) paths; | |
460 | 460 | fprintf file "}\n"); |
461 | 461 | Sys.chdir path; |
462 | 462 | ignore (Sys.command ("dot -Tpng " ^ name ^ ".gv -o " ^ name ^ ".png")); |
... | ... | @@ -474,7 +474,7 @@ let rec print_simplified_dependency_tree_rec2 file tokens edge upper = function |
474 | 474 | |
475 | 475 | let rec print_simplified_dependency_tree_rec file tokens edge upper id = function |
476 | 476 | Node t -> |
477 | - let orth = if t.id = 0 then "" else (ExtArray.get tokens t.id).PreTypes.orth in | |
477 | + let orth = if t.id = 0 then "" else (ExtArray.get tokens t.id).ENIAMtokenizerTypes.orth in | |
478 | 478 | fprintf file " %s [label=\"%s\\n%s\\n%s:%s\\n%f\"]\n" id (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat t.weight; |
479 | 479 | print_edge file edge upper id; |
480 | 480 | print_simplified_dependency_tree_rec2 file tokens "" id t.args |
... | ... | @@ -495,7 +495,7 @@ let print_simplified_dependency_tree path name tokens dependency_tree = |
495 | 495 | Int.iter 0 (Array.length dependency_tree - 1) (fun i -> print_simplified_dependency_tree_rec file tokens "" "" ("x" ^ string_of_int i) dependency_tree.(i)); |
496 | 496 | (* match dependency_tree.(i) with |
497 | 497 | Node t -> |
498 | - let orth = if t.id = 0 then "" else tokens.(t.id).PreTypes.orth in | |
498 | + let orth = if t.id = 0 then "" else tokens.(t.id).ENIAMtokenizerTypes.orth in | |
499 | 499 | fprintf file " %d [label=\"%s\\n%s\\n%s:%s\"]\n" i (LCGstringOf.linear_term 0 t.gs) orth t.pred t.cat; |
500 | 500 | let refs = get_refs [] t.args in |
501 | 501 | Xlist.iter refs (fun r -> |
... | ... | @@ -510,14 +510,14 @@ let print_simplified_dependency_tree path name tokens dependency_tree = |
510 | 510 | File.file_out filename (fun file -> |
511 | 511 | fprintf file "digraph G {\n"; |
512 | 512 | let set = Xlist.fold paths IntSet.empty (fun set t -> |
513 | - IntSet.add (IntSet.add set t.PreTypes.beg) t.PreTypes.next) in | |
513 | + IntSet.add (IntSet.add set t.ENIAMtokenizerTypes.beg) t.ENIAMtokenizerTypes.next) in | |
514 | 514 | IntSet.iter set (fun i -> fprintf file " %d [width=0; height=0; label=\"\"]\n" i); |
515 | 515 | Xlist.iter paths (fun t -> |
516 | - let lemma = get_lemma t.PreTypes.token in | |
516 | + let lemma = get_lemma t.ENIAMtokenizerTypes.token in | |
517 | 517 | if lemma <> "" then ( |
518 | - let s = if t.PreTypes.orth = "" then lemma else t.PreTypes.orth ^ "\n" ^ lemma in | |
519 | - fprintf file " %d -> i%d -> %d [arrowhead=none]\n" t.PreTypes.beg t.PreTypes.id t.PreTypes.next; | |
520 | - fprintf file " i%d [label=\"%s\"]\n" t.PreTypes.id s)); | |
518 | + let s = if t.ENIAMtokenizerTypes.orth = "" then lemma else t.ENIAMtokenizerTypes.orth ^ "\n" ^ lemma in | |
519 | + fprintf file " %d -> i%d -> %d [arrowhead=none]\n" t.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.id t.ENIAMtokenizerTypes.next; | |
520 | + fprintf file " i%d [label=\"%s\"]\n" t.ENIAMtokenizerTypes.id s)); | |
521 | 521 | fprintf file "}\n"); |
522 | 522 | Sys.chdir "results"; |
523 | 523 | ignore (Sys.command "dot -Tpng tree.gv -o tree.png"); |
... | ... | @@ -527,13 +527,13 @@ let print_simplified_dependency_tree path name tokens dependency_tree = |
527 | 527 | File.file_out filename (fun file -> |
528 | 528 | fprintf file "digraph G {\n"; |
529 | 529 | fprintf file " subgraph {\n ordering=out\n"; |
530 | - let same = Xlist.fold (Xlist.sort paths (fun s t -> compare s.PreTypes.beg t.PreTypes.beg)) [] (fun same t -> | |
531 | - let lemma = get_lemma t.PreTypes.token in | |
530 | + let same = Xlist.fold (Xlist.sort paths (fun s t -> compare s.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.beg)) [] (fun same t -> | |
531 | + let lemma = get_lemma t.ENIAMtokenizerTypes.token in | |
532 | 532 | if lemma <> "" then ( |
533 | - let s = if t.PreTypes.orth = "" then lemma else t.PreTypes.orth ^ "\n" ^ lemma in | |
534 | - fprintf file " i%d -> out [arrowhead=none]\n" t.PreTypes.id; | |
535 | - fprintf file " i%d [label=\"%s\"]\n" t.PreTypes.id s; | |
536 | - t.PreTypes.id :: same) | |
533 | + let s = if t.ENIAMtokenizerTypes.orth = "" then lemma else t.ENIAMtokenizerTypes.orth ^ "\n" ^ lemma in | |
534 | + fprintf file " i%d -> out [arrowhead=none]\n" t.ENIAMtokenizerTypes.id; | |
535 | + fprintf file " i%d [label=\"%s\"]\n" t.ENIAMtokenizerTypes.id s; | |
536 | + t.ENIAMtokenizerTypes.id :: same) | |
537 | 537 | else same) in |
538 | 538 | fprintf file " }\n"; |
539 | 539 | fprintf file " { rank = same; %s }\n" (String.concat "; " (Xlist.map same (fun i -> sprintf "\"i%d\"" i))); |
... | ... | @@ -555,19 +555,19 @@ let rec schema_latex schema = |
555 | 555 | "\\begin{tabular}{l}" ^ |
556 | 556 | String.concat "\\\\" (Xlist.map schema (fun s -> |
557 | 557 | LatexMain.escape_string (String.concat "," ( |
558 | - (if s.WalTypes.gf = WalTypes.ARG then [] else [WalStringOf.gf s.WalTypes.gf])@ | |
559 | - (if s.WalTypes.role = "" then [] else [s.WalTypes.role])@ | |
560 | - (if s.WalTypes.role_attr = "" then [] else [s.WalTypes.role_attr])@ | |
561 | - s.WalTypes.sel_prefs@(WalStringOf.controllers s.WalTypes.cr)@(WalStringOf.controllees s.WalTypes.ce)) ^ WalStringOf.direction s.WalTypes.dir ^ "{" ^ String.concat ";" (Xlist.map s.WalTypes.morfs WalStringOf.morf) ^ "}"))) ^ | |
558 | + (if s.ENIAMwalTypes.gf = ENIAMwalTypes.ARG then [] else [ENIAMwalStringOf.gf s.ENIAMwalTypes.gf])@ | |
559 | + (if s.ENIAMwalTypes.role = "" then [] else [s.ENIAMwalTypes.role])@ | |
560 | + (if s.ENIAMwalTypes.role_attr = "" then [] else [s.ENIAMwalTypes.role_attr])@ | |
561 | + s.ENIAMwalTypes.sel_prefs@(ENIAMwalStringOf.controllers s.ENIAMwalTypes.cr)@(ENIAMwalStringOf.controllees s.ENIAMwalTypes.ce)) ^ ENIAMwalStringOf.direction s.ENIAMwalTypes.dir ^ "{" ^ String.concat ";" (Xlist.map s.ENIAMwalTypes.morfs ENIAMwalStringOf.morf) ^ "}"))) ^ | |
562 | 562 | "\\end{tabular}" |
563 | 563 | |
564 | 564 | let fnum_frame_latex = function |
565 | - fnum,WalTypes.Frame(atrs,s) -> | |
566 | - Printf.sprintf "%d: %s: %s" fnum (LatexMain.escape_string (WalStringOf.frame_atrs atrs)) (schema_latex s) | |
567 | - | fnum,WalTypes.LexFrame(id,p,r,s) -> | |
568 | - Printf.sprintf "%d: %s: %s: %s: %s" fnum id (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s) | |
569 | - | fnum,WalTypes.ComprepFrame(le,p,r,s) -> | |
570 | - Printf.sprintf "%d: %s: %s: %s: %s" fnum le (LatexMain.escape_string (WalStringOf.pos p)) (WalStringOf.restr r) (schema_latex s) | |
565 | + fnum,ENIAMwalTypes.Frame(atrs,s) -> | |
566 | + Printf.sprintf "%d: %s: %s" fnum (LatexMain.escape_string (ENIAMwalStringOf.frame_atrs atrs)) (schema_latex s) | |
567 | + | fnum,ENIAMwalTypes.LexFrame(id,p,r,s) -> | |
568 | + Printf.sprintf "%d: %s: %s: %s: %s" fnum id (LatexMain.escape_string (ENIAMwalStringOf.pos p)) (ENIAMwalStringOf.restr r) (schema_latex s) | |
569 | + | fnum,ENIAMwalTypes.ComprepFrame(le,p,r,s) -> | |
570 | + Printf.sprintf "%d: %s: %s: %s: %s" fnum le (LatexMain.escape_string (ENIAMwalStringOf.pos p)) (ENIAMwalStringOf.restr r) (schema_latex s) | |
571 | 571 | |
572 | 572 | (*let print_paths_latex name paths = |
573 | 573 | LatexMain.latex_file_out "results/" name "a0" false (fun file -> |
... | ... | @@ -575,11 +575,11 @@ let fnum_frame_latex = function |
575 | 575 | Int.iter 0 (Array.length paths - 1) (fun i -> |
576 | 576 | let t = paths.(i) in |
577 | 577 | fprintf file "%s & %d & %d & %d & %s & %d & %.4f & %s & %s %s &\\begin{tabular}{l|l|p{4cm}}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular} &\\begin{tabular}{l}%s\\end{tabular}\\\\\n\\hline\n" |
578 | - t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (LatexMain.escape_string (string_of_token t.PreTypes.token)) t.PreTypes.id t.PreTypes.weight | |
579 | - (String.concat ";" t.PreTypes.attrs) (fst t.PreTypes.lroles) (snd t.PreTypes.lroles) | |
580 | - (String.concat "\\\\\n" (Xlist.map t.PreTypes.senses (fun (sense,hipero,weight) -> sprintf "%s & %.2f & %s" sense weight (String.concat "," hipero)))) | |
581 | - (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.simple_valence (fun x -> fnum_frame_latex x))) | |
582 | - (String.concat "\\\\\n\\hline\n" (Xlist.map t.PreTypes.valence (fun x -> fnum_frame_latex x)))); | |
578 | + t.ENIAMtokenizerTypes.orth t.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.len t.ENIAMtokenizerTypes.next (LatexMain.escape_string (string_of_token t.ENIAMtokenizerTypes.token)) t.ENIAMtokenizerTypes.id t.ENIAMtokenizerTypes.weight | |
579 | + (String.concat ";" t.ENIAMtokenizerTypes.attrs) (fst t.ENIAMtokenizerTypes.lroles) (snd t.ENIAMtokenizerTypes.lroles) | |
580 | + (String.concat "\\\\\n" (Xlist.map t.ENIAMtokenizerTypes.senses (fun (sense,hipero,weight) -> sprintf "%s & %.2f & %s" sense weight (String.concat "," hipero)))) | |
581 | + (String.concat "\\\\\n\\hline\n" (Xlist.map t.ENIAMtokenizerTypes.simple_valence (fun x -> fnum_frame_latex x))) | |
582 | + (String.concat "\\\\\n\\hline\n" (Xlist.map t.ENIAMtokenizerTypes.valence (fun x -> fnum_frame_latex x)))); | |
583 | 583 | fprintf file "\\end{longtable}"); |
584 | 584 | LatexMain.latex_compile_and_clean "results/" name*) |
585 | 585 | |
... | ... | @@ -715,7 +715,7 @@ let html_of_struct_sentence tokens paths last = |
715 | 715 | String.concat "\n" (Xlist.map (List.sort compare paths) (fun (id,lnode,rnode) -> |
716 | 716 | let t = ExtArray.get tokens id in |
717 | 717 | sprintf "<tr><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>" |
718 | - t.PreTypes.orth (escape_html (string_of_token t.PreTypes.token)) id lnode rnode)) ^ | |
718 | + t.ENIAMtokenizerTypes.orth (escape_html (string_of_token t.ENIAMtokenizerTypes.token)) id lnode rnode)) ^ | |
719 | 719 | sprintf "<tr><td></td><td></td><td></td><td>%d</td><td></td></tr>" last ^ |
720 | 720 | "</table>" |
721 | 721 | |
... | ... | @@ -725,7 +725,7 @@ let html_of_dep_sentence tokens paths = |
725 | 725 | let id,super,label = paths.(conll_id) in |
726 | 726 | let t = ExtArray.get tokens id in |
727 | 727 | (sprintf "<tr><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%s</td></tr>" |
728 | - t.PreTypes.orth (escape_html (string_of_token t.PreTypes.token)) id conll_id super label) :: l))) ^ | |
728 | + t.ENIAMtokenizerTypes.orth (escape_html (string_of_token t.ENIAMtokenizerTypes.token)) id conll_id super label) :: l))) ^ | |
729 | 729 | "</table>" |
730 | 730 | |
731 | 731 | let html_of_tokens tokens = |
... | ... | @@ -733,26 +733,28 @@ let html_of_tokens tokens = |
733 | 733 | String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size tokens - 1) [] (fun l id -> |
734 | 734 | let t = ExtArray.get tokens id in |
735 | 735 | (sprintf "<tr><td>%d</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td><td>%s</td><td>%s</td></tr>" |
736 | - id t.PreTypes.orth t.PreTypes.beg t.PreTypes.len t.PreTypes.next (escape_html (string_of_token t.PreTypes.token)) | |
737 | - (String.concat "; " t.PreTypes.attrs)) :: l))) ^ | |
736 | + id t.ENIAMtokenizerTypes.orth t.ENIAMtokenizerTypes.beg t.ENIAMtokenizerTypes.len t.ENIAMtokenizerTypes.next (escape_html (string_of_token t.ENIAMtokenizerTypes.token)) | |
737 | + (String.concat "; " t.ENIAMtokenizerTypes.attrs)) :: l))) ^ | |
738 | 738 | "</table>" |
739 | 739 | |
740 | -let html_of_tokens_simple_valence tokens = | |
740 | +let html_of_tokens_simple_valence tokens lex_sems = | |
741 | 741 | "<table><tr><td><b>id</b></td><td><b>orth</b></td><td><b>simple_valence</b></td></tr>" ^ |
742 | 742 | String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size tokens - 1) [] (fun l id -> |
743 | 743 | let t = ExtArray.get tokens id in |
744 | - Xlist.fold t.simple_valence l (fun l (fnum,frame) -> | |
744 | + let d = ExtArray.get lex_sems id in | |
745 | + Xlist.fold d.ENIAMlexSemanticsTypes.simple_valence l (fun l (fnum,frame) -> | |
745 | 746 | (sprintf "<tr><td>%d</td><td>%s</td><td>%s</td></tr>" |
746 | - id t.PreTypes.orth (WalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^ | |
747 | + id t.ENIAMtokenizerTypes.orth (ENIAMwalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^ | |
747 | 748 | "</table>" |
748 | 749 | |
749 | -let html_of_tokens_valence tokens = | |
750 | +let html_of_tokens_valence tokens lex_sems = | |
750 | 751 | "<table><tr><td><b>id</b></td><td><b>orth</b></td><td><b>simple_valence</b></td></tr>" ^ |
751 | 752 | String.concat "\n" (List.rev (Int.fold 0 (ExtArray.size tokens - 1) [] (fun l id -> |
752 | 753 | let t = ExtArray.get tokens id in |
753 | - Xlist.fold t.valence l (fun l (fnum,frame) -> | |
754 | + let d = ExtArray.get lex_sems id in | |
755 | + Xlist.fold d.ENIAMlexSemanticsTypes.valence l (fun l (fnum,frame) -> | |
754 | 756 | (sprintf "<tr><td>%d</td><td>%s</td><td>%s</td></tr>" |
755 | - id t.PreTypes.orth (WalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^ | |
757 | + id t.ENIAMtokenizerTypes.orth (ENIAMwalStringOf.fnum_frame (lemma_of_token t.token) (fnum,frame))) :: l)))) ^ | |
756 | 758 | "</table>" |
757 | 759 | |
758 | 760 | let create_latex_chart path name chart = |
... | ... |