Commit b8f39c6574ce14459ad32c358b36a2f6c1ba9248
1 parent
d102eb5a
Analiza controli w Walentym
Showing
16 changed files
with
94 additions
and
260 deletions
LCGlexicon/ENIAMcategoriesPL.ml
... | ... | @@ -86,6 +86,7 @@ let subst_time_lexemes = ref StringSet.empty |
86 | 86 | |
87 | 87 | let subst_pronoun_lexemes = StringSet.of_list ["co"; "kto"; "cokolwiek"; "ktokolwiek"; "nic"; "nikt"; "coś"; "ktoś"; "to"] |
88 | 88 | let adj_pronoun_lexemes = StringSet.of_list ["czyj"; "jaki"; "który"; "jakiś"; "ten"; "taki"] |
89 | +let compar_lexemes = StringSet.of_list ["jak"; "jako"; "niż"; "niczym"; "niby"; "co"; "zamiast"] | |
89 | 90 | |
90 | 91 | (* let adj_quant_lexemes = StringSet.of_list ["każdy"; "wszelki"; "wszystek"; "żaden"; "jakiś"; "pewien"; "niektóry"; "jedyny"; "sam"] *) |
91 | 92 | |
... | ... | @@ -200,12 +201,12 @@ let clarify_categories proper cat = function |
200 | 201 | let cases = expand_cases cases in |
201 | 202 | [{empty_cats with lemma=lemma; pos="siebie"; pos2="pron"; numbers=all_numbers; cases=cases; genders=all_genders; persons=["ter"]}] |
202 | 203 | | lemma,"prep",[cases;woks] -> |
203 | - if lemma = "jak" || lemma = "jako" || lemma = "niż" || lemma = "niczym" || lemma = "niby" || lemma = "co" || lemma = "zamiast" then | |
204 | + if StringSet.mem compar_lexemes lemma then | |
204 | 205 | [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else |
205 | 206 | let cases = expand_cases cases in |
206 | 207 | [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases}] |
207 | 208 | | lemma,"prep",[cases] -> |
208 | - if lemma = "jak" || lemma = "jako" || lemma = "niż" || lemma = "niczym" || lemma = "niby" || lemma = "co" || lemma = "zamiast" then | |
209 | + if StringSet.mem compar_lexemes lemma then | |
209 | 210 | [{empty_cats with lemma=lemma; pos="compar"; pos2="prep"}] else |
210 | 211 | let cases = expand_cases cases in |
211 | 212 | [{empty_cats with lemma=lemma; pos="prep"; pos2="prep"; cases=cases}] |
... | ... |
LCGlexicon/TODO
LCGlexicon/resources/lexicon-pl.dic
... | ... | @@ -105,7 +105,10 @@ lemma=jakiś|ten|taki,pos=apron: |
105 | 105 | |
106 | 106 | # liczebniki |
107 | 107 | # FIXME: liczba po rzeczowniku # FIXME: zbadać jak liczebniki współdziałają z jako COMPAR |
108 | -pos=num|intnum|realnum|intnum-interval|realnum-interval: | |
108 | +# dwie reguły są potrzebne po to, żeby w ENIAMsemValence.match_value nie pojawiał się variant | |
109 | +pos=num|intnum|realnum|intnum-interval|realnum-interval,nsem=count: | |
110 | + num*number*case*gender*person*acm*nsem{\(1+qub),/(1+inclusion)}; # FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? | |
111 | +pos=num|intnum|realnum|intnum-interval|realnum-interval,nsem=mass: | |
109 | 112 | num*number*case*gender*person*acm*nsem{\(1+qub),/(1+inclusion)}; # FIXME: jak usunięcie Phrase ProNG wpływa na pokrycie? |
110 | 113 | |
111 | 114 | # pojemniki |
... | ... | @@ -138,7 +141,7 @@ lemma=w,pos=prep,case=loc: prepnp*lemma*case{\(1+advp*T),/(day-month+day+ye |
138 | 141 | |
139 | 142 | # komparatywy |
140 | 143 | # FIXME: trzeba poprawić comparnp i comparpp w walencji |
141 | -pos=compar: QUANT[case=nom&gen&dat&acc&inst] compar*lemma*case{\(1+advp*T),/np*T*case*T*T}{\(1+qub),/(1+inclusion)}; | |
144 | +pos=compar: QUANT[case=nom&gen&dat&acc&inst] compar*lemma*case{\(1+advp*T),/(np*T*case*T*T+adjp*T*case*T)}{\(1+qub),/(1+inclusion)}; | |
142 | 145 | pos=compar: QUANT[case=postp] compar*lemma*case{\(1+advp*T),/(prepnp*T*T+prepadjp*T*T)}{\(1+qub),/(1+inclusion)}; |
143 | 146 | |
144 | 147 | # frazy przymiotnikowe |
... | ... |
lexSemantics/ENIAMlexSemantics.ml
... | ... | @@ -205,6 +205,20 @@ let mark_nosem frame = |
205 | 205 | else p))} |
206 | 206 | |
207 | 207 | let assign_prep_semantics lemma = |
208 | + if StringSet.mem ENIAMcategoriesPL.compar_lexemes lemma then | |
209 | + [{empty_frame with | |
210 | + meanings = [find_prep_meaning lemma [Predef "ALL"]]; | |
211 | + positions= [{empty_position with | |
212 | + dir=Forward_; gf=CORE; | |
213 | + morfs=ENIAMwalRenderer.compar_morfs; is_necessary=Req}]; | |
214 | + agf="arg"}; | |
215 | + {empty_frame with | |
216 | + meanings = [find_prep_meaning lemma [Predef "ALL"]]; | |
217 | + positions= [{empty_position with | |
218 | + sel_prefs=[SynsetName "ALL"]; dir=Forward_; gf=CORE; | |
219 | + morfs=ENIAMwalRenderer.compar_morfs; is_necessary=Req}]; | |
220 | + arole="Arg"; arole_attr=""; arev=false; agf="adjunct"}] | |
221 | + else | |
208 | 222 | let roles = try StringMap.find ENIAMlexSemanticsData.prep_roles lemma with Not_found -> [] in |
209 | 223 | (* Printf.printf "assign_prep_semantics: |roles|=%d\n%!" (Xlist.size roles); *) |
210 | 224 | {empty_frame with |
... | ... | @@ -213,6 +227,14 @@ let assign_prep_semantics lemma = |
213 | 227 | dir=if lemma="temu" then Backward_ else Forward_; gf=CORE; |
214 | 228 | morfs=ENIAMwalRenderer.prep_morfs; is_necessary=Req}]; |
215 | 229 | agf="arg"} :: |
230 | + (if roles = [] then (* FIXME: zaślepka do usunięcia po stworzeniu listy przyimków *) | |
231 | + [{empty_frame with | |
232 | + meanings = [find_prep_meaning lemma [Predef "ALL"]]; | |
233 | + positions= [{empty_position with | |
234 | + sel_prefs=[SynsetName "ALL"]; dir=if lemma="temu" then Backward_ else Forward_; gf=CORE; | |
235 | + morfs=ENIAMwalRenderer.prep_morfs; is_necessary=Req}]; | |
236 | + arole="Arg"; arole_attr=""; arev=false; agf="adjunct"}] | |
237 | + else | |
216 | 238 | Xlist.map roles (function (case,arole,arole_attr,hipero,sel_prefs) -> |
217 | 239 | (* Printf.printf "assign_prep_semantics: case=%s arole=%s arole_attr=%s\n%!" case arole arole_attr; *) |
218 | 240 | let meaning = find_prep_meaning lemma hipero in (* FIXME: zaślepka dla meaning i weight *) |
... | ... | @@ -222,7 +244,7 @@ let assign_prep_semantics lemma = |
222 | 244 | morfs=ENIAMwalRenderer.prep_morfs(*ENIAMwalRenderer.assing_prep_morfs (lemma,case)*); is_necessary=Req}] in |
223 | 245 | (* print_endline "assign_prep_semantics 2"; *) |
224 | 246 | {empty_frame with selectors=[ENIAM_LCGlexiconTypes.Case,ENIAM_LCGlexiconTypes.Eq,[case]]; meanings=[meaning]; positions=find_selprefs positions; |
225 | - arole=arole; arole_attr=arole_attr; arev=false; agf="adjunct"}) | |
247 | + arole=arole; arole_attr=arole_attr; arev=false; agf="adjunct"})) | |
226 | 248 | |
227 | 249 | let assign_num_semantics lemma = |
228 | 250 | let sems = try StringMap.find !num_sem lemma with Not_found -> [] in |
... | ... | @@ -369,7 +391,7 @@ let disambiguate_senses lex_sems group = |
369 | 391 | Xlist.fold frame.positions prefs (fun prefs t -> |
370 | 392 | Xlist.fold t.sel_prefs prefs (fun prefs -> function |
371 | 393 | SynsetName s -> StringSet.add prefs s |
372 | - | _ -> failwith "disambiguate_senses")))) in | |
394 | + | t -> failwith ("disambiguate_senses: " ^ ENIAMwalStringOf.sel_prefs t))))) in | |
373 | 395 | (*let hipero = Xlist.fold group (StringSet.singleton "ALL") (fun hipero id -> |
374 | 396 | Xlist.fold (ExtArray.get lex_sems id).senses hipero (fun hipero (_,l,_) -> |
375 | 397 | Xlist.fold l hipero StringSet.add)) in |
... | ... |
lexSemantics/ENIAMlexSemanticsData.ml
... | ... | @@ -267,19 +267,19 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
267 | 267 | "przed","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; |
268 | 268 | "w","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; |
269 | 269 | "za","acc", "Location","Goal",["POŁOŻENIE"],["POŁOŻENIE"]; |
270 | - "dzięki","dat", "Condition","",["CZEMU"],[]; | |
271 | - "na","acc", "Condition","",["CZEMU"],[]; | |
272 | - "na","postp", "Condition","",["CZEMU"],[]; | |
273 | - "od","gen", "Condition","",["CZEMU"],[]; | |
274 | - "przez","acc", "Condition","",["CZEMU"],[]; | |
275 | - "wskutek","gen", "Condition","",["CZEMU"],[]; | |
276 | - "z","gen", "Condition","",["CZEMU"],[]; | |
277 | - "dla","gen", "Purpose","",["CZEMU"],[]; | |
278 | - "do","gen", "Purpose","",["CZEMU"],[]; | |
279 | - "ku","dat", "Purpose","",["CZEMU"],[]; | |
280 | - "na","acc", "Purpose","",["CZEMU"],[]; | |
281 | - "na","postp", "Purpose","",["CZEMU"],[]; | |
282 | - "po","acc", "Purpose","",["CZEMU"],[]; | |
270 | + "dzięki","dat", "Condition","",["CZEMU"],["ALL"]; | |
271 | + "na","acc", "Condition","",["CZEMU"],["ALL"]; | |
272 | + "na","postp", "Condition","",["CZEMU"],["ALL"]; | |
273 | + "od","gen", "Condition","",["CZEMU"],["ALL"]; | |
274 | + "przez","acc", "Condition","",["CZEMU"],["ALL"]; | |
275 | + "wskutek","gen", "Condition","",["CZEMU"],["ALL"]; | |
276 | + "z","gen", "Condition","",["CZEMU"],["ALL"]; | |
277 | + "dla","gen", "Purpose","",["CZEMU"],["ALL"]; | |
278 | + "do","gen", "Purpose","",["CZEMU"],["ALL"]; | |
279 | + "ku","dat", "Purpose","",["CZEMU"],["ALL"]; | |
280 | + "na","acc", "Purpose","",["CZEMU"],["ALL"]; | |
281 | + "na","postp", "Purpose","",["CZEMU"],["ALL"]; | |
282 | + "po","acc", "Purpose","",["CZEMU"],["ALL"]; | |
283 | 283 | "do","gen", "Duration","",["CZAS"],["CZAS"]; |
284 | 284 | "od","gen", "Duration","",["CZAS"],["CZAS"]; |
285 | 285 | "przez","acc", "Duration","",["CZAS"],["CZAS"]; |
... | ... | @@ -326,13 +326,13 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
326 | 326 | "wedle","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"]; |
327 | 327 | "wpośród","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"]; |
328 | 328 | "wśrzód","gen", "Location","",["POŁOŻENIE"],["POŁOŻENIE"]; |
329 | - "po","postp", "Manner","",["ALL"],[]; | |
330 | - "bez","gen", "Manner","",["ALL"],[]; | |
329 | + "po","postp", "Manner","",["ALL"],["ALL"]; | |
330 | + "bez","gen", "Manner","",["ALL"],["ALL"]; | |
331 | 331 | (* "jako","str", "Manner","",[],[]; |
332 | 332 | "jak","str", "Manner","",[],[];*) |
333 | - "pod","acc", "Manner","",["ALL"],[]; | |
334 | - "z","inst", "Manner","",["ALL"],[]; | |
335 | - "z","postp", "Manner","",["ALL"],[]; | |
333 | + "pod","acc", "Manner","",["ALL"],["ALL"]; | |
334 | + "z","inst", "Manner","",["ALL"],["ALL"]; | |
335 | + "z","postp", "Manner","",["ALL"],["ALL"]; | |
336 | 336 | "dokoła","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"]; |
337 | 337 | "dookoła","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"]; |
338 | 338 | "koło","gen", "Path","",["POŁOŻENIE"],["POŁOŻENIE"]; |
... | ... | @@ -368,9 +368,10 @@ let prep_roles = Xlist.fold [ (* lemma,case,role,role_attr,meaning/hipero,sel_pr |
368 | 368 | "w","loc", "Time","",["CZAS"],["CZAS"]; |
369 | 369 | "temu","acc", "Time","",["CZAS"],["CZAS"]; (* dodane *) |
370 | 370 | "za","gen", "Time","",["CZAS"],["CZAS"]; (* dodane *) |
371 | + "o","acc", "Arg","",["ALL"],["ALL"]; (* FIXME: zaślepka *) | |
371 | 372 | ] StringMap.empty (fun map (lemma,case,role,role_attr,hipero,sel_prefs) -> |
372 | 373 | let hipero = Xlist.map hipero (fun hipero -> ENIAMwalTypes.Predef hipero) in |
373 | - let sel_prefs = Xlist.map sel_prefs (fun sel_prefs -> ENIAMwalTypes.Predef sel_prefs) in | |
374 | + let sel_prefs = Xlist.map sel_prefs (fun sel_prefs -> ENIAMwalTypes.SynsetName sel_prefs) in | |
374 | 375 | StringMap.add_inc map lemma [case,role,role_attr,hipero,sel_prefs] |
375 | 376 | (fun l -> (case,role,role_attr,hipero,sel_prefs) :: l)) |
376 | 377 | (* "przeciwko","dat","Dat"; |
... | ... |
lexSemantics/ENIAMvalence.ml
... | ... | @@ -641,6 +641,7 @@ let get_aroles schema lemma = function |
641 | 641 | | "perl" -> ([Mode,Eq,["perl"]],"Path","",false) :: l |
642 | 642 | | "dur" -> ([Mode,Eq,["dur"]],"Duration","",false) :: l |
643 | 643 | | "temp" -> ([Mode,Eq,["temp"]],"Time","",false) :: l |
644 | + | "pron" -> ([Mode,Eq,["mod"]],"Manner","",false) :: l | |
644 | 645 | | _ -> failwith "get_aroles") |
645 | 646 | | "qub" -> |
646 | 647 | let l = try StringMap.find ENIAMlexSemanticsData.qub_roles lemma with Not_found -> ["Arg",""] in |
... | ... |
lexSemantics/ENIAMwalRenderer.ml
... | ... | @@ -363,3 +363,10 @@ let prep_morfs = [ |
363 | 363 | LCG(Tensor[Atom "day"]); |
364 | 364 | LCG(Tensor[Atom "date"]); |
365 | 365 | ] |
366 | + | |
367 | +let compar_morfs = [ | |
368 | + LCG(Tensor[Atom "np"; Top; Atom "case"; Top; Top]); | |
369 | + LCG(Tensor[Atom "adjp"; Top; Atom "case"; Top]); | |
370 | + LCG(Tensor[Atom "prepnp"; Top; Top]); | |
371 | + LCG(Tensor[Atom "prepadjp"; Top; Top]); | |
372 | + ] | |
... | ... |
semantics/ENIAMsemGraph.ml
... | ... | @@ -94,6 +94,8 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t = |
94 | 94 | | "NSYN",Val "pronoun" -> c(*{c with c_quant=Tuple[c.c_quant;Val "indexical"]}*),measure,cx_flag |
95 | 95 | | "NSEM",Val "count" -> c(*{c with c_quant=Tuple[c.c_quant;Val "count"]}*),measure,cx_flag |
96 | 96 | | "NSEM",Val "mass" -> {c with c_quant=Tuple[c.c_quant;Val "mass"]},measure,cx_flag |
97 | + | "NSEM",Variant(e,[a,Val "mass";b,Val "count"]) -> {c with c_quant=Tuple[c.c_quant;Variant(e,[a,Val "mass";b,Val "count"])]},measure,cx_flag (* FIXME: tu by należało podzielić to na dwa pudełka *) | |
98 | + | "NSEM",Variant(e,[a,Val "count";b,Val "mass"]) -> {c with c_quant=Tuple[c.c_quant;Variant(e,[a,Val "count";b,Val "mass"])]},measure,cx_flag | |
97 | 99 | | "NSEM",Val "measure" -> c,true,cx_flag |
98 | 100 | | "NSEM",Val "time" -> c,measure,cx_flag(*failwith "create_normal_concept: time"*) |
99 | 101 | | "NUM",t -> {c with c_quant=Tuple[c.c_quant;t]},measure,cx_flag |
... | ... | @@ -110,7 +112,7 @@ let create_normal_concept (*roles role_attrs*) tokens lex_sems t = |
110 | 112 | | "LEX",_ -> c,measure,cx_flag (* FIXME *) *) |
111 | 113 | (* | "TYPE",Val "int" -> {c with c_quant=Tuple[c.c_quant;Val "interrogative"]},measure *) |
112 | 114 | (* | "TYPE",_ -> c,measure,cx_flag (* FIXME *) *) |
113 | - | e,t -> failwith ("create_normal_concept noun: " ^ e)) in | |
115 | + | e,t -> failwith ("create_normal_concept noun: " ^ e ^ ": " ^ ENIAMsemStringOf.linear_term 0 t)) in | |
114 | 116 | (* let c = if t.pos = "depr" then {c with c_relations=Tuple[c.c_relations;SingleRelation(Val "depr")]} else c in *) |
115 | 117 | if cx_flag then |
116 | 118 | let id = ExtArray.add tokens ENIAMtokenizerTypes.empty_token_env in |
... | ... |
semantics/ENIAMsemLexicon.ml
... | ... | @@ -65,6 +65,8 @@ let parse_role p = function |
65 | 65 | "adjunct" -> {p with gf=ADJUNCT} |
66 | 66 | | "unk" -> {p with role="unk"} |
67 | 67 | | "nosem" -> {p with gf=NOSEM} |
68 | + | "Poss" -> {p with role="Poss"; sel_prefs=[SynsetName "ALL"]} | |
69 | + | "Coref" -> {p with role="Coref"; sel_prefs=[SynsetName "ALL"]} | |
68 | 70 | | "Count" -> {p with role="Count"; sel_prefs=[SynsetName "ALL"]} |
69 | 71 | | "Measure" -> {p with role="Measure"; sel_prefs=[SynsetName "ALL"]} |
70 | 72 | | s -> failwith ("parse_role: " ^ s) |
... | ... |
semantics/ENIAMsemValence.ml
... | ... | @@ -278,8 +278,8 @@ let rec assign_frames_rec tokens lex_sems tree arg_symbols visited = function |
278 | 278 | let s = ExtArray.get lex_sems t.id in |
279 | 279 | let symbol = get_phrase_symbol t.symbol in |
280 | 280 | let frames = Xlist.fold s.ENIAMlexSemanticsTypes.frames [] (fun frames frame -> |
281 | - print_endline ("selectors: " ^ ENIAMcategoriesPL.string_of_selectors frame.selectors); | |
282 | - Printf.printf "assign_frames_rec: lemma=%s positions=[%s]\n%!" t.lemma (ENIAMwalStringOf.schema frame.positions); | |
281 | + (* print_endline ("selectors: " ^ ENIAMcategoriesPL.string_of_selectors frame.selectors); *) | |
282 | + (* Printf.printf "assign_frames_rec: lemma=%s positions=[%s]\n%!" t.lemma (ENIAMwalStringOf.schema frame.positions); *) | |
283 | 283 | try |
284 | 284 | let attrs = apply_selectors t.attrs frame.selectors in |
285 | 285 | let frame = ENIAMsemLexicon.extend_frame symbol frame in |
... | ... | @@ -380,14 +380,21 @@ exception AGF |
380 | 380 | |
381 | 381 | let rec manage_agf = function |
382 | 382 | | Node t -> |
383 | + (* print_endline ("manage_agf 1 " ^ ENIAM_LCGstringOf.linear_term 0 (Node t)); *) | |
383 | 384 | let attrs,agf = try extract_attr "agf" [] t.attrs with Not_found -> failwith "manage_agf" in |
385 | + (* print_endline "manage_agf 2"; *) | |
384 | 386 | let gf = try get_attr "gf" t.attrs with Not_found -> Dot in (* FIXME: to by się chyba przydało poprawić, żeby gf było zawsze ustalone *) |
387 | + (* print_endline "manage_agf 3"; *) | |
385 | 388 | if agf = Val "" || agf=gf then Node{t with attrs=attrs} else raise AGF |
386 | 389 | | Variant(e,l) -> |
390 | + (* print_endline ("manage_agf 4: " ^ ENIAM_LCGstringOf.linear_term 0 (Variant(e,l))); *) | |
387 | 391 | let l = Xlist.fold l [] (fun l (i,t) -> try (i, manage_agf t) :: l with AGF -> l) in |
392 | + (* print_endline ("manage_agf 5: " ^ ENIAM_LCGstringOf.linear_term 0 (Variant(e,l))); *) | |
388 | 393 | if l = [] then raise AGF else Variant(e,List.rev l) |
389 | 394 | | Tuple l -> |
395 | + (* print_endline "manage_agf 6"; *) | |
390 | 396 | let l = Xlist.rev_map l manage_agf in |
397 | + (* print_endline "manage_agf 7"; *) | |
391 | 398 | Tuple(List.rev l) |
392 | 399 | | Dot -> Dot |
393 | 400 | | t -> failwith ("cut_nodes: " ^ ENIAM_LCGstringOf.linear_term 0 t) |
... | ... | @@ -408,7 +415,7 @@ let rec reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree = functio |
408 | 415 | | Node t -> |
409 | 416 | let args = reduce_tree_rec tokens lex_sems result_tree mid_tree orig_tree t.args in |
410 | 417 | (* print_endline ("reduce_tree_rec 1: " ^ ENIAM_LCGstringOf.linear_term 0 args); *) |
411 | - let args = try manage_agf args with AGF -> failwith "reduce_tree_rec: AGF" in | |
418 | + let args = try manage_agf args with AGF -> failwith "reduce_tree_rec: AGF" in (* FIXME: to nie musi być błąd, należałoby przechwytywać wyjątek na poziomie wariantu powyżej *) | |
412 | 419 | let args = cut_nodes result_tree args in |
413 | 420 | (* print_endline ("reduce_tree_rec 2: " ^ ENIAM_LCGstringOf.linear_term 0 args); *) |
414 | 421 | (*let id = |
... | ... |
semantics/resources/lexicon-pl.dic
1 | 1 | |
2 | 2 | day-lex: /(date+day+day-month):unk; |
3 | 3 | date: /(1+year-lex):unk; |
4 | -day: /month-lex:unk; | |
5 | -day-interval: /month-lex:unk; | |
6 | -day-month: /(1+year-lex):unk; | |
4 | +day: /month-lex:Poss; | |
5 | +day-interval: /month-lex:Poss; | |
6 | +day-month: /(1+year-lex):Poss; | |
7 | 7 | year-lex: |(1+adjp*number*case*gender):unk; |
8 | -month-lex: /(1+year+np*T*gen*T*T):unk; | |
8 | +month-lex: /(1+year+np*T*gen*T*T):Poss; | |
9 | 9 | |
10 | 10 | date-interval: null; |
11 | 11 | day-month-interval: null; |
... | ... | @@ -26,8 +26,8 @@ email: null; |
26 | 26 | np: |
27 | 27 | \(1+num*number*case*gender*person*congr*nsem+num*number*case*gender*person*rec*nsem):adjunct \(1+qub):adjunct /(1+inclusion):adjunct |
28 | 28 | \(1+measure*unumber*ucase*ugender*uperson):Measure |
29 | - /(1+date+day+day-month+date-interval+day-interval+day-month-interval+year+year-interval+hour+hour-minute+hour-interval+hour-minute-interval):unk | |
30 | - |(1+year):unk /(1+obj-id):unk; | |
29 | + /(1+date+day+day-month+date-interval+day-interval+day-month-interval+year+year-interval+hour+hour-minute+hour-interval+hour-minute-interval+roman+roman-interval):Coref | |
30 | + |(1+year):unk /(1+obj-id):Coref; | |
31 | 31 | |
32 | 32 | num: \(1+qub):adjunct /(1+inclusion):adjunct; |
33 | 33 | |
... | ... |
testy/testy_podstawowe.txt
walenty/ENIAMwalGenerate.ml
... | ... | @@ -167,223 +167,3 @@ let _ = |
167 | 167 | print_meanings "resources/meanings.tab" meanings; |
168 | 168 | print_adv_types "resources/adv_modes.tab" adv_types; |
169 | 169 | ()) |
170 | - | |
171 | -(* Test wczytywania Walentego TEI *) | |
172 | -(* let _ = | |
173 | - let walenty,phrases = ENIAMwalTEI.load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml" in | |
174 | - let n = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = connect e in n + Xlist.size l) in | |
175 | - let m = Xlist.fold ENIAMwalTEI.walenty 0 (fun n e -> let l = schemata e in n + Xlist.size l) in | |
176 | - Printf.printf "%d connected\n%d schemata\n|phrases|=%d\n" n m (IntMap.size phrases); | |
177 | - () *) | |
178 | - | |
179 | -(* Test unikalności indeksów sensów *) | |
180 | -(* let _ = | |
181 | - let walenty,phrases = ENIAMwalTEI.load_walenty "/home/yacheu/Dokumenty/NLP resources/Walenty/walenty_20170311.xml" in | |
182 | - Xlist.fold walenty IntMap.empty (fun map e -> | |
183 | - Xlist.fold e.meanings map (fun map m -> | |
184 | - IntMap.add_inc map m.mng_id m (fun m1 -> if m1 = m then m else failwith "meaning"))) *) | |
185 | - | |
186 | -(* | |
187 | -(* let insert_phrases phrases = function | |
188 | - Frame(atrs,s) -> Frame(atrs,Xlist.map s (fun p -> | |
189 | - {p with morfs=Xlist.map p.morfs (function | |
190 | - MorfId id -> (try IntMap.find phrases id with Not_found -> failwith "insert_phrases") | |
191 | - | _ -> failwith "insert_phrases")})) | |
192 | - | _ -> failwith "insert_phrases: ni" | |
193 | - | |
194 | -let print_entry pos_map pos orth = | |
195 | - let orth_map = try StringMap.find pos_map pos with Not_found -> StringMap.empty in | |
196 | - let frames = try StringMap.find orth_map orth with Not_found -> [] in | |
197 | - Xlist.iter frames (fun frame -> | |
198 | - let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
199 | - print_endline (ENIAMwalStringOf.frame orth frame)) *) | |
200 | - | |
201 | -(* Wypisanie hasła *) | |
202 | -(* let _ = | |
203 | - print_entry connected_walenty "verb" "brudzić"; | |
204 | - () *) | |
205 | - | |
206 | -(* let has_nontrivial_lex = function | |
207 | - Frame(atrs,s) -> Xlist.fold s false (fun b p -> | |
208 | - if p.role = "Lemma" && p.role_attr = "" then b else | |
209 | - Xlist.fold p.morfs b (fun b -> function | |
210 | - MorfId id -> failwith "has_nontrivial_lex" | |
211 | - | LexPhrase _ -> true | |
212 | - (* | LexRPhrase _ -> true | |
213 | - | LexPhraseMode _ -> true *) | |
214 | - | _ -> b)) | |
215 | - | _ -> failwith "has_nontrivial_lex: ni" *) | |
216 | - | |
217 | -(* Leksykalizacje nie wchodzące do lematu *) | |
218 | -(* let _ = | |
219 | - StringMap.iter connected_walenty (fun _ orth_map -> | |
220 | - StringMap.iter orth_map (fun orth frames -> | |
221 | - Xlist.iter frames (fun frame -> | |
222 | - let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
223 | - if has_nontrivial_lex frame then | |
224 | - print_endline (ENIAMwalStringOf.frame orth frame)))) *) | |
225 | - | |
226 | -let simplify_frame_verb = function | |
227 | - Phrase(NP(Case "dat")) -> [] | |
228 | - | Phrase(NP(Case "inst")) -> [] | |
229 | - | Phrase(PrepNP _) -> [] | |
230 | - | Phrase(ComprepNP _) -> [] | |
231 | - | Phrase(AdvP) -> [] | |
232 | - | t -> [t] | |
233 | - | |
234 | -let simplify_frame_noun = function | |
235 | - Phrase(NP(Case "gen")) -> [] | |
236 | - | Phrase(NP(Case "nom")) -> [] | |
237 | - | Phrase(NP(CaseAgr)) -> [] | |
238 | - | Phrase(PrepNP _) -> [] | |
239 | - | Phrase(ComprepNP _) -> [] | |
240 | - | Phrase(AdjP CaseAgr) -> [] | |
241 | - | PhraseComp(Ncp(Case "gen"),_) | |
242 | - | PhraseComp(Prepncp(_,_),_) -> [] | |
243 | - | PhraseAbbr(Possp,[]) -> [] | |
244 | - | t -> [t] | |
245 | - | |
246 | -let simplify_frame_adj = function | |
247 | - | t -> [t] | |
248 | - | |
249 | -let simplify_frame_adv = function | |
250 | - | t -> [t] | |
251 | - | |
252 | - | |
253 | -(* let simplify_frame pos = function | |
254 | - Frame(atrs,s) -> | |
255 | - let schema = Xlist.fold s [] (fun schema p -> | |
256 | - let morfs = Xlist.fold p.morfs [] (fun morfs morf -> | |
257 | - match pos with | |
258 | - "verb" -> simplify_frame_verb morf @ morfs | |
259 | - | "noun" -> simplify_frame_noun morf @ morfs | |
260 | - | "adj" -> simplify_frame_adj morf @ morfs | |
261 | - | "adv" -> simplify_frame_adv morf @ morfs | |
262 | - | _ -> failwith "simplify_frame") in | |
263 | - if morfs = [] then schema else | |
264 | - {p with ce=[]; cr=[]; morfs=morfs} :: schema) in | |
265 | - if schema = [] then [] else [Frame(atrs,schema)] | |
266 | - | _ -> failwith "simplify_frame: ni" *) | |
267 | - | |
268 | - | |
269 | -(* Uproszczone schematy *) | |
270 | -(* let _ = | |
271 | - StringMap.iter schemata_walenty (fun pos orth_map -> | |
272 | - if pos = "noun" then | |
273 | - StringMap.iter orth_map (fun orth frames -> | |
274 | - Xlist.iter frames (fun frame -> | |
275 | - let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
276 | - let frames = simplify_frame pos frame in | |
277 | - Xlist.iter frames (fun frame -> print_endline (ENIAMwalStringOf.frame orth frame))))) *) | |
278 | - | |
279 | -(* let has_mode_coordination = function | |
280 | - Frame(atrs,s) -> Xlist.fold s false (fun b p -> | |
281 | - let n = Xlist.fold p.morfs 0 (fun n -> function | |
282 | - MorfId id -> failwith "has_nontrivial_lex" | |
283 | - | PhraseAbbr(Advp _,_) -> n+1 | |
284 | - | PhraseAbbr(Xp _,_) -> n+1 | |
285 | - (* | LexPhraseMode _ -> n+1 FIXME*) | |
286 | - | _ -> n) in | |
287 | - if n>1 then true else b) | |
288 | - | _ -> failwith "has_nontrivial_lex: ni" *) | |
289 | - | |
290 | -(* Koordynacja z mode *) | |
291 | -(* let _ = | |
292 | - StringMap.iter schemata_walenty(*connected_walenty*) (fun _ orth_map -> | |
293 | - StringMap.iter orth_map (fun orth frames -> | |
294 | - Xlist.iter frames (fun frame -> | |
295 | - let frame = insert_phrases ENIAMwalTEI.phrases frame in | |
296 | - if has_mode_coordination frame then | |
297 | - print_endline (ENIAMwalStringOf.frame orth frame)))) *) | |
298 | - | |
299 | - | |
300 | -(* let get_entry orth pos *) | |
301 | - (* | |
302 | -let load_walenty2 () = | |
303 | - let walenty = load_walenty walenty_filename in | |
304 | - Xlist.fold walenty StringMap.empty (fun walenty entry -> | |
305 | - if entry.frames = [] then Xlist.fold (connect2 entry) walenty (fun walenty (lemma,pos,frame) -> | |
306 | - let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in | |
307 | - let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in | |
308 | - StringMap.add walenty pos map) | |
309 | - else Xlist.fold (connect entry) walenty (fun walenty (lemma,pos,frame) -> | |
310 | - let map = try StringMap.find walenty pos with Not_found -> StringMap.empty in | |
311 | - let map = StringMap.add_inc map lemma [frame] (fun l -> frame :: l) in | |
312 | - StringMap.add walenty pos map)) | |
313 | - | |
314 | - | |
315 | -let print_stringqmap filename qmap = | |
316 | - let l = StringQMap.fold qmap [] (fun l k v -> (v,k) :: l) in | |
317 | - File.file_out filename (fun file -> | |
318 | - Xlist.iter (Xlist.sort l compare) (fun (v,k) -> | |
319 | - Printf.fprintf file "%5d %s\n" v k)) | |
320 | - | |
321 | -let sel_prefs_quantities walenty = | |
322 | - Xlist.fold walenty StringQMap.empty (fun quant e -> | |
323 | - Xlist.fold e.frames quant (fun quant f -> | |
324 | - Xlist.fold f.arguments quant (fun quant a -> | |
325 | - Xlist.fold a.sel_prefs quant (fun quant l -> | |
326 | - Xlist.fold l quant (fun quant -> function | |
327 | - Numeric s -> | |
328 | - let name = try ENIAMplWordnet.synset_name s with Not_found -> "unknown" in | |
329 | - StringQMap.add quant ("N " ^ s ^ " " ^ name) | |
330 | - | Symbol s -> StringQMap.add quant ("S " ^ s) | |
331 | - | Relation(s,t) -> StringQMap.add quant ("R " ^ s ^ " | " ^ t)))))) | |
332 | -*) | |
333 | -(*let _ = | |
334 | - let walenty = load_walenty walenty_filename in | |
335 | - let quant = sel_prefs_quantities walenty in | |
336 | - print_stringqmap "results/quant_sel_prefs.txt" quant*) | |
337 | - | |
338 | -(*let _ = | |
339 | - let walenty = load_walenty2 () in | |
340 | - let frames_sem = try StringMap.find (StringMap.find walenty "verb") "bębnić" with Not_found -> failwith "walTEI" in | |
341 | - Xlist.iter frames_sem (fun frame -> | |
342 | - print_endline (WalStringOf.frame "bębnić" frame))*) | |
343 | - | |
344 | - | |
345 | -(* Wypisanie realizacji *) | |
346 | -(* let _ = | |
347 | - Xlist.iter ENIAMwalTEI.expands (fun (id,morf,l) -> | |
348 | - Printf.printf "%d %s:\n" id (ENIAMwalStringOf.morf morf); | |
349 | - Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *) | |
350 | - | |
351 | -(* Wypisanie realizacji po przetworzeniu *) | |
352 | -(* let _ = | |
353 | - AbbrMap.iter expands (fun morf l -> | |
354 | - Printf.printf "%s:\n" (ENIAMwalStringOf.phrase_abbr morf); | |
355 | - Xlist.iter l (fun morf -> Printf.printf " %s\n" (ENIAMwalStringOf.morf morf))) *) | |
356 | - | |
357 | -let has_realization = function | |
358 | - PhraseAbbr _ -> true | |
359 | - | PhraseComp _ -> true | |
360 | - | _ -> false | |
361 | - | |
362 | -(* Wypisanie fraz, które podlegają rozwijaniu *) | |
363 | -(*let _ = | |
364 | - IntMap.iter ENIAMwalTEI.phrases (fun i morf -> | |
365 | - if has_realization morf then | |
366 | - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *) | |
367 | - | |
368 | -(* Wypisanie fraz, które podlegają rozwijaniu *) | |
369 | -(* let _ = | |
370 | - IntMap.iter phrases (fun i morf -> | |
371 | - if has_realization morf then | |
372 | - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf morf)) *) | |
373 | - | |
374 | -(* let test_phrases = [17088; 17133; 1642] | |
375 | - let _ = | |
376 | - Xlist.iter test_phrases (fun i -> | |
377 | - let m1 = IntMap.find ENIAMwalTEI.phrases i in | |
378 | - let m2 = IntMap.find phrases i in | |
379 | - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m1); | |
380 | - Printf.printf "%4d %s\n" i (ENIAMwalStringOf.morf m2)) *) | |
381 | - | |
382 | -(* let print_entries entries = | |
383 | - StringMap.iter entries (fun pos entries2 -> | |
384 | - StringMap.iter entries2 (fun lemma entries3 -> | |
385 | - EntrySet.iter entries3 (fun entry -> | |
386 | - Printf.printf "%s: %s: %s\n" pos lemma (ENIAMwalStringOf.entry entry)))) *) | |
387 | - | |
388 | -(* let _ = print_entries entries *) | |
389 | -*) | |
... | ... |
walenty/ENIAMwalTEI.ml
... | ... | @@ -86,9 +86,9 @@ let parse_gf = function |
86 | 86 | |
87 | 87 | let parse_control arg = function |
88 | 88 | "controller" -> {arg with cr="1" :: arg.cr} |
89 | - | "controllee" -> {arg with ce="1" :: arg.cr} | |
89 | + | "controllee" -> {arg with ce="1" :: arg.ce} | |
90 | 90 | | "controller2" -> {arg with cr="2" :: arg.cr} |
91 | - | "controllee2" -> {arg with ce="2" :: arg.cr} | |
91 | + | "controllee2" -> {arg with ce="2" :: arg.ce} | |
92 | 92 | | s -> failwith ("parse_control: " ^ s) |
93 | 93 | |
94 | 94 | let parse_case = function |
... | ... |
walenty/makefile
... | ... | @@ -6,11 +6,15 @@ OCAMLFLAGS=$(INCLUDES) -g |
6 | 6 | OCAMLOPTFLAGS=$(INCLUDES) unix.cmxa xml-light.cmxa str.cmxa nums.cmxa zip.cmxa bz2.cmxa xlib.cmxa |
7 | 7 | INSTALLDIR=`ocamlc -where`/eniam |
8 | 8 | |
9 | -SOURCES=entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml ENIAMwalGenerate.ml | |
9 | +SOURCES=entries.ml ENIAMwalTypes.ml ENIAMwalStringOf.ml ENIAMwalTEI.ml ENIAMwalConnect.ml ENIAMwalRealizations.ml ENIAMwalLex.ml | |
10 | 10 | |
11 | -all: $(SOURCES) | |
11 | +all: $(SOURCES) ENIAMwalGenerate.ml | |
12 | 12 | $(OCAMLOPT) -o converter $(OCAMLOPTFLAGS) $^ |
13 | 13 | |
14 | +analyze: $(SOURCES) ENIAMwalAnalyze.ml | |
15 | + mkdir -p results | |
16 | + $(OCAMLOPT) -o analyze $(OCAMLOPTFLAGS) $^ | |
17 | + | |
14 | 18 | install: |
15 | 19 | mkdir -p /usr/share/eniam/Walenty |
16 | 20 | cp resources/* /usr/share/eniam/Walenty |
... | ... | @@ -40,4 +44,4 @@ install-local: |
40 | 44 | $(OCAMLOPT) $(OCAMLOPTFLAGS) -c $< |
41 | 45 | |
42 | 46 | clean: |
43 | - rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a converter resources/*.tab | |
47 | + rm -f *~ *.cm[aoix] *.o *.so *.cmxa *.a converter analyze resources/*.tab results/* | |
... | ... |